http://kishor-naik-dotnet.blogspot.com/2011/01/cnet-extract-image-from-pdf-file.html
C#.net - Extract image from PDF file.
In this article I will show you how to extract images from a PDF file.
Step 1
First you need to download "ITextSharp.dll" from the following link.
http://sourceforge.net/projects/itextsharp/
Step 2
Create a Console Application and name the solution ConExtractImagefromPDF.
Step 3
Add two assembly references to the project from Solution Explorer:
1.ITextSharp.dll
2.System.Drawing.dll
Step 4
Write a static method that extracts the images from a PDF file. It looks like this:
/// <summary>
/// Extracts the images embedded in a PDF file as <see cref="System.Drawing.Image"/> objects.
/// </summary>
/// <remarks>
/// Every entry of the document's cross-reference table is inspected. Streams whose
/// /Subtype is /Image are read in raw (still encoded) form and handed to GDI+;
/// streams GDI+ cannot decode are skipped. The caller owns the returned images
/// and is responsible for disposing them.
/// </remarks>
/// <param name="PDFSourcePath">Path of the PDF file to read.</param>
/// <returns>The images that could be decoded, in cross-reference order; empty if there are none.</returns>
/// <exception cref="ArgumentException"><paramref name="PDFSourcePath"/> is null or empty.</exception>
private static List<System.Drawing.Image> ExtractImages(String PDFSourcePath)
{
    if (String.IsNullOrEmpty(PDFSourcePath))
    {
        throw new ArgumentException("The PDF source path must not be null or empty.", "PDFSourcePath");
    }

    List<System.Drawing.Image> images = new List<System.Drawing.Image>();
    iTextSharp.text.pdf.RandomAccessFileOrArray sourceFile = null;
    iTextSharp.text.pdf.PdfReader reader = null;
    try
    {
        sourceFile = new iTextSharp.text.pdf.RandomAccessFileOrArray(PDFSourcePath);
        reader = new iTextSharp.text.pdf.PdfReader(sourceFile, null);

        for (int i = 0; i < reader.XrefSize; i++)
        {
            // Anything that is not a stream read from the file (null slots, dictionaries, numbers...) is skipped.
            iTextSharp.text.pdf.PRStream stream = reader.GetPdfObject(i) as iTextSharp.text.pdf.PRStream;
            if (stream == null)
            {
                continue;
            }

            iTextSharp.text.pdf.PdfObject subtype = stream.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);
            if (!iTextSharp.text.pdf.PdfName.IMAGE.Equals(subtype))
            {
                continue;
            }

            byte[] bytes = iTextSharp.text.pdf.PdfReader.GetStreamBytesRaw(stream);
            if (bytes == null)
            {
                continue;
            }

            // GDI+ needs the stream to stay open for the lifetime of the image,
            // so the buffer is only disposed when decoding fails.
            System.IO.MemoryStream buffer = new System.IO.MemoryStream(bytes);
            try
            {
                images.Add(System.Drawing.Image.FromStream(buffer));
            }
            catch (Exception)
            {
                // Best effort by design: the raw bytes are only a valid image file for
                // some encodings (e.g. JPEG); anything GDI+ rejects is skipped.
                buffer.Dispose();
            }
        }
    }
    catch
    {
        // Do not leak the images decoded so far; rethrow with the original type and stack trace.
        foreach (System.Drawing.Image image in images)
        {
            image.Dispose();
        }
        throw;
    }
    finally
    {
        // Release the file on every path, not only on success.
        if (reader != null)
        {
            reader.Close();
        }
        else if (sourceFile != null)
        {
            // The reader was never constructed, so the file has to be closed directly.
            sourceFile.Close();
        }
    }

    return images;
}
Step 5
Write a static method that stores the extracted images in a folder. It looks like this:
/// <summary>
/// Extracts the images of the sample PDF and saves each one as a JPEG file
/// named Image{index}.jpeg in the "ImageStore" folder below the application's base directory.
/// </summary>
/// <remarks>
/// A failure to save one image is reported on the console and does not stop the
/// remaining images from being written. Every image is disposed once it has been handled.
/// </remarks>
private static void WriteImageFile()
{
    System.Console.WriteLine("Wait for extracting image from PDF file....");

    // Get a List of Image
    List<System.Drawing.Image> ListImage = ExtractImages(@"C:\Users\Kishor\Desktop\TuterPDF\ASP.net\ASP.NET 3.5 Unleashed.pdf");

    // Image.Save does not create missing folders, so make sure the target exists first.
    String outputFolder = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageStore");
    System.IO.Directory.CreateDirectory(outputFolder);

    for (int i = 0; i < ListImage.Count; i++)
    {
        String fileName = "Image" + i + ".jpeg";
        try
        {
            // Write Image File
            ListImage[i].Save(System.IO.Path.Combine(outputFolder, fileName), System.Drawing.Imaging.ImageFormat.Jpeg);
            System.Console.WriteLine(fileName + " write sucessfully");
        }
        catch (Exception ex)
        {
            // Keep going with the remaining images, but tell the user which one failed and why.
            System.Console.WriteLine(fileName + " could not be written: " + ex.Message);
        }
        finally
        {
            // Release the GDI+ resources held by the image.
            ListImage[i].Dispose();
        }
    }
}
Step 6
Call the above function from the Main method. It looks like this:
/// <summary>
/// Program entry point: runs the image extraction and prints the message of any
/// exception that escapes it to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        // write image file
        WriteImageFile();
    }
    catch (Exception failure)
    {
        Console.WriteLine(failure.Message);
    }
}
Full Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace ConExtractImagefromPDF
{
    /// <summary>
    /// Console program that extracts the images embedded in a PDF file and saves them as JPEG files.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Program entry point: runs the extraction and prints the message of any
        /// exception that escapes it to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            try
            {
                WriteImageFile(); // write image file
            }
            catch (Exception ex)
            {
                System.Console.WriteLine(ex.Message);
            }
        }

        #region Methods

        /// <summary>
        /// Extracts the images embedded in a PDF file as <see cref="System.Drawing.Image"/> objects.
        /// </summary>
        /// <remarks>
        /// Every entry of the document's cross-reference table is inspected. Streams whose
        /// /Subtype is /Image are read in raw (still encoded) form and handed to GDI+;
        /// streams GDI+ cannot decode are skipped. The caller owns the returned images
        /// and is responsible for disposing them.
        /// </remarks>
        /// <param name="PDFSourcePath">Path of the PDF file to read.</param>
        /// <returns>The images that could be decoded, in cross-reference order; empty if there are none.</returns>
        /// <exception cref="ArgumentException"><paramref name="PDFSourcePath"/> is null or empty.</exception>
        private static List<System.Drawing.Image> ExtractImages(String PDFSourcePath)
        {
            if (String.IsNullOrEmpty(PDFSourcePath))
            {
                throw new ArgumentException("The PDF source path must not be null or empty.", "PDFSourcePath");
            }

            List<System.Drawing.Image> images = new List<System.Drawing.Image>();
            iTextSharp.text.pdf.RandomAccessFileOrArray sourceFile = null;
            iTextSharp.text.pdf.PdfReader reader = null;
            try
            {
                sourceFile = new iTextSharp.text.pdf.RandomAccessFileOrArray(PDFSourcePath);
                reader = new iTextSharp.text.pdf.PdfReader(sourceFile, null);

                for (int i = 0; i < reader.XrefSize; i++)
                {
                    // Anything that is not a stream read from the file (null slots, dictionaries, numbers...) is skipped.
                    iTextSharp.text.pdf.PRStream stream = reader.GetPdfObject(i) as iTextSharp.text.pdf.PRStream;
                    if (stream == null)
                    {
                        continue;
                    }

                    iTextSharp.text.pdf.PdfObject subtype = stream.Get(iTextSharp.text.pdf.PdfName.SUBTYPE);
                    if (!iTextSharp.text.pdf.PdfName.IMAGE.Equals(subtype))
                    {
                        continue;
                    }

                    byte[] bytes = iTextSharp.text.pdf.PdfReader.GetStreamBytesRaw(stream);
                    if (bytes == null)
                    {
                        continue;
                    }

                    // GDI+ needs the stream to stay open for the lifetime of the image,
                    // so the buffer is only disposed when decoding fails.
                    System.IO.MemoryStream buffer = new System.IO.MemoryStream(bytes);
                    try
                    {
                        images.Add(System.Drawing.Image.FromStream(buffer));
                    }
                    catch (Exception)
                    {
                        // Best effort by design: the raw bytes are only a valid image file for
                        // some encodings (e.g. JPEG); anything GDI+ rejects is skipped.
                        buffer.Dispose();
                    }
                }
            }
            catch
            {
                // Do not leak the images decoded so far; rethrow with the original type and stack trace.
                foreach (System.Drawing.Image image in images)
                {
                    image.Dispose();
                }
                throw;
            }
            finally
            {
                // Release the file on every path, not only on success.
                if (reader != null)
                {
                    reader.Close();
                }
                else if (sourceFile != null)
                {
                    // The reader was never constructed, so the file has to be closed directly.
                    sourceFile.Close();
                }
            }

            return images;
        }

        /// <summary>
        /// Extracts the images of the sample PDF and saves each one as a JPEG file
        /// named Image{index}.jpeg in the "ImageStore" folder below the application's base directory.
        /// </summary>
        /// <remarks>
        /// A failure to save one image is reported on the console and does not stop the
        /// remaining images from being written. Every image is disposed once it has been handled.
        /// </remarks>
        private static void WriteImageFile()
        {
            System.Console.WriteLine("Wait for extracting image from PDF file....");

            // Get a List of Image
            List<System.Drawing.Image> ListImage = ExtractImages(@"C:\Users\Kishor\Desktop\TuterPDF\ASP.net\ASP.NET 3.5 Unleashed.pdf");

            // Image.Save does not create missing folders, so make sure the target exists first.
            String outputFolder = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "ImageStore");
            System.IO.Directory.CreateDirectory(outputFolder);

            for (int i = 0; i < ListImage.Count; i++)
            {
                String fileName = "Image" + i + ".jpeg";
                try
                {
                    // Write Image File
                    ListImage[i].Save(System.IO.Path.Combine(outputFolder, fileName), System.Drawing.Imaging.ImageFormat.Jpeg);
                    System.Console.WriteLine(fileName + " write sucessfully");
                }
                catch (Exception ex)
                {
                    // Keep going with the remaining images, but tell the user which one failed and why.
                    System.Console.WriteLine(fileName + " could not be written: " + ex.Message);
                }
                finally
                {
                    // Release the GDI+ resources held by the image.
                    ListImage[i].Dispose();
                }
            }
        }

        #endregion
    }
}