Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

extract image from word file

I have been trying the following C# code to extract image from the doc file but it is not working:

object missing = System.Reflection.Missing.Value;            
            Microsoft.Office.Interop.Word.Application oWord = new Microsoft.Office.Interop.Word.Application();
            Microsoft.Office.Interop.Word.Document oDoc = new Microsoft.Office.Interop.Word.Document();
            oWord.Visible = false;
            object str1 = "C:\\doc.doc";
            oDoc = oWord.Documents.Open(ref str1, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing);

            if (oDoc.InlineShapes.Count > 0)            {


                for (int j = 0; j < oDoc.InlineShapes.Count; j++)
                {  

                    oWord.ActiveDocument.Select();
                    oDoc.ActiveWindow.Selection.CopyAsPicture();

                    IDataObject data = Clipboard.GetDataObject();                    

                    if (data.GetDataPresent(typeof(System.Drawing.Bitmap)))
                    {
                        object bm = data.GetData(DataFormats.Bitmap);

                        Bitmap bmp;
                        bmp = (Bitmap)data.GetData(typeof(System.Drawing.Bitmap));

                        bmp.Save("C:\\test.bmp");
                    }



                }

Can anybody give the proper code for extracting the image from word file?

like image 920
fawad Avatar asked Oct 29 '11 07:10

fawad


People also ask

How do I extract an object from a Word document?

To extract the contents of the file, right-click on the file and select “Extract All” from the popup menu. On the “Select a Destination and Extract Files” dialog box, the path where the content of the .

How do I find an embedded image in Word?

In the location where the file was saved, you'll see the html document and a folder with the same name of the saved html document but with _images appended or _files appended. This is where you'll find all the embedded images.


1 Answers

using System;
using System.Drawing;
using System.IO;
using System.Threading;
using Page = System.Web.UI.Page;
using Microsoft.Office.Interop.Word;
using Microsoft.VisualBasic.Devices;
public partial class ReadIMG : System.Web.UI.Page
{   
    private Application m_word;
    private int m_i;
    protected void Page_Load(object sender, EventArgs e)
    {
        object missing = Type.Missing;
        object FileName = Server.MapPath("~/LectureOrig/Word.docx");
        object readOnly = true;
        m_word = new Application();
        m_word.Documents.Open(ref FileName,
                                ref missing, ref readOnly, ref missing, ref missing,
                                ref missing, ref missing, ref missing, ref missing,
                                ref missing, ref missing, ref missing, ref missing, ref missing,ref missing,ref missing);
        try
        {
            for (int i = 1; i <= m_word.ActiveDocument.InlineShapes.Count; i++)
            {
                m_i = i;
               // CopyFromClipboardShape();
                Thread thread = new Thread(CopyFromClipbordInlineShape);
                thread.SetApartmentState(ApartmentState.STA);
                thread.Start();
                thread.Join();
            }
        }
        finally
        {
            object save = false;
            m_word.Quit(ref save, ref missing, ref missing);
            m_word = null;
        }
    }
    protected void CopyFromClipbordInlineShape()
    {   
        InlineShape inlineShape = m_word.ActiveDocument.InlineShapes[m_i];
        inlineShape.Select();
        m_word.Selection.Copy();
        Computer computer = new Computer();
        //Image img = computer.Clipboard.GetImage();
        if (computer.Clipboard.GetDataObject() != null)
        {
            System.Windows.Forms.IDataObject data = computer.Clipboard.GetDataObject();
            if (data.GetDataPresent(System.Windows.Forms.DataFormats.Bitmap))
            {
                Image image = (Image)data.GetData(System.Windows.Forms.DataFormats.Bitmap, true);                
                image.Save(Server.MapPath("~/ImagesGet/image.gif"), System.Drawing.Imaging.ImageFormat.Gif);
                image.Save(Server.MapPath("~/ImagesGet/image.jpg"), System.Drawing.Imaging.ImageFormat.Jpeg);

            }
            else
            {
                LabelMessage.Text="The Data In Clipboard is not as image format";
            }
        }
        else
        {
            LabelMessage.Text="The Clipboard was empty";
        }
    }

Code copy from How To Exctract images from Doc (Word) file in C#?

like image 91
Ekk Avatar answered Sep 19 '22 22:09

Ekk