Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to convert the WordOpenXML property into a System.IO.Packaging.Package?

I've been experimenting with building an Outlook 2010 addin in C#, using Visual Studio 2008. I am able to obtain a Microsoft.Office.Interop.Word.DocumentClass object from a Microsoft.Office.Interop.Outlook.Inspector object, which represents the e-mail being currently edited. I understand that this DocumentClass is used in various other contexts too (usually for Microsoft Word extensions, unsurprisingly).

That class helpfully has a property called WordOpenXML, which seems to be an XML representation of all the files constituting the .docx package which would be saved to disk when saving this Word document as a .docx.

One way this property would be very useful is if it could be converted into a System.IO.Packaging.Package object in memory; i.e. deserialized. Does anyone know whether there is an easy way to do this, or do I need to write some XML parsing code to do it manually?

like image 695
Jez Avatar asked Sep 30 '11 09:09

Jez


2 Answers

I ended up modifying some code I found online to create a method which converts WordOpenXML to a Package, which is saved to disk:

using System;
using System.Xml;
using System.Xml.XPath;
using System.Text;
using System.IO;
using System.IO.Packaging;

// [...]
// Call like this: createPackageFromWordOpenXML(wordEditor.WordOpenXML, @"C:\outputFiles\testOut.docx");

/// <summary>
/// Creates a ZIP package (i.e. Word's .docx format) from a WordOpenXML string and saves it to the file at the path specified.
/// </summary>
/// <remarks>
/// Every <c>pkg:part</c> element in the input becomes one part of the package. A part that carries a
/// <c>pkg:binaryData</c> child (for example an embedded image) is Base64-decoded before it is written;
/// any other part is written as UTF-8 text with its surrounding <c>pkg:xmlData</c> wrapper removed.
/// The target file is created (or overwritten) before the XML is parsed.
/// </remarks>
/// <param name="wordOpenXML">The WordOpenXML string to get the ZIP package data from.</param>
/// <param name="filePath">The path of the file to save the ZIP package to.</param>
/// <exception cref="ArgumentNullException"><paramref name="wordOpenXML"/> is null.</exception>
private void createPackageFromWordOpenXML(string wordOpenXML, string filePath)
{
    const string packageXmlns = "http://schemas.microsoft.com/office/2006/xmlPackage";

    // Fail before touching the file system; StringReader would throw the same exception type later anyway.
    if (wordOpenXML == null)
    {
        throw new ArgumentNullException("wordOpenXML");
    }

    // Disposing the package closes it, which also saves all pending part data to disk.
    using (Package newPkg = Package.Open(filePath, FileMode.Create))
    {
        XPathDocument xpDocument = new XPathDocument(new StringReader(wordOpenXML));
        XPathNavigator xpNavigator = xpDocument.CreateNavigator();

        XmlNamespaceManager nsManager = new XmlNamespaceManager(xpNavigator.NameTable);
        nsManager.AddNamespace("pkg", packageXmlns);
        XPathNodeIterator xpIterator = xpNavigator.Select("//pkg:part", nsManager);

        while (xpIterator.MoveNext())
        {
            XPathNavigator partNode = xpIterator.Current;

            Uri partUri = new Uri(partNode.GetAttribute("name", packageXmlns), UriKind.Relative);
            PackagePart pkgPart = newPkg.CreatePart(partUri, partNode.GetAttribute("contentType", packageXmlns));

            byte[] buffer;
            XPathNavigator binaryNode = partNode.SelectSingleNode("pkg:binaryData", nsManager);
            if (binaryNode != null)
            {
                // Binary parts are stored as Base64 text; decode them so the package
                // receives the real bytes rather than the textual encoding.
                buffer = Convert.FromBase64String(binaryNode.Value);
            }
            else
            {
                // Set this package part's contents to this XML node's inner XML, sans its surrounding xmlData element.
                string strInnerXml = partNode.InnerXml
                    .Replace("<pkg:xmlData xmlns:pkg=\"" + packageXmlns + "\">", "")
                    .Replace("</pkg:xmlData>", "");
                buffer = Encoding.UTF8.GetBytes(strInnerXml);
            }

            // Dispose each part stream as soon as it has been written.
            using (Stream partStream = pkgPart.GetStream())
            {
                partStream.Write(buffer, 0, buffer.Length);
            }
        }
    }
}
like image 58
Jez Avatar answered Nov 01 '22 19:11

Jez


I converted this to VB.NET and added a piece to deal with binary attachments. Thanks for the excellent work!

''' <summary>
''' Creates a .docx ZIP package from a WordOpenXML string and saves it to the given file path.
''' </summary>
''' <remarks>
''' Every pkg:part element in the input becomes one part of the package. A part with a pkg:binaryData
''' child is Base64-decoded before it is written; a part with a pkg:xmlData child is written as UTF-8
''' text with the wrapper element removed. A part with neither child is created but left empty.
''' The payload kind is decided by the child element that is actually present, not by searching the
''' serialized text, so Base64 content that happens to contain "xmlData" is still treated as binary.
''' </remarks>
''' <param name="sWordOpenXML">The WordOpenXML string to read the package parts from. It is not modified.</param>
''' <param name="sfilePath">The path of the file to save the package to; an existing file is overwritten.</param>
Public Sub createDocXFromWordOpenXML(ByRef sWordOpenXML As String, ByVal sfilePath As String)
  Const sPackageXMLNS As String = "http://schemas.microsoft.com/office/2006/xmlPackage"

  ' Disposing the package closes it, which also saves all pending part data to disk.
  Using docxPkg As Package = Package.Open(sfilePath, FileMode.Create)
    Dim xPathDoc As New XPathDocument(New StringReader(sWordOpenXML))
    Dim xpNav As XPathNavigator = xPathDoc.CreateNavigator()

    Dim xNSMgnr As New XmlNamespaceManager(xpNav.NameTable)
    xNSMgnr.AddNamespace("pkg", sPackageXMLNS)

    Dim xpPntr As XPathNodeIterator = xpNav.Select("//pkg:part", xNSMgnr)

    While xpPntr.MoveNext()
      Dim xpPart As XPathNavigator = xpPntr.Current

      Dim partURI As New Uri(xpPart.GetAttribute("name", sPackageXMLNS), UriKind.Relative)
      Dim pkgPart As PackagePart = docxPkg.CreatePart(partURI, xpPart.GetAttribute("contentType", sPackageXMLNS))

      Dim bytBuffer As Byte() = Nothing
      Dim xpBinary As XPathNavigator = xpPart.SelectSingleNode("pkg:binaryData", xNSMgnr)

      If xpBinary IsNot Nothing Then
        ' Binary parts are stored as Base64 text; decode them back to raw bytes.
        bytBuffer = Convert.FromBase64String(xpBinary.Value)
      ElseIf xpPart.SelectSingleNode("pkg:xmlData", xNSMgnr) IsNot Nothing Then
        ' XML parts: strip the surrounding xmlData wrapper and store the remainder as UTF-8.
        Dim sInnerXML As String = xpPart.InnerXml
        sInnerXML = sInnerXML.Replace("<pkg:xmlData xmlns:pkg=""" & sPackageXMLNS & """>", String.Empty)
        sInnerXML = sInnerXML.Replace("</pkg:xmlData>", String.Empty)
        bytBuffer = Encoding.UTF8.GetBytes(sInnerXML)
      End If

      If bytBuffer IsNot Nothing Then
        ' Dispose each part stream as soon as it has been written.
        Using partStream As Stream = pkgPart.GetStream()
          partStream.Write(bytBuffer, 0, bytBuffer.Length)
        End Using
      End If
    End While
  End Using
End Sub
like image 36
G.D.Carter Avatar answered Nov 01 '22 18:11

G.D.Carter