Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Ways to parse XML in C++ (Win32)

I'm looking for a way to parse XML in C++ on Windows. I've found a few options, such as MSXML, Xerces, and TinyXml, but I'm wondering which is best in terms of performance and features. My requirements are that it must be statically linkable or have its source included in the project itself, and it must not require any additional toolkits such as Boost. MSXML would be the obvious choice as it's an MS library, but it seems to be a COM library and rather convoluted to actually get any use out of.

Does anyone have any suggestions as to something quick and simple to use?

Thanks, J

like image 598
JWood Avatar asked Jun 20 '10 17:06

JWood


2 Answers

I have used libxml with success. The API is a bit confusing and complicated, but once you get the hang of it, it works pretty well. Besides, it is packed with functionality, so if you need that, go with libxml. You don't have to worry about bloated binaries, since you can link only the parts you need. For example, you don't need to include the complete libxml if you only need to parse XML and don't use the XPath features.

like image 55
Henri Avatar answered Oct 21 '22 23:10

Henri


Since all supported Windows versions (including Windows XP SP3) include MSXML 6.0, you should use MSXML 6.0. You should implement your own ISAXContentHandler class; I usually implement an ISequentialStream class as well.

An ISequentialStream implementation for parsing:

class MySequentialStream : public ISequentialStream
{
public:
  MySequentialStream( istream &is )
    : is(is), ref_count(0)
  {
    InitializeCriticalSection( &this->critical_section );
  };
  virtual ~MySequentialStream( void )
  {
    DeleteCriticalSection( &this->critical_section );
  }
  virtual HRESULT __stdcall QueryInterface( const IID &riid, void ** ppvObject )
  {
    if ( riid == IID_ISequentialStream )
    {
      *ppvObject = static_cast<void*>(this);
      this->AddRef();
      return S_OK;
    }
    if (riid == IID_IUnknown)
    {
      *ppvObject = static_cast<void*>(this);
      this->AddRef();
      return S_OK;
    }
    *ppvObject = 0;
    return E_NOINTERFACE;
  };
  virtual ULONG __stdcall AddRef( void )
  {
    return InterlockedIncrement(&this->ref_count);
  };
  virtual ULONG __stdcall Release( void )
  {
    ULONG nRefCount = InterlockedDecrement(&this->ref_count);
    if ( nRefCount == 0 ) delete this;
    return nRefCount;
  };    
  virtual HRESULT __stdcall Read( void *pv, ULONG cb, ULONG *pcbRead )
  {
    EnterCriticalSection( &this->critical_section );
    this->is.read( reinterpret_cast<char*>(pv), cb );
    *pcbRead = static_cast<ULONG>( this->is.gcount() );
    LeaveCriticalSection( &this->critical_section );
    return S_OK;
  };
  virtual HRESULT __stdcall Write( void const *pv, ULONG cb, ULONG *pcbWritten )
  {
    *pcbWritten = cb;
    return S_OK;
  };    
private:
  istream &is;
  CRITICAL_SECTION critical_section;
  ULONG ref_count;
};

You should implement an ISAXContentHandler class, too (of course, you should fill in the methods as needed):

// Skeleton ISAXContentHandler: every SAX callback is a no-op that returns
// S_OK. Fill in the callbacks you need (or override them in a subclass).
//
// Lifetime: the object is reference counted and deletes itself when the
// count drops to zero in Release(), so it must be allocated with `new`.
// The count starts at 0; the creator must call AddRef() before use.
class MyContentHandler : public ISAXContentHandler
{
public:
  MyContentHandler( void )
    : ref_count(0)
  {}

  virtual ~MyContentHandler( void ) {}

  // IUnknown: hands out this object for ISAXContentHandler and IUnknown.
  // Returns E_POINTER for a NULL out-pointer and E_NOINTERFACE (with
  // *ppvObject cleared) for any other interface.
  virtual HRESULT __stdcall QueryInterface( const IID &riid, void ** ppvObject )
  {
    if ( ppvObject == NULL )
      return E_POINTER;

    if ( riid == __uuidof(ISAXContentHandler) || riid == IID_IUnknown )
    {
      *ppvObject = static_cast<ISAXContentHandler*>(this);
      this->AddRef();
      return S_OK;
    }

    *ppvObject = NULL;
    return E_NOINTERFACE;
  }

  // IUnknown: atomically increments the reference count and returns it.
  virtual ULONG __stdcall AddRef( void )
  {
    return InterlockedIncrement(&this->ref_count);
  }

  // IUnknown: atomically decrements the reference count; destroys the
  // object when the count reaches zero. Returns the new count.
  virtual ULONG __stdcall Release( void )
  {
    const ULONG remaining = InterlockedDecrement(&this->ref_count);
    if ( remaining == 0 )
      delete this;            // no member access is allowed past this point
    return remaining;
  }

  // --- ISAXContentHandler callbacks (all no-ops) ---------------------------
  // String arguments arrive as pointer + length pairs (pwch* / cch*).

  virtual HRESULT __stdcall putDocumentLocator( ISAXLocator * pLocator) { return S_OK; }
  virtual HRESULT __stdcall startDocument( void ) { return S_OK; }
  virtual HRESULT __stdcall endDocument( void ) { return S_OK; }
  virtual HRESULT __stdcall startPrefixMapping( const wchar_t *pwchPrefix, int cchPrefix, const wchar_t *pwchUri, int cchUri ) { return S_OK; }
  virtual HRESULT __stdcall endPrefixMapping( const wchar_t *pwchPrefix, int cchPrefix) { return S_OK; }
  virtual HRESULT __stdcall startElement( const wchar_t *pwchNamespaceUri, int cchNamespaceUri, const wchar_t *pwchLocalName, int cchLocalName, const wchar_t *pwchQName, int cchQName, ISAXAttributes *pAttributes ) { return S_OK; }
  virtual HRESULT __stdcall endElement( const wchar_t *pwchNamespaceUri, int cchNamespaceUri, const wchar_t *pwchLocalName, int cchLocalName, const wchar_t *pwchQName, int cchQName) { return S_OK; }
  virtual HRESULT __stdcall characters( const wchar_t *pwchChars, int cchChars) { return S_OK; }
  virtual HRESULT __stdcall ignorableWhitespace( const wchar_t *pwchChars, int cchChars) { return S_OK; }
  virtual HRESULT __stdcall processingInstruction( const wchar_t *pwchTarget, int cchTarget, const wchar_t *pwchData, int cchData) { return S_OK; }
  virtual HRESULT __stdcall skippedEntity( const wchar_t *pwchName, int cchName) { return S_OK; }

protected:
  ULONG ref_count;   // COM reference count; protected so subclasses can reach it
};

Then you could easily parse a stream:

// Parses the XML document read from `is` with the MSXML 6.0 SAX reader,
// feeding the SAX events to a MyContentHandler.
//
// is - input stream containing the XML document; it is wrapped in a
//      MySequentialStream and must stay valid for the duration of the call.
//
// Returns true when COM could be initialised, the reader could be created
// and configured, and reader->parse() succeeded; false otherwise.
//
// COM is initialised on entry and uninitialised on every path that got past
// a successful CoInitialize(), so init/uninit calls stay balanced.
bool ParseStream( istream &is )
{
  if ( FAILED( CoInitialize( NULL ) ) )
    return false;

  bool parsed = false;

  ISAXXMLReader * reader = NULL;
  if ( SUCCEEDED( CoCreateInstance( __uuidof(SAXXMLReader60), NULL, CLSCTX_ALL,
                                    __uuidof(ISAXXMLReader),
                                    reinterpret_cast<void**>( &reader ) ) ) )
  {
    // Both helper objects start with a reference count of 0, so take the
    // reference that this function owns before handing them to the reader.
    ISequentialStream * my_stream = new MySequentialStream(is);
    ISAXContentHandler * content_handler = new MyContentHandler;
    my_stream->AddRef();
    content_handler->AddRef();

    if ( SUCCEEDED( reader->putContentHandler( content_handler ) ) )
    {
      // The VARIANT only borrows our reference to the stream, so it is
      // deliberately not passed to VariantClear() afterwards.
      VARIANT var;
      VariantInit( &var );
      var.vt = VT_UNKNOWN;
      var.punkVal = my_stream;

      parsed = SUCCEEDED( reader->parse( var ) );
    }

    my_stream->Release();
    content_handler->Release();
    reader->Release();
  }

  // All interface pointers are released before COM is torn down.
  CoUninitialize();
  return parsed;
}
like image 27
Naszta Avatar answered Oct 22 '22 01:10

Naszta