When an XMLDOMDocument saves itself, how can I get it to include the XML declaration, e.g.:
<?xml version="1.0" encoding="UTF-8" ?>
<?xml version="1.0" encoding="UTF-16" ?>
<?xml version="1.0" encoding="UCS-2" ?>
<?xml version="1.0" encoding="UCS-4" ?>
<?xml version="1.0" encoding="ISO-10646-UCS-2" ?>
<?xml version="1.0" encoding="UNICODE-1-1-UTF-8" ?>
<?xml version="1.0" encoding="UNICODE-2-0-UTF-16" ?>
<?xml version="1.0" encoding="UNICODE-2-0-UTF-8" ?>
<?xml version="1.0" encoding="US-ASCII" ?>
<?xml version="1.0" encoding="ISO-8859-1" ?>
<?xml version="1.0" encoding="WINDOWS-1250" ?>
The XMLDOMDocument object is being created in memory (i.e. the XML is not being loaded from some outside source):
{
// The document is created directly in memory rather than loaded from an outside source.
IXMLDOMDocument2 doc = new DOMDocument60();
//add nodes to the doc
...
// Write the in-memory document out to saveTarget.
doc.Save(saveTarget);
}
Without the xml declaration you only get the body xml, e.g.:
<Customer>
...
</Customer>
rather than the full XML document:
<?xml version="1.0" encoding="US-ASCII" ?>
<Customer>
...
</Customer>
How can I control the encoding the XMLDOMDocument will use when it saves to a stream?
If the XML declaration is present, it must be placed on the first line of the XML document. If the XML declaration is included, it must contain the version attribute. The parameter names and values are case-sensitive, and the names are always in lower case.
XML documents can contain an XML declaration that, if present, must be the first construct in the document. An XML declaration is made up of as many as three name/value pairs, syntactically identical to attributes. The three attributes are a mandatory version attribute and optional encoding and standalone attributes.
1 XML syntax. The first line of an XML document should be a declaration that this is an XML document, including the version of XML being used.
(a) <?xml version="1.0"?> is the correct syntax of the declaration (no whitespace is allowed between "<?" and "xml").
You need to use an MXXMLWriter60 instead of saving it directly. Sorry, I don't have a C# example, but here is the Visual Basic equivalent (classic VB6 syntax rather than VB.NET). See IMXWriter for details.
' Build a small sample document in memory.
Dim domDoc As New DOMDocument60
domDoc.loadXML("<doc><one>test1</one><two>test2</two></doc>")
' Configure the writer: byte order mark on, XML declaration kept,
' requested encoding, indented output.
' NOTE(review): MSXML may ignore the encoding property when the writer's output
' is a string (as here) and report UTF-16 instead - confirm against the IMXWriter docs.
Dim xmlWriter As New MXXMLWriter60
With xmlWriter
    .byteOrderMark = True
    .omitXMLDeclaration = False
    .encoding = "US-ASCII"
    .indent = True
End With
' Hook the writer up as the receiver of every SAX event the reader raises.
Dim saxReader As New SAXXMLReader60
With saxReader
    Set .contentHandler = xmlWriter
    Set .dtdHandler = xmlWriter
    Set .errorHandler = xmlWriter
    .putProperty "http://xml.org/sax/properties/lexical-handler", xmlWriter
    .putProperty "http://xml.org/sax/properties/declaration-handler", xmlWriter
End With
' Replay the DOM through the reader; the writer serializes what it receives.
saxReader.parse domDoc
' Dump the writer's accumulated output into saved.xml under App.Path.
Dim fileNumber As Integer
fileNumber = FreeFile
Open App.Path + "\saved.xml" For Output As #fileNumber
Print #fileNumber, xmlWriter.output
Close #fileNumber
You should be able to achieve the same thing by using the CreateProcessingInstruction method.
Example;
' Start from a new, empty DOMDocument object.
Dim domDoc As New DOMDocument
Dim rootElement As IXMLDOMElement
' Append the "xml" processing instruction first so that it precedes the root element.
domDoc.appendChild domDoc.createProcessingInstruction("xml", "version=""1.0"" encoding=""UTF-8"" standalone=""yes""")
' "HeuristicFiler" is appended next and becomes the root node.
Set rootElement = domDoc.createElement("HeuristicFiler")
domDoc.appendChild rootElement
' Add various records
' Save the XML File
domDoc.Save strFilePath
Here is the same thing in C++:
// Serializes a DOM document to a stream through an MXXMLWriter so that the
// output carries an XML declaration (UTF-8, standalone) and is indented.
//
// The DOM is replayed through a SAX reader whose content, DTD, error, lexical
// and declaration handlers are all the writer; the writer emits to pStream.
//
// pDoc    - document to serialize; must not be NULL.
// pStream - destination stream; must not be NULL.
//
// Returns true when every step, including the final flush, succeeded and
// false otherwise.
//
// NOTE(review): CHK_FAILED is defined outside this snippet; it is presumed to
// make the function return false when the HRESULT indicates failure - confirm.
bool PPrintDOMDocument (IXMLDOMDocument* pDoc, IStream* pStream)
{
    // Nothing can be written without both a source and a destination.
    if (pDoc == NULL || pStream == NULL) return false;

    // Create the writer
    CComPtr <IMXWriter> pMXWriter;
    CHK_FAILED(pMXWriter.CoCreateInstance(__uuidof (MXXMLWriter), NULL, CLSCTX_ALL));

    // The writer object implements the SAX handler interfaces the reader needs.
    CComPtr <ISAXContentHandler> pISAXContentHandler;
    CHK_FAILED (pMXWriter.QueryInterface(&pISAXContentHandler));
    CComPtr <ISAXErrorHandler> pISAXErrorHandler;
    CHK_FAILED(pMXWriter.QueryInterface (&pISAXErrorHandler));
    CComPtr <ISAXDTDHandler> pISAXDTDHandler;
    CHK_FAILED (pMXWriter.QueryInterface (&pISAXDTDHandler));

    // Output options: keep the XML declaration, standalone, indented, UTF-8.
    CHK_FAILED (pMXWriter->put_omitXMLDeclaration (VARIANT_FALSE));
    CHK_FAILED (pMXWriter->put_standalone (VARIANT_TRUE));
    CHK_FAILED (pMXWriter->put_indent (VARIANT_TRUE));
    CHK_FAILED (pMXWriter->put_encoding (L"UTF-8"));

    // Create the reader and route all of its events to the writer.
    CComPtr <ISAXXMLReader> pSAXReader;
    CHK_FAILED (pSAXReader.CoCreateInstance (__uuidof (SAXXMLReader), NULL, CLSCTX_ALL));
    CHK_FAILED (pSAXReader ->putContentHandler (pISAXContentHandler));
    CHK_FAILED (pSAXReader ->putDTDHandler (pISAXDTDHandler));
    CHK_FAILED (pSAXReader ->putErrorHandler (pISAXErrorHandler));
    CHK_FAILED (pSAXReader ->putProperty (
        L"http://xml.org/sax/properties/lexical-handler", CComVariant (pMXWriter)));
    CHK_FAILED (pSAXReader ->putProperty (
        L"http://xml.org/sax/properties/declaration-handler", CComVariant (pMXWriter)));

    // Point the writer at the stream, then replay the DOM through the reader.
    if (! SUCCEEDED (pMXWriter ->put_output (CComVariant (pStream)))) return false;
    if (! SUCCEEDED (pSAXReader->parse(CComVariant (pDoc)))) return false;

    // A failed flush means the stream may be incomplete, so report it.
    if (! SUCCEEDED (pMXWriter->flush())) return false;

    // The original fell off the end of a bool function here (undefined behaviour).
    return true;
}
...and you will need a suitable IStream...
// FileStream: an IStream implementation backed by a Win32 file HANDLE.
//
// Instances are created only through OpenFile() and are reference counted:
// the constructor starts the count at 1 and the final Release() deletes the
// object, whose destructor closes the file handle. Only Read, Write, Seek and
// Stat do real work; the remaining IStream methods return E_NOTIMPL.
class FileStream : public IStream
{
    // Private: use OpenFile(). Takes ownership of hFile.
    FileStream(HANDLE hFile)
    {
        _refcount = 1;
        _hFile = hFile;
    }

    // Private: destruction happens only through Release().
    ~FileStream()
    {
        if (_hFile != INVALID_HANDLE_VALUE)
        {
            ::CloseHandle(_hFile);
        }
    }

public:
    // Opens pName and returns a new stream (reference count 1) in *ppStream.
    //   fWrite == true  : GENERIC_WRITE, CREATE_ALWAYS (existing file is replaced)
    //   fWrite == false : GENERIC_READ,  OPEN_EXISTING
    // Returns S_OK on success, E_POINTER when ppStream is NULL, E_OUTOFMEMORY
    // when the stream object cannot be allocated, or the HRESULT form of
    // GetLastError() when CreateFileW fails. *ppStream is NULL on every failure.
    HRESULT static OpenFile(LPCWSTR pName, IStream ** ppStream, bool fWrite)
    {
        if (ppStream == NULL)
            return E_POINTER;
        *ppStream = NULL;

        HANDLE hFile = ::CreateFileW(pName,
            fWrite ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ,
            NULL,
            fWrite ? CREATE_ALWAYS : OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL,
            NULL);
        if (hFile == INVALID_HANDLE_VALUE)
            return HRESULT_FROM_WIN32(GetLastError());

        // Handle both flavours of operator new: one that throws on failure and
        // one that returns NULL. Either way the handle must not leak and the
        // caller must not be told S_OK while receiving a NULL stream.
        FileStream* pNewStream = NULL;
        try
        {
            pNewStream = new FileStream(hFile);
        }
        catch (...)
        {
            pNewStream = NULL;
        }
        if (pNewStream == NULL)
        {
            ::CloseHandle(hFile);
            return E_OUTOFMEMORY;
        }

        *ppStream = pNewStream;
        return S_OK;
    }

    // IUnknown Interface

    // Hands out this object for IUnknown, IStream and ISequentialStream.
    // On any failure *ppvObject is set to NULL.
    virtual HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void ** ppvObject)
    {
        if (ppvObject == NULL)
            return E_POINTER;

        if (iid == __uuidof(IUnknown)
            || iid == __uuidof(IStream)
            || iid == __uuidof(ISequentialStream))
        {
            *ppvObject = static_cast<IStream*>(this);
            AddRef();
            return S_OK;
        }

        *ppvObject = NULL;
        return E_NOINTERFACE;
    }

    virtual ULONG STDMETHODCALLTYPE AddRef(void)
    {
        return (ULONG)InterlockedIncrement(&_refcount);
    }

    // Deletes the object (closing the file) when the count reaches zero.
    virtual ULONG STDMETHODCALLTYPE Release(void)
    {
        ULONG res = (ULONG) InterlockedDecrement(&_refcount);
        if (res == 0)
            delete this;
        return res;
    }

    // ISequentialStream Interface
public:
    // Reads up to cb bytes into pv. pcbRead is optional (may be NULL).
    // Returns S_OK when cb bytes were read, S_FALSE when fewer were available
    // (end of file), or the HRESULT form of GetLastError() on failure.
    virtual HRESULT STDMETHODCALLTYPE Read(void* pv, ULONG cb, ULONG* pcbRead)
    {
        // ReadFile needs a valid byte-count pointer when no OVERLAPPED is
        // supplied, so never forward a possibly-NULL caller pointer.
        DWORD cbActual = 0;
        if (!ReadFile(_hFile, pv, cb, &cbActual, NULL))
        {
            HRESULT hr = HRESULT_FROM_WIN32(GetLastError());
            if (pcbRead != NULL)
                *pcbRead = 0;
            return hr;
        }
        if (pcbRead != NULL)
            *pcbRead = cbActual;
        return (cbActual < cb) ? S_FALSE : S_OK;
    }

    // Writes cb bytes from pv. pcbWritten is optional (may be NULL).
    // Returns S_OK on success or the HRESULT form of GetLastError() on failure.
    virtual HRESULT STDMETHODCALLTYPE Write(void const* pv, ULONG cb, ULONG* pcbWritten)
    {
        // Same reasoning as Read(): WriteFile needs a valid byte-count pointer.
        DWORD cbActual = 0;
        if (!WriteFile(_hFile, pv, cb, &cbActual, NULL))
        {
            HRESULT hr = HRESULT_FROM_WIN32(GetLastError());
            if (pcbWritten != NULL)
                *pcbWritten = 0;
            return hr;
        }
        if (pcbWritten != NULL)
            *pcbWritten = cbActual;
        return S_OK;
    }

    // IStream Interface
public:
    virtual HRESULT STDMETHODCALLTYPE SetSize(ULARGE_INTEGER)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE CopyTo(IStream*, ULARGE_INTEGER, ULARGE_INTEGER*,
        ULARGE_INTEGER*)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE Commit(DWORD)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE Revert(void)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE LockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE UnlockRegion(ULARGE_INTEGER, ULARGE_INTEGER, DWORD)
    {
        return E_NOTIMPL;
    }

    virtual HRESULT STDMETHODCALLTYPE Clone(IStream **)
    {
        return E_NOTIMPL;
    }

    // Moves the file pointer. dwOrigin is one of STREAM_SEEK_SET/CUR/END; any
    // other value yields STG_E_INVALIDFUNCTION. lpNewFilePointer is passed
    // through to SetFilePointerEx to receive the resulting position.
    virtual HRESULT STDMETHODCALLTYPE Seek(LARGE_INTEGER liDistanceToMove, DWORD dwOrigin,
        ULARGE_INTEGER* lpNewFilePointer)
    {
        DWORD dwMoveMethod;

        switch(dwOrigin)
        {
        case STREAM_SEEK_SET:
            dwMoveMethod = FILE_BEGIN;
            break;
        case STREAM_SEEK_CUR:
            dwMoveMethod = FILE_CURRENT;
            break;
        case STREAM_SEEK_END:
            dwMoveMethod = FILE_END;
            break;
        default:
            return STG_E_INVALIDFUNCTION;
        }

        if (SetFilePointerEx(_hFile, liDistanceToMove, (PLARGE_INTEGER) lpNewFilePointer,
            dwMoveMethod) == 0)
            return HRESULT_FROM_WIN32(GetLastError());
        return S_OK;
    }

    // Reports the stream as STGTY_STREAM with cbSize set to the current file
    // size. All other STATSTG fields are zeroed; in particular pwcsName is left
    // NULL whatever grfStatFlag says (no name is ever allocated).
    virtual HRESULT STDMETHODCALLTYPE Stat(STATSTG* pStatstg, DWORD grfStatFlag)
    {
        (void) grfStatFlag;

        if (pStatstg == NULL)
            return STG_E_INVALIDPOINTER;

        // Zero everything first so callers never see uninitialised fields.
        *pStatstg = STATSTG();
        pStatstg->type = STGTY_STREAM;

        if (GetFileSizeEx(_hFile, (PLARGE_INTEGER) &pStatstg->cbSize) == 0)
            return HRESULT_FROM_WIN32(GetLastError());
        return S_OK;
    }

private:
    HANDLE _hFile;   // owned file handle; closed in the destructor
    LONG _refcount;  // COM reference count, adjusted with Interlocked* calls
};
...and an explanation of what is going on.
Here's the same thing in Delphi:
//Serializes the document into a WideString; the text is UTF-16 (since it's a WideString)
class function TXMLHelper.WriteDocumentToString(
      const Document60: DOMDocument60): WideString;
var
   xmlWriter: IMXWriter;
   saxReader: IVBSAXXMLReader;
   serialized: OleVariant;
begin
   {
      Encoding reference, from http://support.microsoft.com/kb/275883
      (INFO: XML Encoding and DOM Interface Methods)

      Encodings MSXML supports natively:
         UTF-8, UTF-16, UCS-2, UCS-4, ISO-10646-UCS-2,
         UNICODE-1-1-UTF-8, UNICODE-2-0-UTF-16, UNICODE-2-0-UTF-8

      Encodings it also recognizes (mapped internally through the
      WideCharToMultibyte API function):
         US-ASCII
         ISO-8859-1 through ISO-8859-9
         WINDOWS-1250 through WINDOWS-1258
   }

   //Guard: there must be a document to serialize
   if not Assigned(Document60) then
      raise Exception.Create('TXMLHelper.WriteDocument: Document60 cannot be nil');

   //Create the writer and choose its output options
   xmlWriter := CoMXXMLWriter60.Create;
   //byteOrderMark (default True): no BOM wanted. The property has no effect for BSTR or DOM output, but it is switched off anyway.
   xmlWriter.byteOrderMark := False;
   //omitXMLDeclaration (default False): True would skip the XML declaration (useful for fragments); False keeps it.
   xmlWriter.omitXMLDeclaration := False;
   //encoding (default "UTF-16") is deliberately left alone here:
   // xmlWriter.encoding := 'UTF-8';
   //indent (default False): produce indented output
   xmlWriter.indent := True;
   //standalone: value of the standalone attribute ("yes"/"no") in the XML declaration
   xmlWriter.standalone := True;

   //Create the SAX reader and make the writer the receiver of all of its events
   saxReader := CoSAXXMLReader60.Create;
   saxReader.contentHandler := xmlWriter as IVBSAXContentHandler;
   saxReader.dtdHandler := xmlWriter as IVBSAXDTDHandler;
   saxReader.errorHandler := xmlWriter as IVBSAXErrorHandler;
   saxReader.putProperty('http://xml.org/sax/properties/lexical-handler', xmlWriter);
   saxReader.putProperty('http://xml.org/sax/properties/declaration-handler', xmlWriter);

   //Replay the DOM through the reader; the writer is called for each event
   saxReader.parse(Document60);

   //Collect what the writer produced and hand it back as the result
   serialized := xmlWriter.output;
   Result := serialized;
end;
And a version of writing to a stream:
//Serializes the document into the supplied IStream. When Encoding is an empty
//string the writer's encoding property is not touched.
class procedure TXMLHelper.WriteDocumentToStream(const Document60: DOMDocument60; const stream: IStream; Encoding: string);
var
   xmlWriter: IMXWriter;
   saxReader: IVBSAXXMLReader;
begin
   {
      Encoding reference, from http://support.microsoft.com/kb/275883
      (INFO: XML Encoding and DOM Interface Methods)

      Encodings MSXML supports natively:
         UTF-8, UTF-16, UCS-2, UCS-4, ISO-10646-UCS-2,
         UNICODE-1-1-UTF-8, UNICODE-2-0-UTF-16, UNICODE-2-0-UTF-8

      Encodings it also recognizes (mapped internally through the
      WideCharToMultibyte API function):
         US-ASCII
         ISO-8859-1 through ISO-8859-9
         WINDOWS-1250 through WINDOWS-1258
   }

   //Guards: both the document and the destination stream are required
   if not Assigned(Document60) then
      raise Exception.Create('TXMLHelper.WriteDocument: Document60 cannot be nil');
   if not Assigned(stream) then
      raise Exception.Create('TXMLHelper.WriteDocument: stream cannot be nil');

   //Create the writer and choose its output options
   xmlWriter := CoMXXMLWriter60.Create;
   //byteOrderMark (default True): write the Byte Order Mark. The property has no effect for BSTR or DOM output.
   xmlWriter.byteOrderMark := True;
   //omitXMLDeclaration (default False): True would skip the XML declaration (useful for fragments); False keeps it.
   xmlWriter.omitXMLDeclaration := False;
   //encoding (default "UTF-16"): only overridden when the caller named one
   if Encoding <> '' then
      xmlWriter.encoding := Encoding;
   //indent (default False): produce indented output
   xmlWriter.indent := True;
   xmlWriter.standalone := True;

   //Create the SAX reader and make the writer the receiver of all of its events
   saxReader := CoSAXXMLReader60.Create;
   saxReader.contentHandler := xmlWriter as IVBSAXContentHandler;
   saxReader.dtdHandler := xmlWriter as IVBSAXDTDHandler;
   saxReader.errorHandler := xmlWriter as IVBSAXErrorHandler;
   saxReader.putProperty('http://xml.org/sax/properties/lexical-handler', xmlWriter);
   saxReader.putProperty('http://xml.org/sax/properties/declaration-handler', xmlWriter);

   //The resulting document will be written into the provided IStream
   xmlWriter.output := stream;

   //Replay the DOM through the reader; the writer is called for each event
   saxReader.parse(Document60);

   xmlWriter.flush;
end;
Note: Any code is released into the public domain. No attribution required.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With