Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

XML parsing, TXMLDocument

I have a problem with parsing XML.

How do I get the values of the se_url and phrase fields?
I need to get link1_1, link1_2, key1, link2_1, link2_2, key2, ... which are stored in se_url and phrase.

I could not find out how to do this on Google, nor could I find a manual on how to work with TXMLDocument.

<doc>
  <date2>20120214</date2>
  <date1>20120214</date1>
  <data count="116">
    <row>
      <search_engines count="2">
        <search_engine>
          <se_url>link1_1</se_url>
          <se_page>1</se_page>
          <se_id>2</se_id>
        </search_engine>
        <search_engine>
          <se_url>link1_2</se_url>
          <se_page>1</se_page>
          <se_id>3</se_id>
        </search_engine>
      </search_engines>
      <denial>0.4889</denial>
      <visits>45</visits>
      <page_views>52</page_views>
      <phrase>key1</phrase>
      <visit_time>126</visit_time>
      <depth>1.1556</depth>
    </row>
    <row>
      <search_engines count="2">
        <search_engine>
          <se_url>link2_1</se_url>
          <se_page>1</se_page>
          <se_id>3</se_id>
        </search_engine>
        <search_engine>
          <se_url>link2_2</se_url>
          <se_page>1</se_page>
          <se_id>6</se_id>
        </search_engine>
      </search_engines>
      <denial>0.5714</denial>
      <visits>42</visits>
      <page_views>50</page_views>
      <phrase>key2</phrase>
      <visit_time>109</visit_time>
      <depth>1.1905</depth>
    </row>
  </data>
</doc>
like image 780
dedoki Avatar asked Feb 14 '12 18:02

dedoki


People also ask

How XML data is parsed?

The parser reads an XML document from the beginning to the end. When it encounters a node in the document, it generates an event that triggers the corresponding event handler for that node. The handler thus applies the application logic to process the node specifically.

What is node in XML parsing?

According to the XML DOM, everything in an XML document is a node: The entire document is a document node. Every XML element is an element node. The text in the XML elements are text nodes. Every attribute is an attribute node.


2 Answers

Try this:

uses ComObj, MSXML;

{ Loads the XML document from a URL (or file name) through MSXML and lists,
  for every /doc/data/row element, its phrase followed by each se_url found
  under search_engines/search_engine. Output goes to Memo1.

  Raises Exception when MSXML reports a parse/load error. }
procedure TForm1.Button1Click(Sender: TObject);
var
  xml: IXMLDOMDocument;
  rowNode, phraseNode: IXMLDomNode;
  nodes_row, nodes_se: IXMLDomNodeList;
  i, j: Integer;
  url: string;
begin
  // put url or file name
  url := 'http://softez.pp.ua/gg.xml';

  xml := CreateOleObject('Microsoft.XMLDOM') as IXMLDOMDocument;
  // Load synchronously so parseError is populated before it is inspected.
  xml.async := False;
  xml.load(url); // or use loadXML to load XML document using a supplied string
  if xml.parseError.errorCode <> 0 then
    raise Exception.Create('XML Load error:' + xml.parseError.reason);

  Memo1.Clear;
  nodes_row := xml.selectNodes('/doc/data/row');
  for i := 0 to nodes_row.length - 1 do
  begin
    rowNode := nodes_row.item[i];

    // selectSingleNode yields nil when the row has no <phrase> child;
    // skip such rows instead of dereferencing nil.
    phraseNode := rowNode.selectSingleNode('phrase');
    if phraseNode = nil then
      Continue;
    Memo1.Lines.Add('phrase=' + phraseNode.text);

    nodes_se := rowNode.selectNodes('search_engines/search_engine/se_url');
    for j := 0 to nodes_se.length - 1 do
      Memo1.Lines.Add('url=' + nodes_se.item[j].text);

    Memo1.Lines.Add('--------------');
  end;
end;

Result:

phrase=key1
url=link1_1
url=link1_2
--------------
phrase=key2
url=link2_1
url=link2_2
--------------

A Reference to IXMLDOMDocument

like image 65
kobik Avatar answered Oct 08 '22 02:10

kobik


If you first include these 3 general purpose library routines ....

uses XMLDoc, XMLIntf, xmldom;

{ Creates a TXMLDocument that uses the MSXML DOM vendor.

  Owner1 (out): receives a freshly created TComponent that owns the returned
  document. The caller is responsible for freeing Owner1, which in turn
  destroys the document.

  Returns the new document. If creating or configuring the document raises,
  the owner is freed, Owner1 is set to nil and the exception is re-raised,
  so no component is leaked. }
function CreateXMLDocument( var Owner1: TComponent): TXMLDocument;
begin
  Owner1 := TComponent.Create( nil);
  try
    Result := TXMLDocument.Create( Owner1);
    Result.Options := [doNodeAutoCreate, doNodeAutoIndent, doAttrNull,
                       doAutoPrefix, doNamespaceDecl];
    Result.DOMVendor := GetDOMVendor( 'MSXML');
  except
    // Freeing the owner also frees a partially configured document.
    Owner1.Free;
    Owner1 := nil;
    raise;
  end;
end;

{ Evaluates sXPath relative to FocusNode and returns every matching node
  wrapped as an IXMLNode.

  Returns an empty array when FocusNode is nil, when its DOM node does not
  support IDomNodeSelect, or when the selection yields no node list. }
function XPATHSelect( const FocusNode: IXMLNode; const sXPath: string): TArray<IXMLNode>;
var
  Selector : IDomNodeSelect;
  Matches  : IDOMNodeList;
  Current  : IDomNode;
  Access   : IXmlDocumentAccess;
  OwnerDoc : TXmlDocument;
  Count    : integer;
  Idx      : integer;
begin
  Result := nil;

  // Guard clauses: nothing to select from.
  if not assigned( FocusNode) then
    exit;
  if not Supports( FocusNode.DOMNode, IDomNodeSelect, Selector) then
    exit;

  Matches := Selector.SelectNodes( sXPath);
  if not assigned( Matches) then
    exit;

  Count := Matches.Length;
  SetLength( Result, Count);
  for Idx := 0 to Count - 1 do
  begin
    Current := Matches.item[ Idx];
    // Pass the owning TXmlDocument to the wrapper when the DOM node exposes
    // one; otherwise the wrapper is created without a document.
    if Supports( Current, IXmlDocumentAccess, Access) then
      OwnerDoc := Access.DocumentObject
    else
      OwnerDoc := nil;
    Result[ Idx] := TXmlNode.Create( Current, nil, OwnerDoc) as IXMLNode;
  end;
end;


{ Evaluates sXPath relative to FocusNode and returns the first match.

  SelectedNode (out): the matching node wrapped as IXMLNode, or nil when
  nothing matched, FocusNode is nil, or its DOM node does not support
  IDomNodeSelect.

  Returns True exactly when SelectedNode is assigned. }
function XPATHSelectFirst( const FocusNode: IXMLNode; const sXPath: string; var SelectedNode: IXMLNode): boolean;
var
  DomNodeSelect: IDomNodeSelect;
  DOMNode      : IDomNode;
  DocAccess    : IXmlDocumentAccess;
  Doc          : TXmlDocument;
begin
  SelectedNode := nil;
  // Doc is a plain class reference and is not auto-initialised; without this
  // an indeterminate pointer would reach TXmlNode.Create whenever the owner
  // document does not expose IXmlDocumentAccess.
  Doc := nil;

  if assigned( FocusNode) and
     Supports( FocusNode.DOMNode, IDomNodeSelect, DomNodeSelect) then
    DOMNode := DomNodeSelect.selectNode( sXPath);

  if assigned( DOMNode) then
  begin
    if Supports( DOMNode.OwnerDocument, IXmlDocumentAccess, DocAccess) then
      Doc := DocAccess.DocumentObject;
    SelectedNode := TXmlNode.Create( DOMNode, nil, Doc);
  end;

  Result := assigned( SelectedNode)
end;

Then a much neater solution is ...

{ Loads the XML document from DocumentSource and writes to memo2, for every
  row element that has a phrase child, the phrase followed by each se_url
  found under search_engines/search_engine.

  The document is owned by DocOwner, which is always freed on exit, even when
  loading or querying raises. }
procedure TForm2.btn1Click(Sender: TObject);
const
  DocumentSource =  'http://softez.pp.ua/gg.xml';
var
  Doc: IXMLDocument;
  DocOwner: TComponent;
  RowNode, PhraseNode, UrlNode: IXMLNode;

  // Appends one formatted line to memo2.
  procedure PutLn( const LineFmt: string; const Args: array of const);
  begin
    memo2.Lines.Add( Format( LineFmt, Args))
  end;

begin
  memo2.Clear;
  // Initialised so the finally block is safe even if creation fails early.
  DocOwner := nil;
  try
    Doc := CreateXMLDocument( DocOwner);
    Doc.LoadFromFile( DocumentSource);
    for RowNode in XPATHSelect( Doc.DocumentElement, '//row[phrase]') do
    begin
      if not XPATHSelectFirst( RowNode, 'phrase', PhraseNode) then continue;
      PutLn( 'phrase=%s', [PhraseNode.NodeValue]);
      for UrlNode in XPATHSelect( RowNode, 'search_engines/search_engine/se_url') do
        PutLn( 'url=%s', [UrlNode.NodeValue]);
      PutLn('--------------',[])
    end;
  finally
    // Drop interface references before destroying the owner, so nothing is
    // released against an already-freed document afterwards.
    UrlNode := nil;
    PhraseNode := nil;
    RowNode := nil;
    Doc := nil;
    DocOwner.Free;
  end;
end;

This was tested on Delphi 2010 and works a treat.

like image 34
Sean B. Durkin Avatar answered Oct 08 '22 02:10

Sean B. Durkin