I am looking for a basic example of how to set up XQilla to run an XPath query on a std::string containing XML. The example on the XQilla site appears to use XQuery on either a file or a URL.
The XML Document Object Model (DOM) contains methods that allow you to use XML Path Language (XPath) navigation to query information in the DOM. You can use XPath to find a single, specific node or to find all nodes that match some criteria.
This document describes the interface for using XPath in JavaScript internally, in extensions, and from websites. Mozilla implements a fair amount of the DOM 3 XPath, which means that XPath expressions can be run against both HTML and XML documents.
This is an old question, but I looked for an answer to it and couldn't find one myself. I have now solved it and thought I should share the code.
Edit: if a licence is needed, the code below is shared under MIT and BSD, or whatever suits you.
#ifndef JOPPLI_XPATHEXTRACTER_H
#define JOPPLI_XPATHEXTRACTER_H
#include <string>
#include <vector>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/dom/DOM.hpp>
namespace Joppli
{
using namespace xercesc;
/**
 * Runs XPath 2 queries (via XQilla on top of Xerces-C) against XML that is
 * held in a std::string.
 *
 * Every instance takes part in a shared reference count: the constructor
 * increments it and initialises the platform when it was zero, the
 * destructor decrements it and terminates the platform when it drops back
 * to zero. The destructor also releases the parser created by the
 * constructor.
 *
 * Instances are deliberately non-copyable. A compiler-generated copy would
 * share the same parser pointer and would not increment the reference count,
 * yet its destructor would still release the parser and decrement the count,
 * leading to a double release and a premature platform shutdown.
 */
class XPathExtracter
{
  public:
    /// String values collected by extract(), one entry per matched item.
    using Result = std::vector<std::string>;

    XPathExtracter();
    ~XPathExtracter();

    XPathExtracter(const XPathExtracter &) = delete;
    XPathExtracter & operator=(const XPathExtracter &) = delete;

    /**
     * Parses the XML text in `xml` with this instance's parser.
     *
     * @param xml  the XML document as text
     * @return the document produced by the parser
     *
     * NOTE(review): the returned document is presumably owned by the parser
     * and freed when the parser is released -- confirm against the Xerces-C
     * DOMLSParser documentation before deleting or releasing it manually.
     */
    DOMDocument * getDocument(const std::string & xml);

    /**
     * Evaluates the XPath 2 expression `query` against `document` and
     * appends the string value of every result item to `result`.
     *
     * @param query     XPath 2 expression
     * @param document  document to query, as returned by getDocument()
     * @param result    container that receives the values (appended to,
     *                  never cleared)
     */
    void extract(const std::string & query, DOMDocument * document,
                 Result * result);

  protected:
    DOMLSParser * parser;                     ///< created in the constructor, released in the destructor
    DOMImplementation * xqillaImplementation; ///< the "XPath2 3.0" DOM implementation

  private:
    static int count; ///< number of live instances; guards platform init/terminate
};
}
#endif
#include "XPathExtracter.h"
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xqilla/xqilla-dom3.hpp>
namespace Joppli
{
/**
 * Registers this instance in the shared instance counter, initialising
 * Xerces-C and XQilla through XQillaPlatformUtils when it is the first one,
 * then looks up the XQilla DOM implementation and creates a synchronous
 * LS parser from it.
 */
XPathExtracter::XPathExtracter()
{
  // Only the first live instance boots the platform.
  const bool isFirstInstance = (count == 0);
  ++count;
  if(isFirstInstance)
  {
    XQillaPlatformUtils::initialize();
  }

  // Fetch the XQilla DOMImplementation object and keep it for later use.
  DOMImplementation * const implementation =
    DOMImplementationRegistry::getDOMImplementation(X("XPath2 3.0"));
  xqillaImplementation = implementation;

  // The parser is reused by every getDocument() call on this instance.
  parser = implementation->createLSParser(
    DOMImplementationLS::MODE_SYNCHRONOUS, 0);
}
/**
 * Releases the parser created by the constructor and removes this instance
 * from the shared instance counter; the last instance to go also shuts the
 * platform down through XQillaPlatformUtils.
 */
XPathExtracter::~XPathExtracter()
{
  // The parser must be released while the platform is still initialised.
  parser->release();

  count -= 1;
  if(count == 0)
  {
    XQillaPlatformUtils::terminate();
  }
}
/**
 * Parses the XML text in `xml` with this instance's parser and returns the
 * resulting document.
 *
 * The text is transcoded to XMLCh and handed to the parser as string data.
 * Both the transcoded buffer and the DOMLSInput are bound to scope, so they
 * are released on every exit path, including when the parser throws.
 *
 * @param xml  the XML document as text
 * @return the document produced by DOMLSParser::parse
 *
 * NOTE(review): the returned document is presumably owned by the parser --
 * confirm against the Xerces-C DOMLSParser documentation.
 */
DOMDocument * XPathExtracter::getDocument(const std::string & xml)
{
  /*
  // An alternative to simply setting the string input, as shown below
  MemBufInputSource * memBuf = new MemBufInputSource(
    (const XMLByte *) xml.c_str(),
    xml.size(),
    "xml (in memory)");
  DOMLSInput * input = this->xqillaImplementation->createLSInput();
  input->setByteStream(memBuf);
  DOMDocument * document = parser->parse(input);
  input->release();
  delete memBuf;
  return document;
  */

  // Buffers returned by XMLString::transcode must be handed back through
  // XMLString::release; a plain `delete` does not match their allocation.
  struct TranscodedText
  {
    XMLCh * data;

    explicit TranscodedText(const char * text)
      : data(XMLString::transcode(text)) {}
    ~TranscodedText() { XMLString::release(&data); }

    TranscodedText(const TranscodedText &) = delete;
    TranscodedText & operator=(const TranscodedText &) = delete;
  };

  // Declared before `input` so that the input is released first and the
  // buffer it points at is freed afterwards (same order as before).
  TranscodedText stringData(xml.c_str());

  AutoRelease<DOMLSInput> input(this->xqillaImplementation->createLSInput());
  input->setStringData(stringData.data);

  return this->parser->parse(input.get());
}
/**
 * Evaluates the XPath 2 expression `query` against `document` and appends
 * the string value of every item in the result sequence to `result`.
 *
 * @param query     XPath 2 expression
 * @param document  document to query; a null pointer makes the call a no-op
 * @param result    container that receives the values (appended to, never
 *                  cleared); a null pointer makes the call a no-op
 */
void XPathExtracter::extract(const std::string & query,
  DOMDocument * document, Result * result)
{
  // Nothing to query or nowhere to store: bail out instead of dereferencing
  // a null pointer.
  if(document == nullptr || result == nullptr)
    return;

  // Parse an XPath 2 expression
  AutoRelease<DOMXPathExpression> expression(
    document->createExpression(X(query.c_str()), 0));

  // Execute the query
  AutoRelease<DOMXPathResult> xQillaResult(
    expression->evaluate(
      document,
      DOMXPathResult::ITERATOR_RESULT_TYPE, 0));

  // Iterate over the results
  while(xQillaResult->iterateNext())
  {
    char * content = XMLString::transcode(
      xQillaResult->getStringValue());

    // Guard against a null buffer: std::string cannot be built from one.
    result->push_back(content != nullptr ? content : "");

    // Buffers returned by XMLString::transcode must be handed back through
    // XMLString::release; a plain `delete` does not match their allocation.
    XMLString::release(&content);
  }
}
// Shared instance counter: incremented by the constructor, decremented by
// the destructor; starts at zero so the first instance initialises the
// platform.
int XPathExtracter::count{0};
}
#include <iostream>
#include "XPathExtracter.h"
int main(void)
{
std::string * body = new std::string;
// ... (logic to fill the body string with an xml/html value)
// Extract
using namespace xercesc;
Joppli::XPathExtracter * driver = new Joppli::XPathExtracter();
Joppli::XPathExtracter::Result * results = new Joppli::XPathExtracter::Result;
DOMDocument * document = driver->getDocument(*body);
driver->extract("html/head//title", document, results);
driver->extract("html/head//meta//@name", document, results);
driver->extract("html//body//a[@id=\"link_mx_es\"]", document, results);
for(const auto & result : *results)
std::cout << result << std::endl;
delete results;
delete driver;
delete body;
return 0;
}
I ran this code through valgrind and it doesn't show any leaks.
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With