Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

how to get json results from xml document in marklogic

I am trying to return an XML document in JSON format using a 2-tier server-side JavaScript application. I know MarkLogic can easily switch between JSON and XML formats using the REST API.

However, the following returns xml:

// 'format-xml' restricts the results to XML documents; it does not convert anything.
cts.search('something', ['format-xml']) // options may be omitted

but this returns nothing:

// 'format-json' restricts the results to JSON documents; XML matches are
// filtered out rather than transformed into JSON.
cts.search('something', ['format-json'])

According to the documentation I think I have used the function correctly.

Is this possible using server side javascript?

Update:

Using transformToJsonObject as follows:

// Load MarkLogic's XQuery JSON library so it can be called from server-side JS.
var json = require('/MarkLogic/json/json.xqy');
// Fetch the XML document by its URI.
var doc = fn.doc('/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml')
// Transform the whole document using the library's 'custom' configuration.
var jsonDoc = json.transformToJsonObject(doc, json.config('custom'));
// Bare final expression - presumably the value the script evaluates to.
jsonDoc

I get the following error:

500 Internal Server Error

XDMP-STACKOVERFLOW: Stack overflow
in /MarkLogic/json/custom.xqy, at 493:23,
in json-custom:is-ignore-attribute#2(json:object(<json:object xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:json="http://marklogic.com/xdmp/json"><json:entry key="element-qname-from-json-name"><json:value xsi:t...</json:object>...XDMP-ATOMIZEFUNC: (err:FOTY0013) Functions cannot be atomized...), fn:doc("/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml")/newsitem/article/*:html/*:head/*:script[5]/*:script/*:script/*:script/*:link/*:script/*:script/*:script/*:script/*:script/*:script/*:style/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:link/*:script/*:script/*:noscript/*:link/*:meta/*:body/*:div/*:div/*:div/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:script/*:script/*:noscript/*:p/*:header/*:div/*:div/*:div/*:a/*:section/*:div/*:h2/*:ul/*:li/*:a/*:li/*:a/*:div/*:div/*:a/*:img/*:span/*:script/*:nav/*:div/*:h2/*:ul/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:span/*:div/*:a/*:form/*:div/*:label/*:div/*:script/*:div/*:div/*:div/*:div/*:div/*:a/*:svg/*:title/*:h2/*:a/*:div/*:button/*:div/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:div/*:nav/*:a/*:span/*:span/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:span/*:div/*:div/*:script/*:script/*:div/*:div/*:span/*:span/*:a/*:div/*:div/*:script/*:script/*:div/*:div/*:div/*:span/*:span/*:a/*:div/*:div/*:script/*:script/*:div/*:div/*:div/*:div/*:h1/*:div/*:ul/*:li/*:div/*:li/*:span/*:a/*:div/*:figure/*:span/*:img/*:span/*:span/*:figcaption/*:
span/*:span/*:p/*:p/*:p/*:p/*:p/*:div/*:div/*:script/*:script/*:h2/*:p/*:p/*:figure/*:span/*:div/*:span/*:span/*:figcaption/*:span/*:span/*:p/*:p/*:p/*:p/*:p/*:p/*:p/*:p/*:div/*:a/*:h2/*:a/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:div/*:div/*:div/*:h2/*:div/*:ul/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:div/*:div/*:div/*:h2/*:div/*:ul/*:li/*:div/*:div/*:a/*:div/*:span/*:p/*:div/*:div/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:h2/*:a/*:strong/*:p/*:div/*:a/*:strong/*:div/*:a/*:strong/*:div/*:div/*:div/*:script/*:script/*:div/*:div/*:h2/*:div/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:div/*:div/*:span/*:span/*:span/*:h3/@class) [1.0-ml]
$config = json:object(<json:object xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:json="http://marklogic.com/xdmp/json"><json:entry key="element-qname-from-json-name"><json:value xsi:t...</json:object>...XDMP-ATOMIZEFUNC: (err:FOTY0013) Functions cannot be atomized...)
$a = fn:doc("/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml")/newsitem/article/*:html/*:head/*:script[5]/*:script/*:script/*:script/*:link/*:script/*:script/*:script/*:script/*:script/*:script/*:style/*:script/*:script/*:script/*:script/*:scr
like image 886
chriskelly Avatar asked Oct 27 '15 22:10

chriskelly


1 Answer

The format-xml and format-json options to cts.search() filter the search results by those formats; they don't do any transformations (see the cts.search() options documentation).

There are many ways to transform XML into JSON using MarkLogic; the simplest is probably the json XQuery library, specifically json:transform-to-json-object(). You can use that library in server-side JS like this:

// Load MarkLogic's XQuery JSON library so it can be called from server-side JS.
var json = require('/MarkLogic/json/json.xqy');

// Fetch the XML document by its URI.
var doc = cts.doc('/triplestore/97a5ab126bddeea0.xml');
// Transform the XML into a JSON object using the 'custom' configuration.
var jsonDoc = json.transformToJsonObject(doc, json.config('custom'));

You can use json.config() to configure and customize the transformation.

cts.search() returns an Iterator, so you'll need a for-of loop (or some kind of accumulator function) to get the actual XML documents, which you could then transform.

Update:

That error could be a bug in the JSON library, but that's a very deep HTML path, and I don't think it makes much sense to transform HTML elements into JSON object properties. Instead, we'll serialize the HTML, and add the string back to our JSON object.

Here's an example transformation of search results, showing how to unwrap the Iterator, customize the JSON transformations, serialize XHTML content for use within JSON, etc.

Notes:

  • this uses fn.subsequence to limit the Iterator to the first 10 results.
  • I'm serializing the excluded <html/> elements to a string (using the xpath() method of the Node object and xdmp.quote()), and adding that to the JSON object as escapedContent.

Here's the combined example; you can run this in MarkLogic QConsole:

// Transform the first 10 XML search results into plain JS objects, keeping any
// XHTML content as a serialized string instead of converting it element by
// element.
var json = require('/MarkLogic/json/json.xqy');
var conf = json.config('custom');

var htmlNs = 'http://www.w3.org/1999/xhtml';

// exclude <html:html/> elements from the XML-to-JSON transformation;
// they are serialized to a string separately in the loop below

// Note: this is a little awkward because the JSON library is XQuery
// and requires an XDM sequence, not an Array
conf['ignore-element-names'] = json.arrayValues([
  fn.QName(htmlNs, 'html')
]);

// limit the search Iterator to the first 10 XML results
var results = fn.subsequence(
  cts.search(cts.andQuery(null), 'format-xml'),
  1,
  10
);

var transformedResults = [];

for (var result of results) {
  // transformToJson() returns an object-node() wrapped in a document-node()
  // convert it to a regular JS object
  var transformed = json.transformToJson(result, conf).toObject();

  // serialize the excluded XHTML subtree and attach it as a plain string
  transformed.escapedContent = xdmp.quote(
    result.xpath('.//html:html', { html: htmlNs })
  );

  transformedResults.push(transformed);
}

transformedResults;
like image 52
joemfb Avatar answered Sep 23 '22 23:09

joemfb