how to get json results from xml document in marklogic

Question

I am trying to return an XML document in JSON format using a 2-tier server-side JavaScript application. I know MarkLogic can easily switch between JSON and XML formats using the REST API.

However, the following returns xml:

// 'format-xml' restricts the search results to documents in XML format; it does not convert anything.
cts.search('something', ['format-xml']) // options may be omitted

but this returns nothing:

// 'format-json' restricts the search results to documents in JSON format; it does not convert XML to JSON.
cts.search('something', ['format-json'])

According to the documentation I think I have used the function correctly.

Is this possible using server side javascript?

Update:

Using transformToJsonObject as follows:

// Load MarkLogic's JSON conversion library (an XQuery module) into server-side JavaScript.
var json = require('/MarkLogic/json/json.xqy');
// Fetch the XML document to convert.
var doc = fn.doc('/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml')
// Convert the document using the library's 'custom' configuration.
var jsonDoc = json.transformToJsonObject(doc, json.config('custom'));
// Final expression: the conversion result.
jsonDoc

I get the following error:

500 Internal Server Error

XDMP-STACKOVERFLOW: Stack overflow
in /MarkLogic/json/custom.xqy, at 493:23,
in json-custom:is-ignore-attribute#2(json:object(<json:object xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:json="http://marklogic.com/xdmp/json"><json:entry key="element-qname-from-json-name"><json:value xsi:t...</json:object>...XDMP-ATOMIZEFUNC: (err:FOTY0013) Functions cannot be atomized...), fn:doc("/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml")/newsitem/article/*:html/*:head/*:script[5]/*:script/*:script/*:script/*:link/*:script/*:script/*:script/*:script/*:script/*:script/*:style/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:script/*:link/*:script/*:script/*:noscript/*:link/*:meta/*:body/*:div/*:div/*:div/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:script/*:script/*:noscript/*:p/*:header/*:div/*:div/*:div/*:a/*:section/*:div/*:h2/*:ul/*:li/*:a/*:li/*:a/*:div/*:div/*:a/*:img/*:span/*:script/*:nav/*:div/*:h2/*:ul/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:li/*:a/*:span/*:div/*:a/*:form/*:div/*:label/*:div/*:script/*:div/*:div/*:div/*:div/*:div/*:a/*:svg/*:title/*:h2/*:a/*:div/*:button/*:div/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:div/*:nav/*:a/*:span/*:span/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:span/*:div/*:div/*:script/*:script/*:div/*:div/*:span/*:span/*:a/*:div/*:div/*:script/*:script/*:div/*:div/*:div/*:span/*:span/*:a/*:div/*:div/*:script/*:script/*:div/*:div/*:div/*:div/*:h1/*:div/*:ul/*:li/*:div/*:li/*:span/*:a/*:div/*:figure/*:span/*:img/*:span/*:span/*:figcaption/*:
span/*:span/*:p/*:p/*:p/*:p/*:p/*:div/*:div/*:script/*:script/*:h2/*:p/*:p/*:figure/*:span/*:div/*:span/*:span/*:figcaption/*:span/*:span/*:p/*:p/*:p/*:p/*:p/*:p/*:p/*:p/*:div/*:a/*:h2/*:a/*:ul/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:li/*:a/*:span/*:div/*:div/*:div/*:h2/*:div/*:ul/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:li/*:a/*:div/*:div/*:div/*:span/*:div/*:div/*:div/*:div/*:div/*:h2/*:div/*:ul/*:li/*:div/*:div/*:a/*:div/*:span/*:p/*:div/*:div/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:script/*:script/*:div/*:div/*:h2/*:a/*:strong/*:p/*:div/*:a/*:strong/*:div/*:a/*:strong/*:div/*:div/*:div/*:script/*:script/*:div/*:div/*:h2/*:div/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:h3/*:p/*:div/*:a/*:div/*:div/*:div/*:div/*:div/*:span/*:span/*:span/*:h3/@class) [1.0-ml]
$config = json:object(<json:object xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:json="http://marklogic.com/xdmp/json"><json:entry key="element-qname-from-json-name"><json:value xsi:t...</json:object>...XDMP-ATOMIZEFUNC: (err:FOTY0013) Functions cannot be atomized...)
$a = fn:doc("/content/rss/72eb9bf835521446be8d2176e1ac9d22.xml")/newsitem/article/*:html/*:head/*:script[5]/*:script/*:script/*:script/*:link/*:script/*:script/*:script/*:script/*:script/*:script/*:style/*:script/*:script/*:script/*:script/*:scr

joemfb · Accepted Answer

The format-xml and format-json options to cts.search() filter the search results by those formats; they don't do any transformations (see the cts.search() options documentation).

There are many ways to transform XML into JSON using MarkLogic; the simplest is probably the json XQuery library, specifically json:transform-to-json-object(). You can use that library in server-side JS like this:

// Pull in MarkLogic's JSON conversion library (an XQuery module).
var json = require('/MarkLogic/json/json.xqy');

// Fetch the source XML document, then build the 'custom' conversion
// settings as a separate value so they are easy to adjust before converting.
var doc = cts.doc('/triplestore/97a5ab126bddeea0.xml');
var customConfig = json.config('custom');
var jsonDoc = json.transformToJsonObject(doc, customConfig);

You can use json.config() to configure and customize the transformation.

cts.search() returns an Iterator, so you'll need a for-of loop (or some kind of accumulator function) to get the actual XML documents, which you could then transform.

Update:

That error could be a bug in the JSON library, but that's a very deep HTML path, and I don't think it makes much sense to transform HTML elements into JSON object properties. Instead, we'll serialize the HTML and add the resulting string back to our JSON object.

Here's an example transformation of search results, showing how to unwrap the Iterator, customize the JSON transformation, serialize XHTML content for use within JSON, etc.

Notes:

This uses fn.subsequence to limit the Iterator to the first 10 results.
I'm serializing the excluded <html/> elements to a string (using the xpath() method of the Node object and xdmp.quote()), and adding that to the JSON object as escapedContent.

Here's the combined example; you can run this in MarkLogic QConsole:

// Convert the first 10 XML search results into plain JS objects. Embedded
// XHTML is excluded from the JSON conversion and attached to each object as
// a serialized string instead.
var json = require('/MarkLogic/json/json.xqy');
var conf = json.config('custom');

var htmlNs = 'http://www.w3.org/1999/xhtml';

// exclude <html:html/> elements

// Note: this is a little awkward because the JSON library is XQuery
// and requires an XDM sequence, not an Array
conf['ignore-element-names'] = json.arrayValues([
  fn.QName(htmlNs, 'html')
]);

// cts.search() returns an Iterator; fn.subsequence() limits it to the
// first 10 results
var results = fn.subsequence(
  cts.search(cts.andQuery(null), 'format-xml'),
  1,
  10
);

var transformedResults = [];

for (var result of results) {
  // transformToJson() returns an object-node() wrapped in a document-node()
  // convert it to a regular JS object
  var transformed = json.transformToJson(result, conf).toObject();

  // serialize the excluded <html/> elements to a string and attach it
  // to the converted object
  transformed.escapedContent = xdmp.quote(
    result.xpath('.//html:html', { html: htmlNs })
  );

  transformedResults.push(transformed);
}

// final expression: the array of converted results
transformedResults

how to get json results from xml document in marklogic

Tags:

marklogic

marklogic-8

chriskelly

1 Answers

joemfb

Recent Activity

Donate For Us

how to get json results from xml document in marklogic

Tags:

marklogic

marklogic-8

chriskelly

1 Answers

joemfb

Related questions

Recent Activity

Donate For Us