I have a JSON string that comes from Spark:
val df = spark.read.parquet("hdfs://xxx-namespace/20190311")
val jsonStr = df.schema.json
The value of jsonStr looks like this:
{
"type":"struct",
"fields":[
{
"name":"alm_dm_list",
"type":{
"type":"array",
"elementType":"integer",
"containsNull":true
},
"nullable":true,
"metadata":{
}
},
{
"name":"data_batt_sc_volt_lowest",
"type":"double",
"nullable":true,
"metadata":{
}
},
{
"name":"veh_dcdcst",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_temp_data",
"type":{
"type":"array",
"elementType":{
"type":"struct",
"fields":[
{
"name":"esd_temp_probe_cnt",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_temp_probe_list",
"type":{
"type":"array",
"elementType":"integer",
"containsNull":true
},
"nullable":true,
"metadata":{
}
},
{
"name":"esd_temp_subsys_seq",
"type":"integer",
"nullable":true,
"metadata":{
}
}
]
},
"containsNull":true
},
"nullable":true,
"metadata":{
}
},
{
"name":"esd_volt_data",
"type":{
"type":"array",
"elementType":{
"type":"struct",
"fields":[
{
"name":"esd_curr",
"type":"double",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_frame_sc_cnt",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_frame_sc_list",
"type":{
"type":"array",
"elementType":"double",
"containsNull":true
},
"nullable":true,
"metadata":{
}
},
{
"name":"esd_frame_start",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_sc_cnt",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_volt",
"type":"double",
"nullable":true,
"metadata":{
}
},
{
"name":"esd_volt_subsys_seq",
"type":"integer",
"nullable":true,
"metadata":{
}
}
]
},
"containsNull":true
},
"nullable":true,
"metadata":{
}
},
{
"name":"dm_data",
"type":{
"type":"array",
"elementType":{
"type":"struct",
"fields":[
{
"name":"dm_ctl_dc_curr",
"type":"double",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_ctl_temp",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_ctl_volt",
"type":"double",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_seq",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_spd",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_st",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_temp",
"type":"integer",
"nullable":true,
"metadata":{
}
},
{
"name":"dm_torq",
"type":"integer",
"nullable":true,
"metadata":{
}
}
]
},
"containsNull":true
},
"nullable":true,
"metadata":{
}
}]
}
I want to build a schema from this JSON string, so I need to parse it and produce a StructType like this:
// Desired result: the schema from the JSON above, written out by hand.
// Each .add(...) call gives the field name, its data type and whether it is nullable;
// array columns wrap their element type in ArrayType(elementType, containsNull).
// NOTE(review): assumes org.apache.spark.sql.types._ is imported — not shown in this snippet.
val schema = new StructType()
.add("alm_dm_list", ArrayType(IntegerType, true), true)
.add("data_batt_sc_volt_lowest", DoubleType, true)
.add("veh_dcdcst", IntegerType, true)
// Array of structs: the element type is itself a nested StructType.
.add("esd_temp_data", ArrayType(new StructType()
.add("esd_temp_probe_cnt", IntegerType, true)
.add("esd_temp_probe_list", ArrayType(IntegerType, true), true)
.add("esd_temp_subsys_seq", IntegerType, true)
), true)
.add("esd_volt_data", ArrayType(new StructType()
.add("esd_curr", DoubleType, true)
.add("esd_frame_sc_cnt", IntegerType, true)
.add("esd_frame_sc_list", ArrayType(DoubleType, true), true)
.add("esd_frame_start", IntegerType, true)
.add("esd_sc_cnt", IntegerType, true)
.add("esd_volt", DoubleType, true)
.add("esd_volt_subsys_seq", IntegerType, true)
), true)
.add("dm_data", ArrayType(new StructType()
.add("dm_ctl_dc_curr", DoubleType, true)
.add("dm_ctl_temp", IntegerType, true)
.add("dm_ctl_volt", DoubleType, true)
.add("dm_seq", IntegerType, true)
.add("dm_spd", IntegerType, true)
.add("dm_st", IntegerType, true)
.add("dm_temp", IntegerType, true)
.add("dm_torq", IntegerType, true)
), true)
JSON::Infer may be helpful, but my JSON contains nested structures, which makes it complicated for me. Any suggestions would help.
Use the JavaScript function JSON.parse() to convert text into a JavaScript object: const obj = JSON.parse('{"name":"John", "age":30, "city":"New York"}'); Make sure the text is in JSON format, or else you will get a syntax error.
Stringify a JavaScript object: use JSON.stringify() to convert it into a string. const myJSON = JSON.stringify(obj); The result will be a string following the JSON notation.
In case you do not care about indenting (it is possible, but not so important), what about this? It does not work in IE - I guess template strings are the only problem(?):
'use strict';
/**
 * Entry point: generates the Scala schema expression and prints it.
 *
 * Calls buildRecursive with a starting indent of 2, which appends the
 * generated text to the global `res`. The last character of `res` is then
 * replaced by ';' and the final text is written to the console.
 */
function run() {
    var startIndent = 2;
    buildRecursive(startIndent);
    // Swap the final character of the accumulated text for the terminating ';'.
    var withoutLast = res.slice(0, -1);
    res = withoutLast + ';';
    console.log(res);
}
// Lookup tables that translate the type names found in the schema JSON into
// the Scala identifiers written to the output. A name missing from a table is
// looked up as `undefined` and would be written out as the text "undefined",
// so the tables list the common Spark primitive type names, not just the ones
// present in the sample schema.

// Used for a field's own type (including the "array" container type).
var typeRename = {
    "array": "ArrayType",
    "double": "DoubleType",
    "integer": "IntegerType",
    "string": "StringType",
    "boolean": "BooleanType",
    "byte": "ByteType",
    "short": "ShortType",
    "long": "LongType",
    "float": "FloatType",
    "binary": "BinaryType",
    "date": "DateType",
    "timestamp": "TimestampType"
};

// Used for the primitive element type of an array.
var elementRename = {
    "integer": "IntegerType",
    "double": "DoubleType",
    "string": "StringType",
    "boolean": "BooleanType",
    "byte": "ByteType",
    "short": "ShortType",
    "long": "LongType",
    "float": "FloatType",
    "binary": "BinaryType",
    "date": "DateType",
    "timestamp": "TimestampType"
};

// Accumulator for the generated Scala source; buildRecursive appends to it
// and run() finishes and prints it.
var res = "val schema = ";
/**
 * Returns a string made of `no` space characters.
 *
 * @param {number} no Number of spaces wanted.
 * @returns {string} The padding; empty when `no` is zero, negative or not a number.
 */
function repeatIndent(no) {
    var retVal = '';
    // A bounded loop: counting down with `while (no--)` never reaches zero for
    // a negative or fractional count and would loop forever.
    for (var i = 0; i < no; i++) retVal += ' ';
    return retVal;
}
/**
 * Builds one piece of output text: the indentation for `indent` followed by
 * all entries of `params` concatenated without a separator.
 *
 * @param {number} indent Indentation width passed to repeatIndent.
 * @param {Array} params Fragments to concatenate in order.
 * @returns {string} The indented text.
 */
function buildRow(indent, params) {
    var padding = repeatIndent(indent);
    var text = params.join('');
    return padding + text;
}
/**
 * Appends the Scala text for the schema node at the iterator's current
 * position to the global `res`.
 *
 * The current position is read with it.ReadArray(), which collects the node
 * and its following siblings into one object:
 *  - an object whose `type` is "struct" emits "new StructType()" and then
 *    recurses into every entry of `fields`;
 *  - an object with a `name` is a field and emits one `.add(...)` call. Its
 *    type is either a plain type name, or an object describing an array whose
 *    element type is a plain type name or a nested struct.
 *
 * @param {number} indent Indentation used for the rows emitted at this level;
 *     nested levels use indent + 1.
 * @throws {Error} When a field's type object has no `elementType` key, i.e. it
 *     is a complex type other than an array.
 */
function buildRecursive(indent) {
    var lev = it.ReadArray(), indentStep = 1;
    if (lev.type == "struct") {
        res += "new StructType()\n";
        // `fields` holds the first array entry; walk the entries via `next`.
        var under = lev.fields;
        while (under && under.node) {
            it.SetCurrent(under.node);
            buildRecursive(indent + indentStep);
            under = under.next;
        }
    } else if (lev.name) {
        if (lev.type instanceof JNode) {
            // The field type is an object: read its own keys.
            it.SetCurrent(lev.type.node);
            var lev2 = it.ReadArray();
            // Take elementType from the type object itself. A depth-first
            // search for the key could run past this object and pick up the
            // elementType of an unrelated later field.
            if (!("elementType" in lev2)) {
                throw new Error('Unsupported type "' + lev2.type + '" for field "' + lev.name + '"');
            }
            var elementType = lev2.elementType;
            if (elementType instanceof JNode) {
                // Element type is itself an object: emit it nested inside the container call.
                res += buildRow(indent, ['.add("', lev.name, '", ', typeRename[lev2.type], '(']);
                // Current is a method: call it so the node (not the function) is saved.
                var here = it.Current();
                it.SetCurrent(elementType.node);
                buildRecursive(indent + indentStep);
                it.SetCurrent(here);
                res += buildRow(indent, ['), ', lev.nullable, ')\n']);
            } else {
                res += buildRow(indent, ['.add("', lev.name, '", ', typeRename[lev2.type], '(',
                    elementRename[elementType], ', ', lev2.containsNull, '), ', lev.nullable, ')\n']);
            }
        } else {
            res += buildRow(indent, ['.add("', lev.name, '", ', typeRename[lev.type], ', ', lev.nullable, ')\n']);
        }
    }
}
// My JSON iterator
var JNode = (function () {
    /**
     * One entry of the linked tree that JIterator builds and walks.
     *
     * @param parentNode  Entry this one belongs to (stored as `parent`).
     * @param prevSibling Entry before this one on the same level (stored as `pred`).
     * @param nodeKey     Key or index of the entry (stored as `key`).
     * @param nodeValue   Value of the entry (stored as `value`).
     */
    function JNode(parentNode, prevSibling, nodeKey, nodeValue) {
        // Links given by the creator.
        this.parent = parentNode;
        this.pred = prevSibling;
        // First child and next sibling start empty; the tree builder sets them later.
        this.node = null;
        this.next = null;
        // Payload.
        this.key = nodeKey;
        this.value = nodeValue;
    }

    return JNode;
}());
var JIterator = (function () {
    // Traversal state. It lives in this closure, not on the instance, so it is
    // shared by every JIterator object: constructing a new iterator makes any
    // earlier one point at the new tree as well.
    var root, current, maxLevel = -1;

    /**
     * Converts a parsed JSON value into a tree of linked JNode entries and
     * positions the iterator on the first entry.
     *
     * Every key (or array index) becomes one JNode. Entries of the same
     * container are chained through `pred`/`next`; a container entry points to
     * its first child through `node`, and children point back via `parent`.
     * Container entries store an empty [] or {} placeholder as their value;
     * only the very first entry created keeps the raw value.
     *
     * The function also calls itself as a plain function (without `new`) to
     * build the children of a container.
     *
     * @param json   Object or array to convert.
     * @param parent JNode the new entries are attached to; omitted for the top level.
     */
    function JIterator(json, parent) {
        if (parent === undefined) parent = null;
        // True only for `new JIterator(...)`. Checking `this` for truthiness
        // would only work in strict mode, where a plain call gets `undefined`.
        var constructing = this instanceof JIterator;
        if (constructing && parent === null) {
            // Start a fresh tree; otherwise a second iterator would keep
            // walking the tree built by the first one.
            root = null;
            maxLevel = -1;
        }
        var pred = null, localCurrent;
        for (var child in json) {
            var obj = json[child] instanceof Object;
            if (json instanceof Array) child = parseInt(child, 10); // non-associative array
            if (!root) root = localCurrent = new JNode(parent, null, child, json[child]);
            else {
                localCurrent = new JNode(parent, pred, child, obj ? ((json[child] instanceof Array) ? [] : {}) : json[child]);
            }
            if (pred) pred.next = localCurrent;
            if (parent && parent.node == null) parent.node = localCurrent;
            pred = localCurrent;
            if (obj) {
                // Build the children below this entry, then continue with its siblings.
                var memPred = pred;
                JIterator(json[child], pred);
                pred = memPred;
            }
        }
        if (constructing) {
            current = root;
            this.Level = 0;
        }
    }

    /** Returns the entry the iterator is positioned on. */
    JIterator.prototype.Current = function () { return current; };

    /** Positions the iterator on `newCurrent` (Level is not adjusted). */
    JIterator.prototype.SetCurrent = function (newCurrent) { current = newCurrent; };

    /** Moves to the parent entry; returns it, or false (without moving) if there is none. */
    JIterator.prototype.Parent = function () {
        var retVal = current.parent;
        if (retVal == null) return false;
        this.Level--;
        return current = retVal;
    };

    /** Moves to the previous sibling; returns it, or false (without moving) if there is none. */
    JIterator.prototype.Pred = function () {
        var retVal = current.pred;
        if (retVal == null) return false;
        return current = retVal;
    };

    /** Moves to the first child; returns it, or false (without moving) if there is none. */
    JIterator.prototype.Node = function () {
        var retVal = current.node;
        if (retVal == null) return false;
        this.Level++;
        return current = retVal;
    };

    /** Moves to the next sibling; returns it, or false (without moving) if there is none. */
    JIterator.prototype.Next = function () {
        var retVal = current.next;
        if (retVal == null) return false;
        return current = retVal;
    };

    /** Returns the key of the current entry. */
    JIterator.prototype.Key = function () { return current.key; };

    /** Returns the key followed by ':' or, for numeric (array index) keys, an empty string. */
    JIterator.prototype.KeyDots = function () { return (typeof(current.key) == "number") ? "" : (current.key + ':'); };

    /** Returns the value of the current entry. */
    JIterator.prototype.Value = function () { return current.value; };

    /** Moves back to the first entry of the tree and resets Level to 0. */
    JIterator.prototype.Reset = function () {
        current = root;
        this.Level = 0;
    };

    /**
     * Returns the keys from the top of the tree down to the current entry as
     * an array. Keys of container entries get a "[]" or "{}" suffix.
     */
    JIterator.prototype.RawPath = function () {
        var steps = [], level = current;
        do {
            if (level != null && level.value instanceof Object) {
                steps.push(level.key + (level.value instanceof Array ? "[]" : "{}"));
            } else {
                if (level != null) steps.push(level.key);
                else break;
            }
            level = level.parent;
        } while (level != null);
        return steps.reverse();
    };

    /**
     * Returns the path from the top of the tree down to the current entry as
     * an array of steps. Numeric keys are written as "[index]". Named
     * container entries are written as key + bracket + child count + bracket;
     * when the step below such a container is a numeric "[index]", that step
     * is folded into the container step as "count.index".
     */
    JIterator.prototype.Path = function () {
        var steps = [], level = current;
        do {
            if (level != null && level.value instanceof Object) {
                var size = 0;
                var items = level.node;
                if (typeof(level.key) == "number") steps.push('[' + level.key + ']');
                else {
                    // Count the children of this container.
                    while (items) {
                        size++;
                        items = items.next;
                    }
                    var type = (level.value instanceof Array ? "[]" : "{}");
                    var prev = steps[steps.length - 1];
                    if (prev && prev[0] == '[' && prev[prev.length - 1] == ']') {
                        // Text between the brackets of the previously pushed step.
                        var inner = prev.slice(1, -1);
                        if (!isNaN(inner)) {
                            steps.pop();
                            size += '.' + inner;
                        }
                    }
                    steps.push(level.key + type[0] + size + type[1]);
                }
            } else {
                if (level != null) {
                    if (typeof(level.key) == "number") steps.push('[' + level.key + ']');
                    else steps.push(level.key);
                }
                else break;
            }
            level = level.parent;
        } while (level != null);
        return steps.reverse();
    };

    /**
     * Advances one step in depth-first order.
     *
     * @returns {number} 1 = moved down to the first child, 2 = moved to the
     *     next sibling, 3 = went up and moved to a following entry, 0 = no
     *     entry left (the current position is then null).
     */
    JIterator.prototype.DepthFirst = function () {
        if (current == null) return 0; // exit sign
        if (current.node != null) {
            current = current.node;
            this.Level++;
            if (maxLevel < this.Level) maxLevel = this.Level;
            return 1; // moved down
        } else if (current.next != null) {
            current = current.next;
            return 2; // moved right
        } else {
            // Climb until an ancestor with a following sibling is found.
            while (current != null) {
                if (current.next != null) {
                    current = current.next;
                    return 3; // returned up & moved next
                }
                this.Level--;
                current = current.parent;
            }
        }
        return 0; // exit sign
    };

    /**
     * Advances one step in level-by-level order, using DepthFirst to locate
     * the next entry on the wanted level.
     * NOTE(review): the return value is a number in most branches, but null
     * or undefined in others — callers should only test it for truthiness.
     */
    JIterator.prototype.BreadthFirst = function () {
        if (current == null) return 0; // exit sign
        if (current.next) {
            current = current.next;
            return 1; // moved right
        } else if (current.parent) {
            var level = this.Level;
            // Look for a later entry on the same level.
            while (this.DepthFirst() && level != this.Level);
            if (current) return 2; // returned up & moved next
            // None left: restart from the top and look one level deeper.
            do {
                this.Reset();
                level++;
                while (this.DepthFirst() && level != this.Level);
                if (current) return 3; // returned up & moved next
            } while (maxLevel >= level);
            return current != null ? 3 : 0;
        } else if (current.node) {
            current = current.node;
            return 3;
        } else if (current.pred) {
            while (current.pred) current = current.pred;
            while (current && !current.node) current = current.next;
            if (!current) return null;
            else return this.DepthFirst();
        }
    };

    /**
     * Collects the current entry and all its following siblings into a plain
     * object keyed by entry key. Primitive entries contribute their value; a
     * container entry contributes its first child when its stored value has
     * length 0 (the empty-array placeholder), otherwise the entry itself.
     * The iterator position is not changed.
     */
    JIterator.prototype.ReadArray = function () {
        var retVal = {};
        var item = current;
        do {
            if (item.value instanceof Object) {
                if (item.value.length == 0) retVal[item.key] = item.node;
                else retVal[item.key] = item;
            } else retVal[item.key] = item.value;
            item = item.next;
        } while (item != null);
        return retVal;
    };

    /**
     * Searches in depth-first order, starting at the current entry, for the
     * first entry whose key equals `key`.
     *
     * @param key Key to look for.
     * @returns The matching entry, or null when the search reaches the end of
     *     the tree. Position and Level are restored before returning.
     */
    JIterator.prototype.FindKey = function (key) {
        var pos = current, level = this.Level;
        while (current && current.key != key) this.DepthFirst();
        // `current` is null here when nothing matched, so it must not be dereferenced.
        var retVal = current ? current : null;
        current = pos;
        this.Level = level;
        return retVal;
    };

    return JIterator;
})();
var json = {
"type": "struct",
"fields": [
{
"name": "alm_dm_list",
"type": {
"type": "array",
"elementType": "integer",
"containsNull": true
},
"nullable": true,
"metadata": {}
},
{
"name": "data_batt_sc_volt_lowest",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "veh_dcdcst",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "esd_temp_data",
"type": {
"type": "array",
"elementType": {
"type": "struct",
"fields": [
{
"name": "esd_temp_probe_cnt",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "esd_temp_probe_list",
"type": {
"type": "array",
"elementType": "integer",
"containsNull": true
},
"nullable": true,
"metadata": {}
},
{
"name": "esd_temp_subsys_seq",
"type": "integer",
"nullable": true,
"metadata": {}
}
]
},
"containsNull": true
},
"nullable": true,
"metadata": {}
},
{
"name": "esd_volt_data",
"type": {
"type": "array",
"elementType": {
"type": "struct",
"fields": [
{
"name": "esd_curr",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "esd_frame_sc_cnt",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "esd_frame_sc_list",
"type": {
"type": "array",
"elementType": "double",
"containsNull": true
},
"nullable": true,
"metadata": {}
},
{
"name": "esd_frame_start",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "esd_sc_cnt",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "esd_volt",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "esd_volt_subsys_seq",
"type": "integer",
"nullable": true,
"metadata": {}
}
]
},
"containsNull": true
},
"nullable": true,
"metadata": {}
},
{
"name": "dm_data",
"type": {
"type": "array",
"elementType": {
"type": "struct",
"fields": [
{
"name": "dm_ctl_dc_curr",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "dm_ctl_temp",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "dm_ctl_volt",
"type": "double",
"nullable": true,
"metadata": {}
},
{
"name": "dm_seq",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "dm_spd",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "dm_st",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "dm_temp",
"type": "integer",
"nullable": true,
"metadata": {}
},
{
"name": "dm_torq",
"type": "integer",
"nullable": true,
"metadata": {}
}
]
},
"containsNull": true
},
"nullable": true,
"metadata": {}
}
]
};
// Build the linked node tree for the schema JSON. buildRecursive reads this
// global iterator, so it has to exist before run() is called.
var it = new JIterator(json);
// Generate the Scala schema text and print it to the console.
run();
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow! Thank you!
Donate Us With