Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to parse JSON and add some custom strings?

Tags:

json

raku

I have a JSON string which comes from Spark:

// Read the Parquet data set, then serialize the resulting schema to a JSON string.
val df = spark.read.parquet("hdfs://xxx-namespace/20190311")
val jsonStr = df.schema.json

jsonStr is like this:

{
    "type":"struct",
    "fields":[
        {
            "name":"alm_dm_list",
            "type":{
                "type":"array",
                "elementType":"integer",
                "containsNull":true
            },
            "nullable":true,
            "metadata":{

            }
        },
        {
            "name":"data_batt_sc_volt_lowest",
            "type":"double",
            "nullable":true,
            "metadata":{

            }
        },
        {
            "name":"veh_dcdcst",
            "type":"integer",
            "nullable":true,
            "metadata":{

            }
        },
        {
            "name":"esd_temp_data",
            "type":{
                "type":"array",
                "elementType":{
                    "type":"struct",
                    "fields":[
                        {
                            "name":"esd_temp_probe_cnt",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_temp_probe_list",
                            "type":{
                                "type":"array",
                                "elementType":"integer",
                                "containsNull":true
                            },
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_temp_subsys_seq",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        }
                    ]
                },
                "containsNull":true
            },
            "nullable":true,
            "metadata":{

            }
        },
        {
            "name":"esd_volt_data",
            "type":{
                "type":"array",
                "elementType":{
                    "type":"struct",
                    "fields":[
                        {
                            "name":"esd_curr",
                            "type":"double",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_frame_sc_cnt",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_frame_sc_list",
                            "type":{
                                "type":"array",
                                "elementType":"double",
                                "containsNull":true
                            },
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_frame_start",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_sc_cnt",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_volt",
                            "type":"double",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"esd_volt_subsys_seq",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        }
                    ]
                },
                "containsNull":true
            },
            "nullable":true,
            "metadata":{

            }
        },
        {
            "name":"dm_data",
            "type":{
                "type":"array",
                "elementType":{
                    "type":"struct",
                    "fields":[
                        {
                            "name":"dm_ctl_dc_curr",
                            "type":"double",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_ctl_temp",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_ctl_volt",
                            "type":"double",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_seq",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_spd",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_st",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_temp",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        },
                        {
                            "name":"dm_torq",
                            "type":"integer",
                            "nullable":true,
                            "metadata":{

                            }
                        }
                    ]
                },
                "containsNull":true
            },
            "nullable":true,
            "metadata":{

            }
        }]
}       

I want to get a schema based on the JSON string, so I need to parse it and get a struct like this:

  // Desired result: the schema written out by hand with the StructType builder.
  // Each .add(name, dataType, nullable) call corresponds to one entry of a "fields" array in the JSON.
  val schema = new StructType()
    .add("alm_dm_list", ArrayType(IntegerType, true), true)
    .add("data_batt_sc_volt_lowest", DoubleType, true)
    .add("veh_dcdcst", IntegerType, true)
    // Array whose elements are themselves structs -> nested StructType.
    .add("esd_temp_data", ArrayType(new StructType()
      .add("esd_temp_probe_cnt", IntegerType, true)
      .add("esd_temp_probe_list", ArrayType(IntegerType, true), true)
      .add("esd_temp_subsys_seq", IntegerType, true)
    ), true)
    .add("esd_volt_data", ArrayType(new StructType()
      .add("esd_curr", DoubleType, true)
      .add("esd_frame_sc_cnt", IntegerType, true)
      .add("esd_frame_sc_list", ArrayType(DoubleType, true), true)
      .add("esd_frame_start", IntegerType, true)
      .add("esd_sc_cnt", IntegerType, true)
      .add("esd_volt", DoubleType, true)
      .add("esd_volt_subsys_seq", IntegerType, true)
    ), true)
    .add("dm_data", ArrayType(new StructType()
      .add("dm_ctl_dc_curr", DoubleType, true)
      .add("dm_ctl_temp", IntegerType, true)
      .add("dm_ctl_volt", DoubleType, true)
      .add("dm_seq", IntegerType, true)
      .add("dm_spd", IntegerType, true)
      .add("dm_st", IntegerType, true)
      .add("dm_temp", IntegerType, true)
      .add("dm_torq", IntegerType, true)
    ), true)

JSON::Infer may be helpful, but there are nested structures in my JSON, which makes it complex for me. Any suggestions will help.

like image 742
chenyf Avatar asked Mar 11 '19 03:03

chenyf


People also ask

How do I parse a string in JSON?

Use the JavaScript function JSON.parse() to convert text into a JavaScript object: const obj = JSON.parse('{"name":"John", "age":30, "city":"New York"}'); Make sure the text is in JSON format, or else you will get a syntax error.

Can JSON object convert to string?

Stringify a JavaScript Object: use JSON.stringify() to convert it into a string. const myJSON = JSON.stringify(obj); The result will be a string following the JSON notation.


1 Answer

In case you do not care about indenting (it is possible, but not so important), what about this? It does not work in IE - I guess template strings are the only problem(?):

'use strict';

// Entry point: generates the Scala schema source into the global `res`
// and prints it to the console.
function run() {
	// Walk the whole tree starting at the iterator's current node, indent 2.
	buildRecursive(2);
	// Drop the last character of the generated text and terminate with ';'.
	var withoutLastChar = res.slice(0, -1);
	res = withoutLastChar + ';';
	console.log(res);
}

// Maps a "type" value of the Spark schema JSON to the Scala DataType name that
// is written into the generated code. Besides "array", the table covers
// Spark's primitive type names so that e.g. string or long columns are no
// longer emitted as the text "undefined".
var typeRename = {
	"array": "ArrayType",
	"string": "StringType",
	"boolean": "BooleanType",
	"byte": "ByteType",
	"short": "ShortType",
	"integer": "IntegerType",
	"long": "LongType",
	"float": "FloatType",
	"double": "DoubleType",
	"binary": "BinaryType",
	"date": "DateType",
	"timestamp": "TimestampType"
};
// Maps a primitive "elementType" value of an array to its Scala DataType name.
var elementRename = {
	"string": "StringType",
	"boolean": "BooleanType",
	"byte": "ByteType",
	"short": "ShortType",
	"integer": "IntegerType",
	"long": "LongType",
	"float": "FloatType",
	"double": "DoubleType",
	"binary": "BinaryType",
	"date": "DateType",
	"timestamp": "TimestampType"
};
// Output accumulator: the generated Scala source is appended to this string.
var res = "val schema = ";

// Returns a string of `no` space characters.
// A bounded counting loop is used instead of `while (no--)`, which never
// terminates for negative or fractional counts. Zero, negative, NaN and
// undefined counts all yield the empty string.
function repeatIndent(no) {
    var retVal = '';
    for (var i = 0; i < no; i++) retVal += ' ';
    return retVal;
}

// Builds one output row: `indent` spaces followed by all entries of `params`
// concatenated without a separator.
function buildRow(indent, params) {
    var padding = repeatIndent(indent);
    var content = params.join('');
    return padding + content;
}

// Appends the Scala code for the schema level at the iterator's current node
// to the global `res`.
//
// The current node and its siblings are read as one key/value map:
//  - a level whose "type" is "struct" emits `new StructType()` and then one
//    row per entry of its "fields" list;
//  - a level that has a "name" is a single field and emits one `.add(...)` row.
//
// indent: number of spaces placed in front of every row emitted at this level.
//
// Relies on the globals `it`, `res`, `typeRename`, `elementRename`, and on
// `buildRow` / `JNode`.
// Throws an Error when a field has an object-valued type other than "array".
function buildRecursive(indent) {
	var lev = it.ReadArray(), indentStep = 1;
	if (lev.type == "struct") {
		res += "new StructType()\n";
		// lev.fields is the first entry of the "fields" list; each entry's
		// first child is where that field's key/value siblings start.
		var under = lev.fields;
		while (under && under.node) {
			it.SetCurrent(under.node);
			buildRecursive(indent + indentStep);
			under = under.next;
		}
	} else if (lev.name) {
		if (lev.type instanceof JNode) {
			// Object-valued type: step into it and read its own key/value map.
			it.SetCurrent(lev.type.node);
			var lev2 = it.ReadArray();
			if (lev2.type != "array") {
				// Only arrays are handled; fail loudly instead of emitting garbage.
				throw new Error('Unsupported type "' + lev2.type + '" for field "' + lev.name + '"');
			}
			// Take elementType from the sibling map rather than scanning the
			// tree for the next "elementType" key, which could match a key
			// belonging to a different field.
			var elementType = lev2.elementType;
			if (elementType instanceof JNode) {
				// Object-valued element type: emit it recursively inside ArrayType( ... ).
				res += buildRow(indent, ['.add("', lev.name, '", ', typeRename[lev2.type], '(']);
				// Current is a method: call it so the node, not the function, is remembered.
				var here = it.Current();
				it.SetCurrent(elementType.node);
				buildRecursive(indent + indentStep);
				it.SetCurrent(here);
				// ArrayType(elementType) defaults containsNull to true, so the
				// flag is only written out when it is false.
				res += buildRow(indent, [lev2.containsNull === false ? ', false' : '', '), ', lev.nullable, ')\n']);
			} else {
				// Array of primitives.
				res += buildRow(indent, ['.add("', lev.name, '", ', typeRename[lev2.type], '(',
					elementRename[elementType], ', ', lev2.containsNull, '), ', lev.nullable, ')\n']);
			}
		} else {
			// Plain primitive field.
			res += buildRow(indent, ['.add("', lev.name, '", ', typeRename[lev.type], ', ', lev.nullable, ')\n']);
		}
	}
}

// My JSON iterator
var JNode = (function () {

	// One node of the linked tree that JIterator builds from a JSON value.
	//   _parent : enclosing node (stored in `parent`)
	//   _pred   : previous sibling (stored in `pred`)
	//   _key    : property name or array index of this entry
	//   _value  : value stored for this entry
	// `node` (first child) and `next` (next sibling) start as null and are
	// linked up later by the code that builds the tree.
	var NodeConstructor = function JNode(_parent, _pred, _key, _value) {
		// upward / backward links
		this.parent = _parent;
		this.pred = _pred;
		// downward / forward links, not yet connected
		this.node = null;
		this.next = null;
		// payload
		this.key = _key;
		this.value = _value;
	};

	return NodeConstructor;
})();
// Cursor over a JSON value that has been mirrored as a linked tree of JNode
// objects (parent / pred / next / node links).
//
// All state (root, cursor, deepest level seen) is kept per instance, so
// several iterators can exist side by side. `Level` is the public depth
// counter maintained by the navigation methods.
var JIterator = (function () {

	// Recursively mirrors `json` as JNode siblings below `parent`.
	// `state.root` receives the very first node that is created.
	function buildTree(json, parent, state) {
		var pred = null, node;
		for (var child in json) {
			var value = json[child];
			var isContainer = value instanceof Object;
			if (json instanceof Array) child = parseInt(child, 10); // non-associative array
			if (!state.root) {
				// NOTE(review): the very first node keeps the raw value, while every
				// other container node stores an empty [] / {} placeholder. Kept as is.
				state.root = node = new JNode(parent, null, child, value);
			} else {
				node = new JNode(parent, pred, child, isContainer ? ((value instanceof Array) ? [] : {}) : value);
			}
			if (pred) pred.next = node;
			if (parent && parent.node == null) parent.node = node; // first child
			pred = node;
			if (isContainer) buildTree(value, node, state);
		}
	}

	// json:   value to iterate over.
	// parent: optional node to attach the top-level entries to (defaults to null).
	function JIterator(json, parent) {
		// Tolerate a call without `new`.
		if (!(this instanceof JIterator)) return new JIterator(json, parent);
		if (parent === undefined) parent = null;
		var state = { root: null };
		buildTree(json, parent, state);
		this._root = state.root;
		this._current = state.root;
		this._maxLevel = -1; // deepest Level reached by DepthFirst so far
		this.Level = 0;
	}

	// Returns the node the cursor is on.
	JIterator.prototype.Current = function () { return this._current; };
	// Moves the cursor to `newCurrent`; Level is not adjusted.
	JIterator.prototype.SetCurrent = function (newCurrent) { this._current = newCurrent; };
	// Parent / Pred / Node / Next move the cursor along one link and return the
	// new node, or return false (cursor unchanged) when the link is null.
	JIterator.prototype.Parent = function () {
		var retVal = this._current.parent;
		if (retVal == null) return false;
		this.Level--;
		return this._current = retVal;
	};
	JIterator.prototype.Pred = function () {
		var retVal = this._current.pred;
		if (retVal == null) return false;
		return this._current = retVal;
	};
	JIterator.prototype.Node = function () {
		var retVal = this._current.node;
		if (retVal == null) return false;
		this.Level++;
		return this._current = retVal;
	};
	JIterator.prototype.Next = function () {
		var retVal = this._current.next;
		if (retVal == null) return false;
		return this._current = retVal;
	};
	JIterator.prototype.Key = function () { return this._current.key; };
	// Key followed by ':' for named entries, empty string for array indices.
	JIterator.prototype.KeyDots = function () {
		var key = this._current.key;
		return (typeof(key) == "number") ? "" : (key + ':');
	};
	JIterator.prototype.Value = function () { return this._current.value; };
	// Puts the cursor back on the root node.
	JIterator.prototype.Reset = function () {
		this._current = this._root;
		this.Level = 0;
	};
	// Returns the keys from the root down to the cursor as an array; container
	// nodes get "[]" or "{}" appended to their key.
	JIterator.prototype.RawPath = function () {
		var steps = [], level = this._current;
		while (level != null) {
			if (level.value instanceof Object) {
				steps.push(level.key + (level.value instanceof Array ? "[]" : "{}"));
			} else {
				steps.push(level.key);
			}
			level = level.parent;
		}
		return steps.reverse();
	};
	// Like RawPath, but numeric keys are written as "[i]" and named containers as
	// key[size] / key{size}. When the step below a named container is an index,
	// it is folded into that container as "size.index".
	JIterator.prototype.Path = function () {
		var steps = [], level = this._current;
		while (level != null) {
			if (level.value instanceof Object) {
				if (typeof(level.key) == "number") steps.push('[' + level.key + ']');
				else {
					// Count the direct children of this container.
					var size = 0;
					var items = level.node;
					while (items) {
						size++;
						items = items.next;
					}
					var type = (level.value instanceof Array ? "[]" : "{}");
					var prev = steps[steps.length - 1];
					if (prev && prev[0] == '[' && prev[prev.length - 1] == ']') {
						var index = prev.slice(1, -1);
						if (!isNaN(index)) {
							steps.pop();
							size += '.' + index;
						}
					}
					steps.push(level.key + type[0] + size + type[1]);
				}
			} else if (typeof(level.key) == "number") {
				steps.push('[' + level.key + ']');
			} else {
				steps.push(level.key);
			}
			level = level.parent;
		}
		return steps.reverse();
	};
	// Advances the cursor one step in depth-first order.
	// Returns 1 (moved down), 2 (moved right), 3 (went up, then right) or
	// 0 when the traversal is finished (the cursor is then null).
	JIterator.prototype.DepthFirst = function () {
		if (this._current == null) return 0; // exit sign
		if (this._current.node != null) {
			this._current = this._current.node;
			this.Level++;
			if (this._maxLevel < this.Level) this._maxLevel = this.Level;
			return 1; // moved down
		} else if (this._current.next != null) {
			this._current = this._current.next;
			return 2; // moved right
		} else {
			while (this._current != null) {
				if (this._current.next != null) {
					this._current = this._current.next;
					return 3; // returned up & moved next
				}
				this.Level--;
				this._current = this._current.parent;
			}
		}
		return 0; // exit sign
	};
	// Advances the cursor one step in breadth-first order.
	// Returns a non-zero code after a move and 0 when there is nowhere to go.
	JIterator.prototype.BreadthFirst = function () {
		if (this._current == null) return 0; // exit sign
		if (this._current.next) {
			this._current = this._current.next;
			return 1; // moved right
		} else if (this._current.parent) {
			var level = this.Level;
			// Look for the next node on the same level further along the tree.
			while (this.DepthFirst() && level != this.Level);
			if (this._current) return 2; // returned up & moved next
			// None left: restart from the root and look one level deeper.
			do {
				this.Reset();
				level++;
				while (this.DepthFirst() && level != this.Level);
				if (this._current) return 3; // returned up & moved next
			} while (this._maxLevel >= level);
			return this._current != null ? 3 : 0;
		} else if (this._current.node) {
			this._current = this._current.node;
			return 3;
		} else if (this._current.pred) {
			// Top level exhausted: go back to the first sibling, then descend from
			// the first sibling that has children.
			while (this._current.pred) this._current = this._current.pred;
			while (this._current && !this._current.node) this._current = this._current.next;
			if (!this._current) return 0; // exit sign
			else return this.DepthFirst();
		}
		return 0; // exit sign: single node without any links
	};
	// Collects the cursor node and all siblings after it into a plain object.
	// Primitive entries map key -> value. Container entries map key -> first
	// child node when the stored value has length 0 (array placeholder),
	// otherwise key -> the entry's own node.
	// Returns an empty object when the cursor is null.
	JIterator.prototype.ReadArray = function () {
		var retVal = {};
		var item = this._current;
		while (item != null) {
			if (item.value instanceof Object) {
				if (item.value.length == 0) retVal[item.key] = item.node;
				else retVal[item.key] = item;
			} else retVal[item.key] = item.value;
			item = item.next;
		}
		return retVal;
	};
	// Searches in depth-first order, starting at the cursor itself, for the
	// first node whose key equals `key`. Returns that node, or null when the
	// traversal ends without a match. Cursor and Level are restored either way.
	JIterator.prototype.FindKey = function (key) {
		var pos = this._current, level = this.Level;
		while (this._current && this._current.key != key) this.DepthFirst();
		// The cursor is null here when the traversal ran off the end of the tree.
		var retVal = this._current ? this._current : null;
		this._current = pos;
		this.Level = level;
		return retVal;
	};

	return JIterator;
})();

// Sample input: a Spark schema in the JSON form produced by `df.schema.json`
// (a "struct" with a "fields" list; array types carry "elementType" and
// "containsNull"). This is the value the iterator below is built from.
var json = {
	"type": "struct",
	"fields": [
		{
			"name": "alm_dm_list",
			"type": {
				"type": "array",
				"elementType": "integer",
				"containsNull": true
			},
			"nullable": true,
			"metadata": {}
		},
		{
			"name": "data_batt_sc_volt_lowest",
			"type": "double",
			"nullable": true,
			"metadata": {}
		},
		{
			"name": "veh_dcdcst",
			"type": "integer",
			"nullable": true,
			"metadata": {}
		},
		{
			"name": "esd_temp_data",
			"type": {
				"type": "array",
				"elementType": {
					"type": "struct",
					"fields": [
						{
							"name": "esd_temp_probe_cnt",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_temp_probe_list",
							"type": {
								"type": "array",
								"elementType": "integer",
								"containsNull": true
							},
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_temp_subsys_seq",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						}
					]
				},
				"containsNull": true
			},
			"nullable": true,
			"metadata": {}
		},
		{
			"name": "esd_volt_data",
			"type": {
				"type": "array",
				"elementType": {
					"type": "struct",
					"fields": [
						{
							"name": "esd_curr",
							"type": "double",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_frame_sc_cnt",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_frame_sc_list",
							"type": {
								"type": "array",
								"elementType": "double",
								"containsNull": true
							},
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_frame_start",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_sc_cnt",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_volt",
							"type": "double",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "esd_volt_subsys_seq",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						}
					]
				},
				"containsNull": true
			},
			"nullable": true,
			"metadata": {}
		},
		{
			"name": "dm_data",
			"type": {
				"type": "array",
				"elementType": {
					"type": "struct",
					"fields": [
						{
							"name": "dm_ctl_dc_curr",
							"type": "double",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_ctl_temp",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_ctl_volt",
							"type": "double",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_seq",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_spd",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_st",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_temp",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						},
						{
							"name": "dm_torq",
							"type": "integer",
							"nullable": true,
							"metadata": {}
						}
					]
				},
				"containsNull": true
			},
			"nullable": true,
			"metadata": {}
		}
	]
};

// Script entry: build the iterator over the sample schema (`it` is the global
// read by run/buildRecursive), then generate the Scala code and print it.
var it = new JIterator(json);
run();
like image 107
Tom Avatar answered Oct 19 '22 18:10

Tom