I am trying to combine multiple JSON files on an Ubuntu platform. As an example, the data from two files is as follows:
File_1
{
"artist":"Gob",
"timestamp":"2011-08-09 01:59:41.352247",
"similars":[
[
"TRTOVWD128F92F4227",
1
],
[
"TRUXNUD128F92F41D0",
0.97294099999999994
],
[
"TRNNOJO128F42992E9",
0.073926900000000004
],
[
"TRGZHTT128F423B2A4",
0.068387699999999996
],
[
"TRGYKYD128F42625F6",
0.065579700000000005
],
[
"TRGIWHY128F42625F5",
0.064063700000000001
],
[
"TRJCJTX128F930CACE",
0.063140100000000005
],
[
"TRMYNWT128F426254B",
0.0613825
],
[
"TRRQOJI128F428C865",
0.061121599999999998
],
[
"TRBNYHM128F428A569",
0.061121599999999998
],
[
"TRDLOYE128F4241E72",
0.060951900000000003
],
[
"TRNRVEW12903CBA24F",
0.060332700000000003
],
[
"TRKKIPG12903CBA083",
0.060155
],
[
"TRZHTGP128F428A63B",
0.059873599999999999
],
[
"TRKQSGZ128F428A851",
0.059873599999999999
],
[
"TRTOPDF128F42AD88A",
0.059687799999999999
],
[
"TRIWOPM128F4241E53",
0.058958900000000002
],
[
"TRCCJUW128F14652DB",
0.057935
],
[
"TRERDDF128F428ECC4",
0.057566600000000002
],
[
"TROKWNN128F421A3D8",
0.057379800000000002
],
[
"TRWGOOK128F42AE765",
0.057125000000000002
],
[
"TRFMNKP128F428ADC0",
0.056875099999999998
],
[
"TRDMLZT128F42A01A8",
0.055808900000000002
],
[
"TRGCJVM128E0780E48",
0.0547389
],
[
"TRRXGAY128F14652D7",
0.0538065
],
[
"TRIPEHH128F1462DFF",
0.052843000000000001
],
[
"TRDUOIP128F147D5A7",
0.051851500000000002
],
[
"TRZCHHD12903CC80A1",
0.051251699999999997
],
[
"TRFDDQS128F426243F",
0.051018300000000003
],
[
"TRZDKAR128F42591B8",
0.050740899999999999
],
[
"TRDVXUG128F1456CBF",
0.050486299999999998
],
[
"TRULRYN128F145FC1C",
0.050219800000000002
],
[
"TRMOWIA128F425CE0F",
0.049977500000000001
],
[
"TRUVPMZ128F42B6DF3",
0.049762000000000001
],
[
"TRSBDWW128F4262666",
0.049643699999999999
],
[
"TRKPHWQ128F4264F8C",
0.0495173
],
[
"TRBBLXU128F42623A1",
0.049416700000000001
],
[
"TRJKLLM128F1456C57",
0.049001599999999999
],
[
"TRSAAEI128F4216C24",
0.048813500000000003
],
[
"TRFXICT128F4264F8A",
0.048776199999999999
],
[
"TRINVLH12903CBE5A1",
0.048334500000000002
],
[
"TRMUUJR128F4262475",
0.048306500000000002
],
[
"TRTORTD128F1456AFA",
0.0468265
],
[
"TRECUJO12903CA7120",
0.046065599999999998
],
[
"TRXIRBQ128F93431BB",
0.0456938
],
[
"TRFDDVK128F42B6DF0",
0.045623799999999999
],
[
"TRSRGPM128F421A30B",
0.043976800000000003
],
[
"TRVUPPR128F429507D",
0.042872500000000001
],
[
"TRMHCZC128F428A4CD",
0.040675200000000002
],
[
"TRUFDRV128F4262352",
0.040675200000000002
],
[
"TRUZZHT128F93229AF",
0.039422199999999998
],
[
"TRLSIHL128F429AF18",
0.039002099999999998
],
[
"TRGETCK128F1460DB1",
0.038499499999999999
],
[
"TRSXXNU128F428AEF2",
0.038303799999999999
],
[
"TRFZXSY128F9330D9F",
0.037855199999999999
],
[
"TRPHFYF128F92F27FA",
0.037772100000000003
],
[
"TRNRHSL128F9337B55",
0.036998000000000003
],
[
"TRPTGNZ128F421A56B",
0.036713099999999999
],
[
"TRPAASI128F9337B6E",
0.036410499999999998
],
[
"TRGCROO128F93431C4",
0.035754300000000003
],
[
"TRCUHZL128F4235446",
0.034968699999999998
],
[
"TRDPOTJ128F429AF0C",
0.034860500000000003
],
[
"TROZUXM128F42790A2",
0.0346483
],
[
"TRJVLOQ128F9345A82",
0.034547799999999997
],
[
"TRQTFRP128F145FC1E",
0.033934600000000002
],
[
"TRQEWHR128F421A3F5",
0.032314599999999999
],
[
"TRNTPJA128F4265039",
0.030702900000000002
],
[
"TRDGXWY12903CF52BD",
0.030292300000000001
],
[
"TRBLEMZ128F93102D0",
0.029224300000000002
],
[
"TRBUUYO128F421A405",
0.028448500000000002
],
[
"TREVBDI12903CED7E6",
0.0279674
],
[
"TRKREBF128F429B317",
0.0258321
],
[
"TRZBYPR128F4233A8D",
0.025655000000000001
],
[
"TRTAZUQ12903CFEA78",
0.024545399999999998
],
[
"TRAIPRO128F429AE69",
0.024304699999999999
],
[
"TRTTVUZ128F92FADD3",
0.023320899999999999
],
[
"TRUYEJI128F4265041",
0.022173700000000001
],
[
"TRAXVGT128F9344507",
0.0213992
],
[
"TRJJBLH128F4260DA1",
0.0175365
],
[
"TRAMCWR128F4233F7F",
0.0161158
],
[
"TRXBLME128F424330F",
0.015760900000000001
],
[
"TRMUQXM128F4260D99",
0.015696000000000002
],
[
"TRHRZBJ128EF345514",
0.0156951
],
[
"TRJXIBT128F42454DB",
0.014519199999999999
],
[
"TRTHPOY128F9345AA5",
0.0137264
],
[
"TRRFGJU128F933B2E6",
0.0012336199999999999
],
[
"TRMYJUA128F428A590",
0.00123149
],
[
"TRNMVTE128F933B2EC",
0.00122703
],
[
"TRYALZM128F1483C7D",
0.0012245299999999999
],
[
"TRZVEJU128F4234F4E",
0.00121805
],
[
"TRQAZDO128F145639F",
0.0012166600000000001
],
[
"TRJXNJM12903CF57ED",
0.0012155
],
[
"TRVAOGO128F427C9D6",
0.00120951
],
[
"TRZMZDS128F422843B",
0.0012065000000000001
],
[
"TRXIEOF12903CE8212",
0.0012058699999999999
],
[
"TRPVVUG128F42A36AA",
0.0012057599999999999
],
[
"TRXGVXS128F428AA5C",
0.0012019400000000001
],
[
"TRUBOGF128E078A5B9",
0.0012017900000000001
],
[
"TRITZSB128F4277CC2",
0.0012014
],
[
"TRGHPHX128F9343544",
0.0011975600000000001
],
[
"TRUKWPE128F428114F",
0.00119666
],
[
"TROBGRB128F93229AB",
0.0011964199999999999
],
[
"TRGKTMW12903CFAE65",
0.00119637
]
],
"tags":[
[
"punk rock",
"100"
],
[
"punk",
"60"
]
],
"track_id":"TRAAAFD128F92F423A",
"title":"Face the Ashes"
}
File_2
{
"artist":"CLP",
"timestamp":"2011-08-02 06:36:59.879759",
"similars":[
],
"tags":[
],
"track_id":"TRAAAVG12903CFA543",
"title":"Insatiable (Instrumental Version)"
}
I wrote a Python script to combine them. I added a new line and a comma after each record.
# Asker's attempt: concatenate the raw bytes of every *.json file in the
# current directory into merged_file.json.
# NOTE(review): block indentation was lost in this copy; the nesting is
# presumably with > for > with, with both writes inside the loop.
import glob
read_files = glob.glob("*.json")
# The output is opened in binary mode and receives each file's raw contents.
with open("merged_file.json", "wb") as outfile:
for f in read_files:
with open(f, "rb") as infile:
outfile.write(infile.read())
# A comma and newline follow each record (per the description above); no
# enclosing "[" or "]" is ever written, so the result is not one JSON document.
outfile.write(',\n')
The output from the merge file is:
{
"artist":"Gob",
"timestamp":"2011-08-09 01:59:41.352247",
"similars":[
[
"TRTOVWD128F92F4227",
1
],
[
"TRUXNUD128F92F41D0",
0.97294099999999994
],
[
"TRNNOJO128F42992E9",
0.073926900000000004
],
[
"TRGZHTT128F423B2A4",
0.068387699999999996
],
[
"TRGYKYD128F42625F6",
0.065579700000000005
],
[
"TRGIWHY128F42625F5",
0.064063700000000001
],
[
"TRJCJTX128F930CACE",
0.063140100000000005
],
[
"TRMYNWT128F426254B",
0.0613825
],
[
"TRRQOJI128F428C865",
0.061121599999999998
],
[
"TRBNYHM128F428A569",
0.061121599999999998
],
[
"TRDLOYE128F4241E72",
0.060951900000000003
],
[
"TRNRVEW12903CBA24F",
0.060332700000000003
],
[
"TRKKIPG12903CBA083",
0.060155
],
[
"TRZHTGP128F428A63B",
0.059873599999999999
],
[
"TRKQSGZ128F428A851",
0.059873599999999999
],
[
"TRTOPDF128F42AD88A",
0.059687799999999999
],
[
"TRIWOPM128F4241E53",
0.058958900000000002
],
[
"TRCCJUW128F14652DB",
0.057935
],
[
"TRERDDF128F428ECC4",
0.057566600000000002
],
[
"TROKWNN128F421A3D8",
0.057379800000000002
],
[
"TRWGOOK128F42AE765",
0.057125000000000002
],
[
"TRFMNKP128F428ADC0",
0.056875099999999998
],
[
"TRDMLZT128F42A01A8",
0.055808900000000002
],
[
"TRGCJVM128E0780E48",
0.0547389
],
[
"TRRXGAY128F14652D7",
0.0538065
],
[
"TRIPEHH128F1462DFF",
0.052843000000000001
],
[
"TRDUOIP128F147D5A7",
0.051851500000000002
],
[
"TRZCHHD12903CC80A1",
0.051251699999999997
],
[
"TRFDDQS128F426243F",
0.051018300000000003
],
[
"TRZDKAR128F42591B8",
0.050740899999999999
],
[
"TRDVXUG128F1456CBF",
0.050486299999999998
],
[
"TRULRYN128F145FC1C",
0.050219800000000002
],
[
"TRMOWIA128F425CE0F",
0.049977500000000001
],
[
"TRUVPMZ128F42B6DF3",
0.049762000000000001
],
[
"TRSBDWW128F4262666",
0.049643699999999999
],
[
"TRKPHWQ128F4264F8C",
0.0495173
],
[
"TRBBLXU128F42623A1",
0.049416700000000001
],
[
"TRJKLLM128F1456C57",
0.049001599999999999
],
[
"TRSAAEI128F4216C24",
0.048813500000000003
],
[
"TRFXICT128F4264F8A",
0.048776199999999999
],
[
"TRINVLH12903CBE5A1",
0.048334500000000002
],
[
"TRMUUJR128F4262475",
0.048306500000000002
],
[
"TRTORTD128F1456AFA",
0.0468265
],
[
"TRECUJO12903CA7120",
0.046065599999999998
],
[
"TRXIRBQ128F93431BB",
0.0456938
],
[
"TRFDDVK128F42B6DF0",
0.045623799999999999
],
[
"TRSRGPM128F421A30B",
0.043976800000000003
],
[
"TRVUPPR128F429507D",
0.042872500000000001
],
[
"TRMHCZC128F428A4CD",
0.040675200000000002
],
[
"TRUFDRV128F4262352",
0.040675200000000002
],
[
"TRUZZHT128F93229AF",
0.039422199999999998
],
[
"TRLSIHL128F429AF18",
0.039002099999999998
],
[
"TRGETCK128F1460DB1",
0.038499499999999999
],
[
"TRSXXNU128F428AEF2",
0.038303799999999999
],
[
"TRFZXSY128F9330D9F",
0.037855199999999999
],
[
"TRPHFYF128F92F27FA",
0.037772100000000003
],
[
"TRNRHSL128F9337B55",
0.036998000000000003
],
[
"TRPTGNZ128F421A56B",
0.036713099999999999
],
[
"TRPAASI128F9337B6E",
0.036410499999999998
],
[
"TRGCROO128F93431C4",
0.035754300000000003
],
[
"TRCUHZL128F4235446",
0.034968699999999998
],
[
"TRDPOTJ128F429AF0C",
0.034860500000000003
],
[
"TROZUXM128F42790A2",
0.0346483
],
[
"TRJVLOQ128F9345A82",
0.034547799999999997
],
[
"TRQTFRP128F145FC1E",
0.033934600000000002
],
[
"TRQEWHR128F421A3F5",
0.032314599999999999
],
[
"TRNTPJA128F4265039",
0.030702900000000002
],
[
"TRDGXWY12903CF52BD",
0.030292300000000001
],
[
"TRBLEMZ128F93102D0",
0.029224300000000002
],
[
"TRBUUYO128F421A405",
0.028448500000000002
],
[
"TREVBDI12903CED7E6",
0.0279674
],
[
"TRKREBF128F429B317",
0.0258321
],
[
"TRZBYPR128F4233A8D",
0.025655000000000001
],
[
"TRTAZUQ12903CFEA78",
0.024545399999999998
],
[
"TRAIPRO128F429AE69",
0.024304699999999999
],
[
"TRTTVUZ128F92FADD3",
0.023320899999999999
],
[
"TRUYEJI128F4265041",
0.022173700000000001
],
[
"TRAXVGT128F9344507",
0.0213992
],
[
"TRJJBLH128F4260DA1",
0.0175365
],
[
"TRAMCWR128F4233F7F",
0.0161158
],
[
"TRXBLME128F424330F",
0.015760900000000001
],
[
"TRMUQXM128F4260D99",
0.015696000000000002
],
[
"TRHRZBJ128EF345514",
0.0156951
],
[
"TRJXIBT128F42454DB",
0.014519199999999999
],
[
"TRTHPOY128F9345AA5",
0.0137264
],
[
"TRRFGJU128F933B2E6",
0.0012336199999999999
],
[
"TRMYJUA128F428A590",
0.00123149
],
[
"TRNMVTE128F933B2EC",
0.00122703
],
[
"TRYALZM128F1483C7D",
0.0012245299999999999
],
[
"TRZVEJU128F4234F4E",
0.00121805
],
[
"TRQAZDO128F145639F",
0.0012166600000000001
],
[
"TRJXNJM12903CF57ED",
0.0012155
],
[
"TRVAOGO128F427C9D6",
0.00120951
],
[
"TRZMZDS128F422843B",
0.0012065000000000001
],
[
"TRXIEOF12903CE8212",
0.0012058699999999999
],
[
"TRPVVUG128F42A36AA",
0.0012057599999999999
],
[
"TRXGVXS128F428AA5C",
0.0012019400000000001
],
[
"TRUBOGF128E078A5B9",
0.0012017900000000001
],
[
"TRITZSB128F4277CC2",
0.0012014
],
[
"TRGHPHX128F9343544",
0.0011975600000000001
],
[
"TRUKWPE128F428114F",
0.00119666
],
[
"TROBGRB128F93229AB",
0.0011964199999999999
],
[
"TRGKTMW12903CFAE65",
0.00119637
]
],
"tags":[
[
"punk rock",
"100"
],
[
"punk",
"60"
]
],
"track_id":"TRAAAFD128F92F423A",
"title":"Face the Ashes"
},
{
"artist":"CLP",
"timestamp":"2011-08-02 06:36:59.879759",
"similars":[
],
"tags":[
],
"track_id":"TRAAAVG12903CFA543",
"title":"Insatiable (Instrumental Version)"
}
When I validate these records using JSON Lint (http://jsonlint.com/), it tells me that the file is broken and is not valid JSON. Even after spending quite some time, I am not able to figure out what is going wrong with the merging. It would be helpful if anyone has any thoughts on this.
There are a couple of reasons to merge JSON files, for example to combine the information in two JSON files (a simple JSON data merge).
You can't just concatenate two JSON strings to make valid JSON (or combine them by tacking ',\n'
to the end of each).
Instead, you could combine the two (as Python objects) into a Python list, then use json.dump
to write it to a file as JSON:
import glob
import json

# Name of the merged output. It also matches "*.json", so it has to be
# excluded from the inputs or a second run would nest the previous result.
output_path = "merged_file.json"

# Decode every input into a Python object and collect the objects in a list,
# so the result can be serialized as a single valid JSON array.
result = []
for f in sorted(glob.glob("*.json")):  # sorted: glob order is arbitrary
    if f == output_path:
        continue
    with open(f, "r", encoding="utf-8") as infile:
        result.append(json.load(infile))

# json.dump() produces str, not bytes, so the output must be opened in text
# mode ("w"); with "wb" Python 3 raises TypeError.
with open(output_path, "w", encoding="utf-8") as outfile:
    json.dump(result, outfile)
If you wanted to do it without the (unnecessary) intermediate step of parsing each JSON file, you could merge them into a list like this:
import glob

# The merged output also matches "*.json"; leave it out of the inputs so a
# second run does not embed the previous result.
output_path = "merged_file.json"
read_files = [f for f in sorted(glob.glob("*.json")) if f != output_path]

# Read each document as text without parsing it. The context manager closes
# every input promptly instead of leaking open file handles.
documents = []
for f in read_files:
    with open(f, "r", encoding="utf-8") as infile:
        text = infile.read().strip()
    # An empty file would contribute an empty element ("[,]") and break the
    # resulting JSON, so it is skipped.
    if text:
        documents.append(text)

# Text mode on both sides: on Python 3, str.join() cannot join the bytes that
# "rb" returns. Wrapping the comma-joined documents in [ ] yields a JSON array.
with open(output_path, "w", encoding="utf-8") as outfile:
    outfile.write("[{}]".format(",".join(documents)))
There is a module called jsonmerge
which merges dictionaries. It can be used very simply by just providing two dictionaries, or you can define schemas that describe how to merge; for example, instead of overwriting values for the same keys, automatically create a list and append to it.
from jsonmerge import merge

# Starting document.
base = {"foo": 1, "bar": ["one"]}

# Document merged on top of it; it shares the "bar" key with base.
head = {"bar": ["two"], "baz": "Hello, world!"}

# No schema is passed, so merge() is used in its plain two-dictionary form.
result = merge(base, head)
print(result)
>>> {'foo': 1, 'bar': ['two'], 'baz': 'Hello, world!'}
More examples with complex rules: https://pypi.org/project/jsonmerge/#description
If you wanted to produce a JSON list of those objects, you are missing opening [
and closing ]
brackets here, and will have written one comma too many.
It'll be easier to have Python decode the objects, then write the output as a new JSON list:
import glob
import json

# The merged file lives next to the inputs and matches "*.json" itself, so it
# is filtered out to keep repeated runs from merging the old output back in.
merged_name = "merged_file.json"
read_files = [name for name in glob.glob("*.json") if name != merged_name]

# Let Python decode each object, then collect them into one list.
output_list = []
for f in read_files:
    with open(f, "r", encoding="utf-8") as infile:
        output_list.append(json.load(infile))

# Write the list back out as a single JSON array. Text mode is required:
# json.dump() writes str, and a file opened with "wb" rejects it on Python 3.
with open(merged_name, "w", encoding="utf-8") as outfile:
    json.dump(output_list, outfile)
Python solution:
You have file1.json and file2.json files.
Each file has structure:
[{"key1": "value1"}] - (in file1)
[{"key2": "value2"}] - (in file2)
Your goal is to merge them and get the following result:
[{"key1": "value1"},
{"key2": "value2"}]
Code:
import glob
import json  # required by json.load / json.dump below

# Each input file holds a JSON array; the elements of all arrays are gathered
# into one flat list.
glob_data = []
for path in glob.glob('../../file*.json'):
    with open(path, encoding='utf-8') as json_file:
        data = json.load(json_file)
    # extend() would silently accept a dict (adding its keys) or a string
    # (adding its characters), so insist on the documented list shape.
    if not isinstance(data, list):
        raise ValueError(
            'expected a JSON array at the top level of {}'.format(path))
    glob_data.extend(data)

# Write the combined list as one pretty-printed JSON array.
with open('../../finalFile.json', 'w', encoding='utf-8') as f:
    json.dump(glob_data, f, indent=4)
If you love us, you can donate to us via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With