Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Convert Pandas Dataframe to nested JSON

I am new to Python and Pandas. I am trying to convert a Pandas Dataframe to a nested JSON. The function .to_json() doesn't give me enough flexibility for my aim.

Here are some data points of the dataframe (in csv, comma separated):

,ID,Location,Country,Latitude,Longitude,timestamp,tide   0,1,BREST,FRA,48.383,-4.495,1807-01-01,6905.0   1,1,BREST,FRA,48.383,-4.495,1807-02-01,6931.0   2,1,BREST,FRA,48.383,-4.495,1807-03-01,6896.0   3,1,BREST,FRA,48.383,-4.495,1807-04-01,6953.0   4,1,BREST,FRA,48.383,-4.495,1807-05-01,7043.0   2508,7,CUXHAVEN 2,DEU,53.867,8.717,1843-01-01,7093.0   2509,7,CUXHAVEN 2,DEU,53.867,8.717,1843-02-01,6688.0   2510,7,CUXHAVEN 2,DEU,53.867,8.717,1843-03-01,6493.0   2511,7,CUXHAVEN 2,DEU,53.867,8.717,1843-04-01,6723.0   2512,7,CUXHAVEN 2,DEU,53.867,8.717,1843-05-01,6533.0   4525,9,MAASSLUIS,NLD,51.918,4.25,1848-02-01,6880.0   4526,9,MAASSLUIS,NLD,51.918,4.25,1848-03-01,6700.0   4527,9,MAASSLUIS,NLD,51.918,4.25,1848-04-01,6775.0   4528,9,MAASSLUIS,NLD,51.918,4.25,1848-05-01,6580.0   4529,9,MAASSLUIS,NLD,51.918,4.25,1848-06-01,6685.0   6540,8,WISMAR 2,DEU,53.898999999999994,11.458,1848-07-01,6957.0   6541,8,WISMAR 2,DEU,53.898999999999994,11.458,1848-08-01,6944.0   6542,8,WISMAR 2,DEU,53.898999999999994,11.458,1848-09-01,7084.0   6543,8,WISMAR 2,DEU,53.898999999999994,11.458,1848-10-01,6898.0   6544,8,WISMAR 2,DEU,53.898999999999994,11.458,1848-11-01,6859.0   8538,10,SAN FRANCISCO,USA,37.806999999999995,-122.465,1854-07-01,6909.0   8539,10,SAN FRANCISCO,USA,37.806999999999995,-122.465,1854-08-01,6940.0   8540,10,SAN FRANCISCO,USA,37.806999999999995,-122.465,1854-09-01,6961.0   8541,10,SAN FRANCISCO,USA,37.806999999999995,-122.465,1854-10-01,6952.0   8542,10,SAN FRANCISCO,USA,37.806999999999995,-122.465,1854-11-01,6952.0   

There is a lot of repetitive information and I would like to have a JSON like this:

[ {     "ID": 1,     "Location": "BREST",     "Latitude": 48.383,     "Longitude": -4.495,     "Country": "FRA",     "Tide-Data": {         "1807-02-01": 6931,         "1807-03-01": 6896,         "1807-04-01": 6953,         "1807-05-01": 7043     } }, {     "ID": 5,     "Location": "HOLYHEAD",     "Latitude": 53.31399999999999,     "Longitude": -4.62,     "Country": "GBR",     "Tide-Data": {         "1807-02-01": 6931,         "1807-03-01": 6896,         "1807-04-01": 6953,         "1807-05-01": 7043     } } ] 

How could I achieve this?

EDIT:

Code to reproduce the dataframe:

# input json json_str = '[{"ID":1,"Location":"BREST","Country":"FRA","Latitude":48.383,"Longitude":-4.495,"timestamp":"1807-01-01","tide":6905},{"ID":1,"Location":"BREST","Country":"FRA","Latitude":48.383,"Longitude":-4.495,"timestamp":"1807-02-01","tide":6931},{"ID":1,"Location":"BREST","Country":"DEU","Latitude":48.383,"Longitude":-4.495,"timestamp":"1807-03-01","tide":6896},{"ID":7,"Location":"CUXHAVEN 2","Country":"DEU","Latitude":53.867,"Longitude":-8.717,"timestamp":"1843-01-01","tide":7093},{"ID":7,"Location":"CUXHAVEN 2","Country":"DEU","Latitude":53.867,"Longitude":-8.717,"timestamp":"1843-02-01","tide":6688},{"ID":7,"Location":"CUXHAVEN 2","Country":"DEU","Latitude":53.867,"Longitude":-8.717,"timestamp":"1843-03-01","tide":6493}]'  # load json object data_list = json.loads(json_str)  # create dataframe df = json_normalize(data_list, None, None) 
like image 518
Felix Avatar asked Nov 07 '16 17:11

Felix


1 Answer

UPDATE:

j = (df.groupby(['ID','Location','Country','Latitude','Longitude'])        .apply(lambda x: x[['timestamp','tide']].to_dict('records'))        .reset_index()        .rename(columns={0:'Tide-Data'})        .to_json(orient='records'))       

Result (formatted):

In [103]: print(json.dumps(json.loads(j), indent=2, sort_keys=True)) [   {     "Country": "FRA",     "ID": 1,     "Latitude": 48.383,     "Location": "BREST",     "Longitude": -4.495,     "Tide-Data": [       {         "tide": 6905.0,         "timestamp": "1807-01-01"       },       {         "tide": 6931.0,         "timestamp": "1807-02-01"       },       {         "tide": 6896.0,         "timestamp": "1807-03-01"       },       {         "tide": 6953.0,         "timestamp": "1807-04-01"       },       {         "tide": 7043.0,         "timestamp": "1807-05-01"       }     ]   },   {     "Country": "DEU",     "ID": 7,     "Latitude": 53.867,     "Location": "CUXHAVEN 2",     "Longitude": 8.717,     "Tide-Data": [       {         "tide": 7093.0,         "timestamp": "1843-01-01"       },       {         "tide": 6688.0,         "timestamp": "1843-02-01"       },       {         "tide": 6493.0,         "timestamp": "1843-03-01"       },       {         "tide": 6723.0,         "timestamp": "1843-04-01"       },       {         "tide": 6533.0,         "timestamp": "1843-05-01"       }     ]   },   {     "Country": "DEU",     "ID": 8,     "Latitude": 53.899,     "Location": "WISMAR 2",     "Longitude": 11.458,     "Tide-Data": [       {         "tide": 6957.0,         "timestamp": "1848-07-01"       },       {         "tide": 6944.0,         "timestamp": "1848-08-01"       },       {         "tide": 7084.0,         "timestamp": "1848-09-01"       },       {         "tide": 6898.0,         "timestamp": "1848-10-01"       },       {         "tide": 6859.0,         "timestamp": "1848-11-01"       }     ]   },   {     "Country": "NLD",     "ID": 9,     "Latitude": 51.918,     "Location": "MAASSLUIS",     "Longitude": 4.25,     "Tide-Data": [       {         "tide": 6880.0,         "timestamp": "1848-02-01"       },       {         "tide": 6700.0,         "timestamp": "1848-03-01"       },       {         "tide": 6775.0,         "timestamp": "1848-04-01"       },       {     
    "tide": 6580.0,         "timestamp": "1848-05-01"       },       {         "tide": 6685.0,         "timestamp": "1848-06-01"       }     ]   },   {     "Country": "USA",     "ID": 10,     "Latitude": 37.807,     "Location": "SAN FRANCISCO",     "Longitude": -122.465,     "Tide-Data": [       {         "tide": 6909.0,         "timestamp": "1854-07-01"       },       {         "tide": 6940.0,         "timestamp": "1854-08-01"       },       {         "tide": 6961.0,         "timestamp": "1854-09-01"       },       {         "tide": 6952.0,         "timestamp": "1854-10-01"       },       {         "tide": 6952.0,         "timestamp": "1854-11-01"       }     ]   } ] 

OLD answer:

You can do it using groupby(), apply() and to_json() methods:

j = (df.groupby(['ID','Location','Country','Latitude','Longitude'], as_index=False)        .apply(lambda x: dict(zip(x.timestamp,x.tide)))        .reset_index()        .rename(columns={0:'Tide-Data'})        .to_json(orient='records')) 

Output:

In [112]: print(json.dumps(json.loads(j), indent=2, sort_keys=True)) [   {     "Country": "FRA",     "ID": 1,     "Latitude": 48.383,     "Location": "BREST",     "Longitude": -4.495,     "Tide-Data": {       "1807-01-01": 6905.0,       "1807-02-01": 6931.0,       "1807-03-01": 6896.0,       "1807-04-01": 6953.0,       "1807-05-01": 7043.0     }   },   {     "Country": "DEU",     "ID": 7,     "Latitude": 53.867,     "Location": "CUXHAVEN 2",     "Longitude": 8.717,     "Tide-Data": {       "1843-01-01": 7093.0,       "1843-02-01": 6688.0,       "1843-03-01": 6493.0,       "1843-04-01": 6723.0,       "1843-05-01": 6533.0     }   },   {     "Country": "DEU",     "ID": 8,     "Latitude": 53.899,     "Location": "WISMAR 2",     "Longitude": 11.458,     "Tide-Data": {       "1848-07-01": 6957.0,       "1848-08-01": 6944.0,       "1848-09-01": 7084.0,       "1848-10-01": 6898.0,       "1848-11-01": 6859.0     }   },   {     "Country": "NLD",     "ID": 9,     "Latitude": 51.918,     "Location": "MAASSLUIS",     "Longitude": 4.25,     "Tide-Data": {       "1848-02-01": 6880.0,       "1848-03-01": 6700.0,       "1848-04-01": 6775.0,       "1848-05-01": 6580.0,       "1848-06-01": 6685.0     }   },   {     "Country": "USA",     "ID": 10,     "Latitude": 37.807,     "Location": "SAN FRANCISCO",     "Longitude": -122.465,     "Tide-Data": {       "1854-07-01": 6909.0,       "1854-08-01": 6940.0,       "1854-09-01": 6961.0,       "1854-10-01": 6952.0,       "1854-11-01": 6952.0     }   } ] 

PS: if you don't care about indentation, you can write directly to a JSON file:

(df.groupby(['ID','Location','Country','Latitude','Longitude'], as_index=False)    .apply(lambda x: dict(zip(x.timestamp,x.tide)))    .reset_index()    .rename(columns={0:'Tide-Data'})    .to_json('/path/to/file_name.json', orient='records')) 
like image 124
MaxU - stop WAR against UA Avatar answered Oct 03 '22 00:10

MaxU - stop WAR against UA