Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Create a nested JSON from a python dataframe

Tags:

python

json

I have an input dataframe like this :

   CUSTOMER_ID PRODUCT_ID VENDOR_ID         DAT        ORDER_ID COLOR_ID  
0     10078229  508136536       450  2018-11-23  20183200576771     1000   
1     10078229  508136532       450  2018-11-23  20183200576771     1000   
2     10202280  506894206       450  2018-11-23  20183231461778     1000   
3     10207584  500970872      2097  2018-11-23  20183231430937     1002   
4     10207584  500970872      2097  2018-11-23  20183231430937     1000   
5     10268028  511131122       450  2018-11-23  20183231418341     1000   
6     10268028  509736876       450  2018-11-23  20183231418341     1000   
7     10268028  507095754       450  2018-11-23  20183231418341     1000   
8     10268028  513902792       450  2018-11-23  20183231418341     1000   
9     10383692  508229004       450  2018-11-23  20183190670154     1000

And I want a 3 level json output like this for my output :

[
    {
    "CUSTOMER_ID": 10078229,
    "ORDER" : [
        {
        "ORDER_ID": 20183200576771,
        "DAT": "2018-11-23",
        "PRODUCT": [
            {
            "PRODUCT_ID": 508136536,
            "COLOR_ID": 1000,
            "SIZE_ID" : 1002
            },
            {
            "PRODUCT_ID": 508136532,
            "COLOR_ID": 1000,
            "SIZE_ID" : 1003
            }
                ],
        "VENDOR": [
            {
            "VENDOR_ID" : 1234
            },
            {
            "VENDOR_ID" : 12345
            }    ]
        },
        {
        "ORDER_ID" : 2222 ...
        }   ]
    }
    , "CUSTOMER_ID" : 12345 ....
 ]

I am able to create a json with customer on the header level and other columns in a second level. But I can't do a hierarchy like customer/order/(products and vendors).

Does someone has a hint ? Thanks,

EDIT : what I've done from now on (I try to create 1 json by customer) :

df_final1=pd.DataFrame(columns=['CUSTOMER_ID','JSON_EVENT'])

for i in data_test['CUSTOMER_ID'].unique():
    for j in data_test[['ORDER_ID','DAT']].apply(lambda x: ' '.join(x.astype(str)), axis=1).unique():
        a=pd.DataFrame([[i,
            data_test[(data_test[['ORDER_ID','DAT']].apply(lambda x: ' '.join(x.astype(str)), axis=1)==j) & (data_test['CUSTOMER_ID']==i) ]
                         .groupby(['ORDER_ID','DAT'])['PRODUCT_ID', 
            'VENDOR_ID','SIZE','COLOR'].apply(
            lambda x: x.to_dict(orient='records')).reset_index('PRODUITS NON NOTES').to_json(orient='records')]], 
               columns=['CUSTOMER_ID','JSON_EVENT'])
        df_final1=df_final1.append(a,ignore_index=True) 
like image 965
urdelLR Avatar asked Nov 06 '22 23:11

urdelLR


1 Answers

You can use itertools.groupby:

from itertools import groupby, count
import json
_count = count(1002)
data = df_final1.reset_index().values.tolist()
#[[10078229, 508136536, 450, '2018-11-23', 20183200576771, 1000], [10078229, 508136532, 450, '2018-11-23', 20183200576771, 1000], [10202280, 506894206, 450, '2018-11-23', 20183231461778, 1000], [10207584, 500970872, 2097, '2018-11-23', 20183231430937, 1002], [10207584, 500970872, 2097, '2018-11-23', 20183231430937, 1000], [10268028, 511131122, 450, '2018-11-23', 20183231418341, 1000], [10268028, 509736876, 450, '2018-11-23', 20183231418341, 1000], [10268028, 507095754, 450, '2018-11-23', 20183231418341, 1000], [10268028, 513902792, 450, '2018-11-23', 20183231418341, 1000], [10383692, 508229004, 450, '2018-11-23', 20183190670154, 1000]]
grouped = [[a, [c for _, *c in b]] for a, b in groupby(data, key=lambda x:x[0])]
new_grouped = [[a, [[c, list(d)] for c, d in groupby(b, key=lambda _k:_k[-2])]] for a, b in grouped]
final_structure = [{'CUSTOMER_ID':a, 'ORDER':[{'ORDER_ID':c, 'DAT':b[0][2], 
      'PRODUCT':[{'PRODUCT_ID':h, 'COLOR_ID':j, 'SIZE_ID':next(_count)} for h, *_, j in b], 
            'VENDOR':[h[1] for h in b]} for c, b in j]} for a, j in new_grouped]

print(json.dumps(final_structure, indent=4))

Output:

[
   {
    "CUSTOMER_ID": 10078229,
    "ORDER": [
        {
            "ORDER_ID": 20183200576771,
            "DAT": "2018-11-23",
            "PRODUCT": [
                {
                    "PRODUCT_ID": 508136536,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1002
                },
                {
                    "PRODUCT_ID": 508136532,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1003
                }
            ],
            "VENDOR": [
                450,
                450
            ]
        }
    ]
 },
 {
    "CUSTOMER_ID": 10202280,
    "ORDER": [
        {
            "ORDER_ID": 20183231461778,
            "DAT": "2018-11-23",
            "PRODUCT": [
                {
                    "PRODUCT_ID": 506894206,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1004
                }
            ],
            "VENDOR": [
                450
            ]
        }
    ]
 },
 {
    "CUSTOMER_ID": 10207584,
    "ORDER": [
        {
            "ORDER_ID": 20183231430937,
            "DAT": "2018-11-23",
            "PRODUCT": [
                {
                    "PRODUCT_ID": 500970872,
                    "COLOR_ID": 1002,
                    "SIZE_ID": 1005
                },
                {
                    "PRODUCT_ID": 500970872,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1006
                }
            ],
            "VENDOR": [
                2097,
                2097
            ]
        }
    ]
},
{
    "CUSTOMER_ID": 10268028,
    "ORDER": [
        {
            "ORDER_ID": 20183231418341,
            "DAT": "2018-11-23",
            "PRODUCT": [
                {
                    "PRODUCT_ID": 511131122,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1006
                },
                {
                    "PRODUCT_ID": 509736876,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1007
                },
                {
                    "PRODUCT_ID": 507095754,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1008
                },
                {
                    "PRODUCT_ID": 513902792,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1009
                }
            ],
            "VENDOR": [
                450,
                450,
                450,
                450
            ]
        }
    ]
},
{
    "CUSTOMER_ID": 10383692,
    "ORDER": [
        {
            "ORDER_ID": 20183190670154,
            "DAT": "2018-11-23",
            "PRODUCT": [
                {
                    "PRODUCT_ID": 508229004,
                    "COLOR_ID": 1000,
                    "SIZE_ID": 1010
                }
            ],
            "VENDOR": [
                450
            ]
        }
     ]
  }
]
like image 107
Ajax1234 Avatar answered Nov 14 '22 21:11

Ajax1234