Basically, I'm reading a pandas DataFrame and converting it to JSON. I'm a beginner in coding, but I know that it is preferable to use the apply function instead of iterrows (I already tried to use apply, but I had difficulty understanding the syntax and could not work out a solution).
===============================
Data that I´m reading from excel
id label id_customer label_customer part_number number_customer product label_product key country value_product
6 Sao Paulo CUST-99992 Brazil 982 10 sho1564 shoes SH-99 Chile 1.5
6 Sao Paulo CUST-99992 Brazil 982 10 sn47282 sneakers SN-71 Germany 43.8
6 Sao Paulo CUST-43535 Argentina 435 15 sk84393 skirt SK-11 Netherlands 87.1
92 Hong Kong CUST-88888 China 785 58 ca40349 cap CA-82 Russia 3.95
===============================
CODE:
import pandas as pd
import json
df = pd.read_excel(path)

# Build one record per (id, label) pair; sort=False keeps the groups in the
# order they first appear in the spreadsheet.
result = []
for (location_id, location_label), location_rows in df.groupby(['id', 'label'], sort=False):
    customers = []
    for (customer_id, customer_label), customer_rows in location_rows.groupby(
        ['id_customer', 'label_customer'], sort=False
    ):
        # One "Number" entry is produced per row, so a part/number pair that
        # spans several rows shows up once for each of those rows.
        numbers = []
        for part, number in zip(customer_rows['part_number'], customer_rows['number_customer']):
            numbers.append({'part': str(part), 'number_customer': str(number)})
        customers.append({'id': customer_id, 'label': customer_label, 'Number': numbers})
    result.append({'id': int(location_id), 'label': location_label, 'Customer': customers})
===============================
Json I'm getting:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10"
},
{
"part": "982",
"number_customer": "10"
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15"
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58"
}
]
}
]
}
]
===============================
Json expected:
[
{
"id": 6,
"label": "Sao Paulo",
"Customer": [
{
"id": "CUST-99992",
"label": "Brazil",
"Number": [
{
"part": "982",
"number_customer": "10",
"Products": [
{
"product": "sho1564",
"label_product": "shoes",
"Order": [
{
"key": "SH-99",
"country": "Chile",
"value_product": "1.5"
}
]
},
{
"product": "sn47282",
"label_product": "sneakers",
"Order": [
{
"key": "SN-71",
"country": "Germany",
"value_product": "43.8"
}
]
}
]
}
]
},
{
"id": "CUST-43535",
"label": "Argentina",
"Number": [
{
"part": "435",
"number_customer": "15",
"Products": [
{
"product": "sk84393",
"label_product": "skirt",
"Order": [
{
"key": "SK-11",
"country": "Netherlands",
"value_product": "87.1"
}
]
}
]
}
]
}
]
},
{
"id": 92,
"label": "Hong Kong",
"Customer": [
{
"id": "CUST-88888",
"label": "China",
"Number": [
{
"part": "785",
"number_customer": "58",
"Products": [
{
"product": "ca40349",
"label_product": "cap",
"Order": [
{
"key": "CA-82",
"country": "Russia",
"value_product": "3.95"
}
]
}
]
}
]
}
]
}
]
===============================
Note that id and label form one group of information, id_customer and label_customer form another, part_number and number_customer another, product and label_product another, and key, country and value_product another.
My expected JSON depends on the information inside my DataFrame.
Can somebody help me in any way, please?
import pandas as pd
import json
df = pd.read_excel(path)

# Walk the hierarchy from the outside in, one groupby per nesting level:
# location -> customer -> part number -> product -> order rows.
# sort=False keeps every level in the order it first appears in the sheet.
result = []
for (id_, label), df1 in df.groupby(['id', 'label'], sort=False):
    # int() turns the group key into a plain Python int for the output.
    record = {'id': int(id_), 'label': label, 'Customer': []}
    for (customer_id, customer_label), df2 in df1.groupby(
        ['id_customer', 'label_customer'], sort=False
    ):
        customer = {'id': customer_id, 'label': customer_label, 'Number': []}
        for (part, number_customer), df3 in df2.groupby(
            ['part_number', 'number_customer'], sort=False
        ):
            number = {
                'part': str(part),
                'number_customer': str(number_customer),
                'Products': [],
            }
            for (product_code, label_product), df4 in df3.groupby(
                ['product', 'label_product'], sort=False
            ):
                # The innermost level is one "Order" entry per remaining row.
                # value_product is written as a string, like the other numeric
                # fields, so it matches the expected JSON.
                orders = [
                    {'key': k, 'country': c, 'value_product': str(v)}
                    for k, c, v in zip(df4['key'], df4['country'], df4['value_product'])
                ]
                number['Products'].append(
                    {'product': product_code, 'label_product': label_product, 'Order': orders}
                )
            customer['Number'].append(number)
        record['Customer'].append(customer)
    result.append(record)
Hope this is of use!
from io import StringIO
import pandas as pd
import json
# Sample rows from the question, inlined as CSV text so the example runs
# without an Excel file. The last label reads "Hong Kong", matching the JSON
# listings in the question.
csv = """id,label,id_customer,label_customer,part_number,number_customer,product,label_product,key,country,value_product
6,Sao Paulo,CUST-99992,Brazil,982,10,sho1564,shoes,SH-99,Chile,1.5
6,Sao Paulo,CUST-99992,Brazil,982,10,sn47282,sneakers,SN-71,Germany,43.8
6,Sao Paulo,CUST-43535,Argentina,435,15,sk84393,skirt,SK-11,Netherlands,87.1
92,Hong Kong,CUST-88888,China,785,58,ca40349,cap,CA-82,Russia,3.95"""
# Wrap the text in a file-like object so read_csv can parse it.
csv = StringIO(csv)
df = pd.read_csv(csv)
def split(df, groupby, json_func, sort=False):
    """Yield one object per group of ``df``, built by ``json_func``.

    Args:
        df: DataFrame to partition.
        groupby: Column name, or list of column names, to group on.
        json_func: Callable invoked as ``json_func(group, *key_values)``, where
            ``group`` is the sub-frame for one key and ``key_values`` are that
            key's values, one per grouping column.
        sort: Forwarded to ``DataFrame.groupby``. Defaults to ``False`` so
            groups are yielded in order of first appearance instead of being
            sorted by key.

    Yields:
        The return value of ``json_func`` for each group.
    """
    for key, group in df.groupby(groupby, sort=sort):
        # Grouping on a single column can hand back a bare scalar key rather
        # than a tuple; wrap it so the star-unpacking below works either way.
        if not isinstance(key, tuple):
            key = (key,)
        yield json_func(group, *key)
# Each helper below turns one group into its JSON object and hands the group's
# rows to the next level down through split(). Numeric fields are converted
# explicitly (int for the top-level id, str for the rest) so the result matches
# the expected JSON and can be passed to json.dumps even if the group keys are
# NumPy scalars.
def _order_json(_, key, country, value_product):
    """Return one "Order" entry for a (key, country, value_product) group."""
    return {"key": key, "country": country, "value_product": str(value_product)}


def _product_json(grp, product, label_product):
    """Return one "Products" entry with its nested "Order" list."""
    return {
        "product": product,
        "label_product": label_product,
        "Order": list(split(grp, ['key', 'country', 'value_product'], _order_json)),
    }


def _number_json(grp, part, num_cust):
    """Return one "Number" entry with its nested "Products" list."""
    return {
        "part": str(part),
        "number_customer": str(num_cust),
        "Products": list(split(grp, ['product', 'label_product'], _product_json)),
    }


def _customer_json(grp, id_cust, label_cust):
    """Return one "Customer" entry with its nested "Number" list."""
    return {
        "id": id_cust,
        "label": label_cust,
        "Number": list(split(grp, ['part_number', 'number_customer'], _number_json)),
    }


def _location_json(grp, id_, label):
    """Return one top-level record with its nested "Customer" list."""
    return {
        "id": int(id_),
        "label": label,
        "Customer": list(split(grp, ['id_customer', 'label_customer'], _customer_json)),
    }


a = list(split(df, ['id', 'label'], _location_json))
# Print real JSON text; this works in a plain interpreter as well as a notebook.
print(json.dumps(a, indent=2))
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With