Background
I have a complex nested JSON object, which I am trying to unpack into a pandas df in a very specific way.
JSON Object
this is an extract, containing randomized data of the JSON object, which shows examples of the hierarchy (inc. children) for 1x family (i.e. 'Falconer Family'), however there is 100s of them in total and this extract just has 1x family, however the full JSON object has multiple -
{
"meta": {
"columns": [{
"key": "value",
"display_name": "Adjusted Value (No Div, USD)",
"output_type": "Number",
"currency": "USD"
},
{
"key": "time_weighted_return",
"display_name": "Current Quarter TWR (USD)",
"output_type": "Percent",
"currency": "USD"
},
{
"key": "time_weighted_return_2",
"display_name": "YTD TWR (USD)",
"output_type": "Percent",
"currency": "USD"
},
{
"key": "_custom_twr_audit_note_911328",
"display_name": "TWR Audit Note",
"output_type": "Word"
}
],
"groupings": [{
"key": "_custom_name_747205",
"display_name": "* Reporting Client Name"
},
{
"key": "_custom_new_entity_group_453577",
"display_name": "NEW Entity Group"
},
{
"key": "_custom_level_2_624287",
"display_name": "* Level 2"
},
{
"key": "legal_entity",
"display_name": "Legal Entity"
}
]
},
"data": {
"type": "portfolio_views",
"attributes": {
"total": {
"name": "Total",
"columns": {
"time_weighted_return": -0.046732301295604683,
"time_weighted_return_2": -0.046732301295604683,
"_custom_twr_audit_note_911328": null,
"value": 23132492.905107163
},
"children": [{
"name": "Falconer Family",
"grouping": "_custom_name_747205",
"columns": {
"time_weighted_return": -0.046732301295604683,
"time_weighted_return_2": -0.046732301295604683,
"_custom_twr_audit_note_911328": null,
"value": 23132492.905107163
},
"children": [{
"name": "Wealth Bucket A",
"grouping": "_custom_new_entity_group_453577",
"columns": {
"time_weighted_return": -0.045960317420568164,
"time_weighted_return_2": -0.045960317420568164,
"_custom_twr_audit_note_911328": null,
"value": 13264448.506587159
},
"children": [{
"name": "Asset Class A",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": 0.000003434094574039648,
"time_weighted_return_2": 0.000003434094574039648,
"_custom_twr_audit_note_911328": null,
"value": 3337.99
},
"children": [{
"entity_id": 10604454,
"name": "HUDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000003434094574039648,
"time_weighted_return_2": 0.000003434094574039648,
"_custom_twr_audit_note_911328": null,
"value": 3337.99
},
"children": []
}]
},
{
"name": "Asset Class B",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.025871339096964152,
"time_weighted_return_2": -0.025871339096964152,
"_custom_twr_audit_note_911328": null,
"value": 1017004.7192636987
},
"children": [{
"entity_id": 10604454,
"name": "HUDG Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.025871339096964152,
"time_weighted_return_2": -0.025871339096964152,
"_custom_twr_audit_note_911328": null,
"value": 1017004.7192636987
},
"children": []
}]
},
{
"name": "Asset Class C",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.030370376329670656,
"time_weighted_return_2": -0.030370376329670656,
"_custom_twr_audit_note_911328": null,
"value": 231142.67772000004
},
"children": [{
"entity_id": 10604454,
"name": "HKDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.030370376329670656,
"time_weighted_return_2": -0.030370376329670656,
"_custom_twr_audit_note_911328": null,
"value": 231142.67772000004
},
"children": []
}]
},
{
"name": "Asset Class D",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.05382756475465478,
"time_weighted_return_2": -0.05382756475465478,
"_custom_twr_audit_note_911328": null,
"value": 9791282.570000006
},
"children": [{
"entity_id": 10604454,
"name": "HUDW Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05382756475465478,
"time_weighted_return_2": -0.05382756475465478,
"_custom_twr_audit_note_911328": null,
"value": 9791282.570000006
},
"children": []
}]
},
{
"name": "Asset Class E",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.01351630404081805,
"time_weighted_return_2": -0.01351630404081805,
"_custom_twr_audit_note_911328": null,
"value": 2153366.6396034593
},
"children": [{
"entity_id": 10604454,
"name": "HJDJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.01351630404081805,
"time_weighted_return_2": -0.01351630404081805,
"_custom_twr_audit_note_911328": null,
"value": 2153366.6396034593
},
"children": []
}]
},
{
"name": "Asset Class F",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.002298190175237247,
"time_weighted_return_2": -0.002298190175237247,
"_custom_twr_audit_note_911328": null,
"value": 68313.90999999999
},
"children": [{
"entity_id": 10604454,
"name": "HADJ Trust",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.002298190175237247,
"time_weighted_return_2": -0.002298190175237247,
"_custom_twr_audit_note_911328": null,
"value": 68313.90999999999
},
"children": []
}]
}
]
},
{
"name": "Wealth Bucket B",
"grouping": "_custom_new_entity_group_453577",
"columns": {
"time_weighted_return": -0.04769870075659244,
"time_weighted_return_2": -0.04769870075659244,
"_custom_twr_audit_note_911328": null,
"value": 9868044.398519998
},
"children": [{
"name": "Asset Class A",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": 0.000028632718065191298,
"time_weighted_return_2": 0.000028632718065191298,
"_custom_twr_audit_note_911328": null,
"value": 10234.94
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.0000282679297198829,
"time_weighted_return_2": 0.0000282679297198829,
"_custom_twr_audit_note_911328": null,
"value": 244.28
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000049373572795108345,
"time_weighted_return_2": 0.000049373572795108345,
"_custom_twr_audit_note_911328": null,
"value": 5081.08
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000006609603754315074,
"time_weighted_return_2": 0.000006609603754315074,
"_custom_twr_audit_note_911328": null,
"value": 1523.62
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000010999769004760296,
"time_weighted_return_2": 0.000010999769004760296,
"_custom_twr_audit_note_911328": null,
"value": 1828.9
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0.000006466673995619843,
"time_weighted_return_2": 0.000006466673995619843,
"_custom_twr_audit_note_911328": null,
"value": 1557.06
},
"children": []
}
]
},
{
"name": "Asset Class B",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.024645947842438676,
"time_weighted_return_2": -0.024645947842438676,
"_custom_twr_audit_note_911328": null,
"value": 674052.31962
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.043304004172576405,
"time_weighted_return_2": -0.043304004172576405,
"_custom_twr_audit_note_911328": null,
"value": 52800.96
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.022408434778798836,
"time_weighted_return_2": -0.022408434778798836,
"_custom_twr_audit_note_911328": null,
"value": 599594.11962
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.039799855483646174,
"time_weighted_return_2": -0.039799855483646174,
"_custom_twr_audit_note_911328": null,
"value": 7219.08
},
"children": []
}
]
},
{
"name": "Asset Class C",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.03037038746301135,
"time_weighted_return_2": -0.03037038746301135,
"_custom_twr_audit_note_911328": null,
"value": 114472.69744
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.030370390035505124,
"time_weighted_return_2": -0.030370390035505124,
"_custom_twr_audit_note_911328": null,
"value": 114472.68744000001
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": 0,
"time_weighted_return_2": 0,
"_custom_twr_audit_note_911328": null,
"value": 0.01
},
"children": []
}
]
},
{
"name": "Asset Class D",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.06604362523792162,
"time_weighted_return_2": -0.06604362523792162,
"_custom_twr_audit_note_911328": null,
"value": 5722529.229999997
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.06154960593668424,
"time_weighted_return_2": -0.06154960593668424,
"_custom_twr_audit_note_911328": null,
"value": 1191838.9399999995
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.06750460387418267,
"time_weighted_return_2": -0.06750460387418267,
"_custom_twr_audit_note_911328": null,
"value": 4416618.520000002
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 38190.33
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 37940.72
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.05604507809250081,
"time_weighted_return_2": -0.05604507809250081,
"_custom_twr_audit_note_911328": null,
"value": 37940.72
},
"children": []
}
]
},
{
"name": "Asset Class E",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.017118805423322003,
"time_weighted_return_2": -0.017118805423322003,
"_custom_twr_audit_note_911328": null,
"value": 3148495.0914600003
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.015251157805867277,
"time_weighted_return_2": -0.015251157805867277,
"_custom_twr_audit_note_911328": null,
"value": 800493.06146
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.01739609576880241,
"time_weighted_return_2": -0.01739609576880241,
"_custom_twr_audit_note_911328": null,
"value": 2215511.2700000005
},
"children": []
},
{
"entity_id": 10598341,
"name": "Cht 11th Tr HBO Shirley",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.02085132265594647,
"time_weighted_return_2": -0.02085132265594647,
"_custom_twr_audit_note_911328": null,
"value": 44031.21
},
"children": []
},
{
"entity_id": 10598337,
"name": "Cht 11th Tr HBO Hannah",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.02089393244695803,
"time_weighted_return_2": -0.02089393244695803,
"_custom_twr_audit_note_911328": null,
"value": 44394.159999999996
},
"children": []
},
{
"entity_id": 10598334,
"name": "Cht 11th Tr HBO Lau",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.020607507059866248,
"time_weighted_return_2": -0.020607507059866248,
"_custom_twr_audit_note_911328": null,
"value": 44065.39000000001
},
"children": []
}
]
},
{
"name": "Asset Class F",
"grouping": "_custom_level_2_624287",
"columns": {
"time_weighted_return": -0.0014710489231547497,
"time_weighted_return_2": -0.0014710489231547497,
"_custom_twr_audit_note_911328": null,
"value": 198260.12
},
"children": [{
"entity_id": 10868778,
"name": "2012 Desc Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.0014477244560456848,
"time_weighted_return_2": -0.0014477244560456848,
"_custom_twr_audit_note_911328": null,
"value": 44612.33
},
"children": []
},
{
"entity_id": 10643052,
"name": "2013 Irrev Tr HBO Thalia",
"grouping": "legal_entity",
"columns": {
"time_weighted_return": -0.001477821083437858,
"time_weighted_return_2": -0.001477821083437858,
"_custom_twr_audit_note_911328": null,
"value": 153647.78999999998
},
"children": []
}
]
}
]
}
]
}]
}
},
"included": []
}
}
Notes on JSON Object extract
data - data in here can be ignored, these are aggregated values for underlying children.meta - columns – contains the column header values I want to use for each applicable children ‘column` key:pair values.groupings - can be ignored.children hierarchy – there are 4x levels of children which can be identified by their name as follows –
name (i.e., ‘Falconer Family’)name (e.g., ‘Wealth Bucket A’)name (e.g., ‘Asset Class A’)name (e.g., ‘HUDJ Trust’)Target Output
this is an extract of target df structure I am trying to achieve -
| portfolio | name | entity_id | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note |
|---|---|---|---|---|---|---|
| Falconer Family | Falconer Family | 23132492.90510712 | -0.046732301295604683 | -0.046732301295604683 | None | |
| Falconer Family | Wealth Bucket A | 13264448.506587146 | -0.045960317420568164 | -0.045960317420568164 | None | |
| Falconer Family | Asset Class A | 3337.99 | 0.000003434094574039648 | 0.000003434094574039648 | None | |
| Falconer Family | HUDJ Trust | 10604454 | 3337.99 | 0.000003434094574039648 | 0.000003434094574039648 | None |
| Falconer Family | Asset Class B | 1017004.7192636987 | -0.025871339096964152 | -0.025871339096964152 | None | |
| Falconer Family | HUDG Trust | 10604454 | 1017004.7192636987 | -0.025871339096964152 | -0.025871339096964152 | None |
| Falconer Family | Asset Class C | 231142.67772000004 | -0.030370376329670656 | -0.030370376329670656 | None | |
| Falconer Family | HKDJ Trust | 10604454 | 231142.67772000004 | -0.030370376329670656 | -0.030370376329670656 | None |
| Falconer Family | Asset Class D | 9791282.570000006 | -0.05382756475465478 | -0.05382756475465478 | None | |
| Falconer Family | HUDW Trust | 10604454 | 9791282.570000006 | -0.05382756475465478 | -0.05382756475465478 | None |
Notes on Target Output
children name value [family name]. E.g., ‘Falconer Family.name value from each respective children.children entity_id value should be mapped to this column.children have identical time_weighted_return, time-weighted_return2 and value columns which should be mapped respectively.children _custom_twr_audit_note_911318 values are currently blank, but will be utilized in the future.Current Output
My main issue is that you can see that I have only been able to tap into the 1st [Family] and 2nd [Wealth Bucket] children level. This leaves me missing the 3rd [Asset Class] and 4th [Fund] -
| portfolio | name | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note) | |
|---|---|---|---|---|---|---|
| 0 | Falconer Family | Falconer Family | 2.313249e+07 | -0.046732 | -0.046732 | None |
| 1 | Falconer Family | Wealth Bucket A | 1.326445e+07 | -0.045960 | -0.045960 | None |
| 2 | Falconer Family | Wealth Bucket B | 9.868044e+06 | -0.047699 | -0.047699 | None |
Current code
This is a function which gets me the correct df formatting, however my main issue is that I haven't been able to find a solution to returning all children, but rather only the top-level -
# Function to read API response / JSON Object
def response_writer():
with open('api_response_2022-02-13.json') as f:
api_response = json.load(f)
return api_response
# Function to unpack JSON response into pandas dataframe.
def unpack_response():
while True:
try:
api_response = response_writer()
portfolio_views_children = api_response['data']['attributes']['total']['children']
portfolios = []
for portfolio in portfolio_views_children:
entity_columns = []
# include portfolio itself within an iterable so the total is the header
for entity in itertools.chain([portfolio], portfolio["children"]):
entity_data = entity["columns"].copy() # don't mutate original response
entity_data["portfolio"] = portfolio["name"] # from outer
entity_data["name"] = entity["name"]
entity_columns.append(entity_data)
df = pd.DataFrame(entity_columns)
portfolios.append(df)
# combine dataframes
df = pd.concat(portfolios)
# reorder and rename
column_ordering = {"portfolio": "portfolio", "name": "name"}
column_ordering.update({c["key"]: c["display_name"] for c in api_response["meta"]["columns"]})
df = df[column_ordering.keys()] # beware: un-named cols will be dropped
df = df.rename(columns=column_ordering)
break
except KeyError:
print("-----------------------------------\n","API TIMEOUT ERROR: TRY AGAIN...", "\n-----------------------------------\n")
return df
unpack_response()
Help
In short, I am looking for some advice on how I can tap into the remaining children by enhancing the existing code. Whilst I have taken much time to fully explain my problem, please ask if anything isn't clear. Please note that the JSON may have multiple families, so the solution / advice offered must observe this
jsonpath-ng can parse even such a nested json object very easily. You can install this convenient library by the following command:
pip install --upgrade jsonpath-ng
import json
import jsonpath_ng as jp
import pandas as pd
def unpack_response(r):
# Create a dataframe from extracted data
expr = jp.parse('$..children.[*]')
data = [{'full_path': str(m.full_path), **m.value} for m in expr.find(r)]
df = pd.json_normalize(data).sort_values('full_path', ignore_index=True)
# Append a portfolio column
df['portfolio'] = df.loc[df.full_path.str.contains(r'total\.children\.\[\d+]$'), 'name']
df['portfolio'].fillna(method='ffill', inplace=True)
# Deal with columns
trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']}
cols = ['full_path', 'portfolio', 'name', 'entity_id', 'Adjusted Value (No Div, USD)', 'Current Quarter TWR (USD)', 'YTD TWR (USD)', 'TWR Audit Note']
df = df.rename(columns=trans)[cols]
return df
# Load the sample data from file
# with open('api_response_2022-02-13.json', 'r') as f:
# api_response = json.load(f)
# Load the sample data from string
api_response = json.loads('{"meta": {"columns": [{"key": "value", "display_name": "Adjusted Value (No Div, USD)", "output_type": "Number", "currency": "USD"}, {"key": "time_weighted_return", "display_name": "Current Quarter TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "time_weighted_return_2", "display_name": "YTD TWR (USD)", "output_type": "Percent", "currency": "USD"}, {"key": "_custom_twr_audit_note_911328", "display_name": "TWR Audit Note", "output_type": "Word"}], "groupings": [{"key": "_custom_name_747205", "display_name": "* Reporting Client Name"}, {"key": "_custom_new_entity_group_453577", "display_name": "NEW Entity Group"}, {"key": "_custom_level_2_624287", "display_name": "* Level 2"}, {"key": "legal_entity", "display_name": "Legal Entity"}]}, "data": {"type": "portfolio_views", "attributes": {"total": {"name": "Total", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Falconer Family", "grouping": "_custom_name_747205", "columns": {"time_weighted_return": -0.046732301295604683, "time_weighted_return_2": -0.046732301295604683, "_custom_twr_audit_note_911328": null, "value": 23132492.905107163}, "children": [{"name": "Wealth Bucket A", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.045960317420568164, "time_weighted_return_2": -0.045960317420568164, "_custom_twr_audit_note_911328": null, "value": 13264448.506587159}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, "_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": [{"entity_id": 10604454, "name": "HUDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": 3.434094574039648e-06, "time_weighted_return_2": 3.434094574039648e-06, "_custom_twr_audit_note_911328": null, "value": 3337.99}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": [{"entity_id": 10604454, "name": "HUDG Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.025871339096964152, "time_weighted_return_2": -0.025871339096964152, "_custom_twr_audit_note_911328": null, "value": 1017004.7192636987}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": [{"entity_id": 10604454, "name": "HKDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370376329670656, "time_weighted_return_2": -0.030370376329670656, "_custom_twr_audit_note_911328": null, "value": 231142.67772000004}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": [{"entity_id": 10604454, "name": "HUDW Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05382756475465478, "time_weighted_return_2": -0.05382756475465478, "_custom_twr_audit_note_911328": null, "value": 9791282.570000006}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": [{"entity_id": 10604454, "name": "HJDJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.01351630404081805, "time_weighted_return_2": -0.01351630404081805, "_custom_twr_audit_note_911328": null, "value": 2153366.6396034593}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": [{"entity_id": 10604454, "name": "HADJ Trust", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.002298190175237247, "time_weighted_return_2": -0.002298190175237247, "_custom_twr_audit_note_911328": null, "value": 68313.90999999999}, "children": []}]}]}, {"name": "Wealth Bucket B", "grouping": "_custom_new_entity_group_453577", "columns": {"time_weighted_return": -0.04769870075659244, "time_weighted_return_2": -0.04769870075659244, "_custom_twr_audit_note_911328": null, "value": 9868044.398519998}, "children": [{"name": "Asset Class A", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": 2.8632718065191298e-05, "time_weighted_return_2": 2.8632718065191298e-05, "_custom_twr_audit_note_911328": null, "value": 10234.94}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 2.82679297198829e-05, "time_weighted_return_2": 2.82679297198829e-05, "_custom_twr_audit_note_911328": null, "value": 244.28}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 4.9373572795108345e-05, "time_weighted_return_2": 4.9373572795108345e-05, "_custom_twr_audit_note_911328": null, "value": 5081.08}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.609603754315074e-06, "time_weighted_return_2": 6.609603754315074e-06, "_custom_twr_audit_note_911328": null, "value": 1523.62}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": 1.0999769004760296e-05, "time_weighted_return_2": 1.0999769004760296e-05, "_custom_twr_audit_note_911328": null, "value": 1828.9}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": 6.466673995619843e-06, "time_weighted_return_2": 6.466673995619843e-06, "_custom_twr_audit_note_911328": null, "value": 1557.06}, "children": []}]}, {"name": "Asset Class B", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.024645947842438676, "time_weighted_return_2": -0.024645947842438676, "_custom_twr_audit_note_911328": null, "value": 674052.31962}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.043304004172576405, "time_weighted_return_2": -0.043304004172576405, "_custom_twr_audit_note_911328": null, "value": 52800.96}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.022408434778798836, "time_weighted_return_2": -0.022408434778798836, "_custom_twr_audit_note_911328": null, "value": 599594.11962}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.039799855483646174, "time_weighted_return_2": -0.039799855483646174, "_custom_twr_audit_note_911328": null, "value": 7219.08}, "children": []}]}, {"name": "Asset Class C", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.03037038746301135, "time_weighted_return_2": -0.03037038746301135, "_custom_twr_audit_note_911328": null, "value": 114472.69744}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.030370390035505124, "time_weighted_return_2": -0.030370390035505124, "_custom_twr_audit_note_911328": null, "value": 114472.68744000001}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": 0, "time_weighted_return_2": 0, "_custom_twr_audit_note_911328": null, "value": 0.01}, "children": []}]}, {"name": "Asset Class D", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.06604362523792162, "time_weighted_return_2": -0.06604362523792162, "_custom_twr_audit_note_911328": null, "value": 5722529.229999997}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06154960593668424, "time_weighted_return_2": -0.06154960593668424, "_custom_twr_audit_note_911328": null, "value": 1191838.9399999995}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.06750460387418267, "time_weighted_return_2": -0.06750460387418267, "_custom_twr_audit_note_911328": null, "value": 4416618.520000002}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 38190.33}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.05604507809250081, "time_weighted_return_2": -0.05604507809250081, "_custom_twr_audit_note_911328": null, "value": 37940.72}, "children": []}]}, {"name": "Asset Class E", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.017118805423322003, "time_weighted_return_2": -0.017118805423322003, "_custom_twr_audit_note_911328": null, "value": 3148495.0914600003}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.015251157805867277, "time_weighted_return_2": -0.015251157805867277, "_custom_twr_audit_note_911328": null, "value": 800493.06146}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.01739609576880241, "time_weighted_return_2": -0.01739609576880241, "_custom_twr_audit_note_911328": null, "value": 2215511.2700000005}, "children": []}, {"entity_id": 10598341, "name": "Cht 11th Tr HBO Shirley", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02085132265594647, "time_weighted_return_2": -0.02085132265594647, "_custom_twr_audit_note_911328": null, "value": 44031.21}, "children": []}, {"entity_id": 10598337, "name": "Cht 11th Tr HBO Hannah", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.02089393244695803, "time_weighted_return_2": -0.02089393244695803, "_custom_twr_audit_note_911328": null, "value": 44394.159999999996}, "children": []}, {"entity_id": 10598334, "name": "Cht 11th Tr HBO Lau", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.020607507059866248, "time_weighted_return_2": -0.020607507059866248, "_custom_twr_audit_note_911328": null, "value": 44065.39000000001}, "children": []}]}, {"name": "Asset Class F", "grouping": "_custom_level_2_624287", "columns": {"time_weighted_return": -0.0014710489231547497, "time_weighted_return_2": -0.0014710489231547497, "_custom_twr_audit_note_911328": null, "value": 198260.12}, "children": [{"entity_id": 10868778, "name": "2012 Desc Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.0014477244560456848, "time_weighted_return_2": -0.0014477244560456848, "_custom_twr_audit_note_911328": null, "value": 44612.33}, "children": []}, {"entity_id": 10643052, "name": "2013 Irrev Tr HBO Thalia", "grouping": "legal_entity", "columns": {"time_weighted_return": -0.001477821083437858, "time_weighted_return_2": -0.001477821083437858, "_custom_twr_audit_note_911328": null, "value": 153647.78999999998}, "children": []}]}]}]}]}}, "included": []}}')
df = unpack_response(api_response)
Firstly, you can confirm the expected output by the following command:
print(df.iloc[:5:,1:])
| portfolio | name | entity_id | Adjusted Value (No Div, USD) | Current Quarter TWR (USD) | YTD TWR (USD) | TWR Audit Note |
|---|---|---|---|---|---|---|
| Falconer Family | Falconer Family | nan | 2.31325e+07 | -0.0467323 | -0.0467323 | |
| Falconer Family | Wealth Bucket A | nan | 1.32644e+07 | -0.0459603 | -0.0459603 | |
| Falconer Family | Asset Class A | nan | 3337.99 | 3.43409e-06 | 3.43409e-06 | |
| Falconer Family | HUDJ Trust | 1.06045e+07 | 3337.99 | 3.43409e-06 | 3.43409e-06 | |
| Falconer Family | Asset Class B | nan | 1.017e+06 | -0.0258713 | -0.0258713 |
Subsequently, you can see one of the wonderful features in jsonpath-ng by the following command:
print(df.iloc[:10,:3])
| full_path | portfolio | name |
|---|---|---|
| data.attributes.total.children.[0] | Falconer Family | Falconer Family |
| data.attributes.total.children.[0].children.[0] | Falconer Family | Wealth Bucket A |
| data.attributes.total.children.[0].children.[0].children.[0] | Falconer Family | Asset Class A |
| data.attributes.total.children.[0].children.[0].children.[0].children.[0] | Falconer Family | HUDJ Trust |
| data.attributes.total.children.[0].children.[0].children.[1] | Falconer Family | Asset Class B |
| data.attributes.total.children.[0].children.[0].children.[1].children.[0] | Falconer Family | HUDG Trust |
| data.attributes.total.children.[0].children.[0].children.[2] | Falconer Family | Asset Class C |
| data.attributes.total.children.[0].children.[0].children.[2].children.[0] | Falconer Family | HKDJ Trust |
| data.attributes.total.children.[0].children.[0].children.[3] | Falconer Family | Asset Class D |
| data.attributes.total.children.[0].children.[0].children.[3].children.[0] | Falconer Family | HUDW Trust |
Thanks to the full_path column, you can grasp the nesting level of the extracted data in each row instantaneously. Actually, I appended the correct portfolio values by using these paths.
In terms of the code, the key point is the following line:
expr = jp.parse('$..children.[*]')
By the above expression, you can search the children attributes at any level of the json object. README.rst tells you what each syntax stands for.
| Syntax | Meaning |
|---|---|
$ |
The root object |
jsonpath1 .. jsonpath2 |
All nodes matched by jsonpath2 that descend from any node matching jsonpath1 |
[*] |
any array index |
I compared the speed between the above method with jsonpath-ng and a nested-for-loop method shown below.
| Method | Duration | Speed ratio |
|---|---|---|
jsonpath-ng |
9.72 ms ± 342 µs per loop (mean ± std. dev. of 7 runs, 100 loops each) | 5.7 (faster) |
| Nested-for-loop | 55.4 ms ± 7.39 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) | 1 |
def unpack_response(r):
df = pd.DataFrame()
for _, r1 in pd.json_normalize(r, ['data', 'attributes', 'total', 'children']).iterrows():
r1['portfolio'] = r1['name']
df = df.append(r1)
for _, r2 in pd.json_normalize(r1.children).iterrows():
df = df.append(r2)
for _, r3 in pd.json_normalize(r2.children).iterrows():
df = df.append(r3).append(pd.json_normalize(r3.children))
df['portfolio'].fillna(method='ffill', inplace=True)
trans = {'columns.' + c['key']: c['display_name'] for c in r['meta']['columns']}
cols = ['portfolio', 'name', 'entity_id', 'Adjusted Value (No Div, USD)', 'Current Quarter TWR (USD)', 'YTD TWR (USD)', 'TWR Audit Note']
df = df.rename(columns=trans)[cols].reset_index(drop=True)
return df
I think this gets you pretty close; might just need to adjust the various name columns and drop the extra data (I kept the grouping column).
The main idea is to recursively use pd.json_normalize with pd.concat for all availalable children levels.
EDIT: Put everything into a single function and added section to collapse the name columns like the expected output.
def process_json(api_response):
def get_column_values(df):
return pd.concat([df, pd.json_normalize(df.pop('columns')).set_axis(df.index)], axis=1)
def expand_children(df):
if len(df.index) > 1:
df['children'] = df['children'].fillna('').apply(lambda x: None if len(x) == 0 else x)
df_children = df.pop('children').dropna().explode()
if len(df_children.index) == 0: # return df if no children to append
return df.index.names, df
df_children = pd.json_normalize(df_children, max_level=0).set_axis(df_children.index).set_index('name', append=True)
df_children = get_column_values(df_children)
idx_names = list(df_children.index.names)
idx_names[-1] = idx_names[-1] + '_' + str(len(idx_names))
df[idx_names[-1]] = None
return idx_names, pd.concat([df.set_index(idx_names[-1], append=True), df_children], axis=0)
columns_dict = pd.DataFrame(api_response['meta']['columns']).set_index('key').to_dict(orient='index') # save column definitions
df = pd.DataFrame(api_response['data']['attributes']['total']['children']).set_index('name') # get initial dataframe
df = get_column_values(df) # get columns for initial level
# expand children
while 'children' in df.columns:
idx_names, df = expand_children(df)
# reorder/replace column headers and sort index
df = (df.loc[:, [x for x in df.columns if x not in columns_dict.keys()] + list(columns_dict.keys())]
.rename(columns={k:v['display_name'] for k,v in columns_dict.items()})
.sort_index(na_position='first').reset_index())
#collapse "name" columns (careful of potential duplicate rows)
for col in idx_names[::-1]:
df[idx_names[-1]] = df[idx_names[-1]].fillna(df[col])
df = df.rename(columns={'name': 'portfolio', idx_names[-1]: 'name'}).drop(columns=idx_names[1:-1])
return df
Since the other answer uses iterrows, which usually isn't advised, figured a quick time compare was worthwhile.
process_json(api_response)
54.2 ms ± 7.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
unpack_response(api_response) # iterrows
84.3 ms ± 9.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With