Consider we've got a bunch of subtress that look like this:
subtree1 = {
"id": "root",
"children": [
{
"id": "file",
"caption": "File",
"children": []
},
{
"id": "edit",
"caption": "Edit",
"children": []
},
{
"id": "tools",
"caption": "Tools",
"children": [
{
"id": "packages",
"caption": "Packages",
"children": []
}
]
},
{
"id": "help",
"caption": "Help",
"children": []
},
]
}
subtree2 = {
"id": "root",
"children": [
{
"id": "file",
"caption": "File",
"children": [
{"caption": "New"},
{"caption": "Exit"},
]
}
]
}
subtree3 = {
"id": "root",
"children": [
{
"id": "edit",
"children": [
{"caption": "Copy"},
{"caption": "Cut"},
{"caption": "Paste"},
]
},
{
"id": "help",
"children": [
{"caption": "About"},
]
}
]
}
subtree4 = {
"id": "root",
"children": [
{
"id": "edit",
"children": [
{
"id": "text",
"caption": "Text",
"children": [
{ "caption": "Insert line before" },
{ "caption": "Insert line after" }
]
}
]
}
]
}
I'm trying to figure how to code a merge
function such as doing something like this:
tree0 = merge(subtree1, subtree2)
tree0 = merge(tree0, subtree3)
tree0 = merge(tree0, subtree4)
will produce:
tree0 = {
"id": "root",
"children": [
{
"id": "file",
"caption": "File",
"children": [
{"caption": "New"},
{"caption": "Exit"},
]
},
{
"id": "edit",
"caption": "Edit",
"children": [
{"caption": "Copy"},
{"caption": "Cut"},
{"caption": "Paste"},
{
"id": "text",
"caption": "Text",
"children": [
{ "caption": "Insert line before" },
{ "caption": "Insert line after" }
]
}
]
},
{
"id": "tools",
"caption": "Tools",
"children": [
{
"id": "packages",
"caption": "Packages",
"children": []
}
]
},
{
"id": "help",
"caption": "Help",
"children": [
{"caption": "About"},
]
},
]
}
but doing something like this:
tree1 = merge(subtree1, subtree2)
tree1 = merge(tree1, subtree4)
tree1 = merge(tree1, subtree3)
would produce:
tree1 = {
"id": "root",
"children": [
{
"id": "file",
"caption": "File",
"children": [
{"caption": "New"},
{"caption": "Exit"},
]
},
{
"id": "edit",
"caption": "Edit",
"children": [
{
"id": "text",
"caption": "Text",
"children": [
{ "caption": "Insert line before" },
{ "caption": "Insert line after" }
]
},
{"caption": "Copy"},
{"caption": "Cut"},
{"caption": "Paste"},
]
},
{
"id": "tools",
"caption": "Tools",
"children": [
{
"id": "packages",
"caption": "Packages",
"children": []
}
]
},
{
"id": "help",
"caption": "Help",
"children": [
{"caption": "About"},
]
},
]
}
Said otherwise, loading the subtrees in the same order will always produce the same tree but if you use the same list of subtrees in different order you're not guaranteed to produce the same tree (as children lists could be extended in different order).
I've already attempted to code this but I don't know how what's the merge
algorithm behaviour and that's my question. Could anyone provide the code/pseudocode/explanation so I can implement it?
PS: Below you'll find some random attempt I thought could lead me to victory
if __name__ == '__main__':
from collections import defaultdict
subtree1 = {
"id": "root",
"children": [
{
"id": "file",
"caption": "File",
"children": []
},
{
"id": "edit",
"caption": "Edit",
"children": []
},
{
"id": "tools",
"caption": "Tools",
"children": [
{
"id": "packages",
"caption": "Packages",
"children": []
}
]
},
{
"id": "help",
"caption": "Help",
"children": []
},
]
}
subtree2 = {
"id": "root",
"children": [
{
"id": "file",
"caption": "File",
"children": [
{"caption": "New"},
{"caption": "Exit"},
]
}
]
}
subtree3 = {
"id": "root",
"children": [
{
"id": "edit",
"children": [
{"caption": "Copy"},
{"caption": "Cut"},
{"caption": "Paste"},
]
},
{
"id": "help",
"children": [
{"caption": "About"},
]
}
]
}
subtree4 = {
"id": "root",
"children": [
{
"id": "edit",
"children": [
{
"id": "text",
"caption": "Text",
"children": [
{"caption": "Insert line before"},
{"caption": "Insert line after"}
]
}
]
}
]
}
lst = [
subtree1,
subtree2,
subtree3,
subtree4
]
def traverse(node, path=[]):
yield node, tuple(path)
for c in node.get("children", []):
path.append(c.get("id", None))
yield from traverse(c)
path.pop()
# Levels & Hooks
dct_levels = defaultdict(list)
dct_hooks = defaultdict(list)
for subtree in lst:
for n, p in traverse(subtree):
if p not in dct_levels[len(p)]:
dct_levels[len(p)].append(p)
dct_hooks[p].append(n)
print(dct_levels)
print(dct_hooks[("file",)])
# Merge should happen here
tree = {
"id": "root",
"children": []
}
for level in range(1, max(dct_levels.keys()) + 1):
print("populating level", level, dct_levels[level])
but not sure if I'm creating the right structures/helpers here as it's still unclear how the whole algorithm works... that's what this question is all about
Tested with your examples on Python 3.5.
from copy import deepcopy
def merge(x: dict, y: dict) -> dict:
'Merge subtrees x y, and return the results as a new tree.'
return merge_inplace(deepcopy(x), y)
def merge_inplace(dest: dict, src: dict) -> dict:
'Merge subtree src into dest, and return dest.'
# perform sanity checks to make the code more rock solid
# feel free to remove those lines if you don't need
assert dest.get('id'), 'Cannot merge anonymous subtrees!'
assert dest.get('id') == src.get('id'), 'Identity mismatch!'
# merge attributes
dest.update((k, v) for k, v in src.items() if k != 'children')
# merge children
if not src.get('children'): # nothing to do, so just exit
return dest
elif not dest.get('children'): # if the children list didn't exist
dest['children'] = [] # then create an empty list for it
named_dest_children = {
child['id']: child
for child in dest['children']
if 'id' in child
}
for child in src['children']:
if 'id' not in child: # anonymous child, just append it
dest['children'].append(child)
elif child['id'] in named_dest_children: # override a named subtree
merge_inplace(named_dest_children[child['id']], child)
else: # create a new subtree
dest['children'].append(child)
named_dest_children[child['id']] = child
return dest
You can use itertools.groupby
with recursion:
from itertools import groupby
def merge(*args):
if len(args) < 2 or any('id' not in i for i in args):
return list(args)
_d = [(a, list(b)) for a, b in groupby(sorted(args, key=lambda x:x['id']), key=lambda x:x['id'])]
return [{**{j:k for h in b for j, k in h.items()}, 'id':a, 'children':merge(*[i for c in b for i in c['children']])} for a, b in _d]
Via args
, this solution treats each passed dictionary as a member of a children
list. This is to account for the possibility that two or more dictionaries may be passed to merge
that have different id
s i.e {'id':'root', 'children':[...]}
and {'id':'root2', 'children':[...]}
. As such, this solution would return a list of [{'id':'root', 'children':[...]}, {'id':'root2', 'children':[...]}]
, as the distinct id
s do not provide an avenue for a match. As such, in the context of your current problem, you need to use indexing to access the single returned element of the resulting list: the merged dict
with id
'root'
:
import json
tree0 = merge(subtree1, subtree2)[0]
tree0 = merge(tree0, subtree3)[0]
tree0 = merge(tree0, subtree4)[0]
print(json.dumps(tree0, indent=4))
Output:
{
"id": "root",
"children": [
{
"id": "edit",
"caption": "Edit",
"children": [
{
"caption": "Copy"
},
{
"caption": "Cut"
},
{
"caption": "Paste"
},
{
"id": "text",
"caption": "Text",
"children": [
{
"caption": "Insert line before"
},
{
"caption": "Insert line after"
}
]
}
]
},
{
"id": "file",
"caption": "File",
"children": [
{
"caption": "New"
},
{
"caption": "Exit"
}
]
},
{
"id": "help",
"caption": "Help",
"children": [
{
"caption": "About"
}
]
},
{
"id": "tools",
"caption": "Tools",
"children": [
{
"id": "packages",
"caption": "Packages",
"children": []
}
]
}
]
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With