Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to convert an xml to a dictionary with some modifications?

I currently have an xml file in the following format:

<?xml version="1.0" encoding="UTF-8" ?>
    <Garden>
        <id>97</id>
        <Flowers>
            <id>98</id>
            <Type>
                <id>99</id>
                <Level>
                    <id>100</id>                    
                </Level>
            </Type>
        </Flowers>
    </Garden>

I want to use xmltodict to convert this xml to a dictionary and that is pretty simple to do. But there is a slight modification that I would like to do.

I would like to have my json be changed to something like this.

{
    "Garden": {
        "id": "97",
        "state": "0",
        "Flowers": {
            "id": "98",
            "state": "0",
            "Type": {
                "id": "99",
                "state": "0",
                "Level": {
                    "id": "100",
                    "state": "0"                

                }
            }
        }
    }
}

I want to be able to add a default "state": "0" at every level. I am really confused about how to do that. Any help would be really appreciated.

This is what I have as of now:

# Load the complete XML document as one string.
with open("gardenlist.xml", 'r') as xml_file:
    xmlString = xml_file.read()
print(xmlString)

# Convert the XML text to a dict first, then serialise that dict as
# indented JSON text.
parsed = xmltodict.parse(xmlString)
jsonString = json.dumps(parsed, indent=4)

This just prints the json but without the "state": "0" values.

like image 993
Laura Smith Avatar asked Dec 07 '25 14:12

Laura Smith


1 Answer

IMO, this is a very good opportunity to write your own parser:

from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor, RegexNode

# Sample document fed to the parser below. The string starts with a newline
# before the XML declaration and keeps the indentation of every element.
xml = """
<?xml version="1.0" encoding="UTF-8" ?>
    <Garden>
        <id>97</id>
        <Flowers>
            <id>98</id>
            <Type>
                <id>99</id>
                <Level>
                    <id>100</id>                    
                </Level>
            </Type>
        </Flowers>
    </Garden>
"""

class XMLVisitor(NodeVisitor):
    """Parse a small XML subset and convert it into nested dictionaries.

    The grammar accepts an ``<?xml ...>`` header followed by one or more
    elements. An element is an opening tag, one or more items and a closing
    tag; an item is either ``<tag>text</tag>`` or a nested element. Tags
    consist of a single word (no attributes), and the name in a closing tag
    is not compared with the name in the opening tag.

    Every dictionary built from a list of items also receives a
    ``"state": 0`` entry.
    """

    grammar = Grammar(
        r"""
        program     = root expr+
        expr        = opentag list closetag
        item        = (opentag notpar closetag) / expr
        list        = item+

        root        = ws? lpar "?xml" notpar rpar
        opentag     = ws? lpar word rpar ws?
        closetag    = lpar slash word rpar ws?

        lpar        = "<"
        rpar        = ">"
        notpar      = ~"[^<>]+"
        slash       = "/"

        word        = ~"\w+"
        ws          = ~"\s+"
        """
    )

    def generic_visit(self, node, visited_children):
        """Fallback for rules that have no dedicated ``visit_*`` method."""
        # Hand the children's results upwards; when there are none, hand
        # back the node itself so callers can still read its text.
        if visited_children:
            return visited_children
        return node

    def visit_opentag(self, node, visited_children):
        """Return the tag name of an opening tag."""
        _space, _lt, name_node, *_ = visited_children
        return name_node.text

    def visit_closetag(self, node, visited_children):
        """Return the tag name of a closing tag."""
        _lt, _slash, name_node, *_ = visited_children
        return name_node.text

    def visit_notpar(self, node, visited_children):
        """Return the matched text that contains no angle brackets."""
        return node.text

    def visit_item(self, node, visited_children):
        """Return a ``(tag, text)`` pair for a leaf, or the nested element."""
        matched = visited_children[0]
        if len(matched) != 3:
            # Second alternative: already shaped by visit_expr.
            return matched
        # First alternative: [opening tag, text, closing tag]; the closing
        # tag is dropped.
        tag, text, *_ = matched
        return (tag, text)

    def visit_expr(self, node, visited_children):
        """Return ``(tag, items)`` for an element; the closing tag is dropped."""
        name, items, *_ = visited_children
        return (name, items)

    def visit_program(self, node, visited_children):
        """Build the final dictionary from the elements after the header."""
        _header, elements = visited_children
        return self.__makeDict__(elements)

    def __makeDict__(self, struct, level = 0):
        """Convert a list of ``(key, value)`` pairs into a dictionary.

        A value that is itself a list is converted recursively. After each
        pair is stored, ``"state"`` is set to ``0`` in the same dictionary.
        ``level`` is accepted but not used.
        """
        result = {}
        for key, value in struct:
            if isinstance(value, list):
                value = self.__makeDict__(value)
            result.update({key: value, "state": 0})
        return result

# Run the visitor over the sample document; visit_program returns the
# dictionary built by __makeDict__, which is then printed.
visitor = XMLVisitor()
output = visitor.parse(xml)

print(output)

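This easy-to-understand snippet yields the following (note that `state` is the integer `0` rather than the string `"0"`, and that an extra `state` key also appears at the top level):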

{'Garden': {'id': '97', 'state': 0, 'Flowers': {'id': '98', 'state': 0, 'Type': {'id': '99', 'state': 0, 'Level': {'id': '100', 'state': 0}}}}, 'state': 0}
like image 57
Jan Avatar answered Dec 09 '25 02:12

Jan