Convert Logical String to JSON

Question

I want to convert from:

Input:

"#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"

where '#' marks an intent, '@' marks an entity, and ':' marks a value.

output:

{"and": [
  {"some": [{"var": "intents"}, {"==": [{"var": "intent"}, "serviceRequest"]}]},
  {"or": [
    {"and": [
      {"some": [{"var": "entities"}, {"==": [{"var": "entity"}, "charges"]}]},
      {"some": [{"var": "entities"}, {"==": [{"var": "value"}, "getRoamingCharges"]}]}
    ]},
    {"and": [
      {"some": [{"var": "entities"}, {"==": [{"var": "entity"}, "plans"]}]},
      {"some": [{"var": "entities"}, {"==": [{"var": "value"}, "dataplans"]}]}
    ]}
  ]}
]}

What I have tried:

import pyparsing

# Both operands of a comparison are written as double-quoted strings.
identifier = pyparsing.QuotedString('"')
# The comparison operators the grammar recognises.
operator = (
    pyparsing.Literal("==") |
    pyparsing.Literal("≠") |
    pyparsing.Literal("≥") |
    pyparsing.Literal("≤") |
    pyparsing.Literal("<") |
    pyparsing.Literal(">")
)
value = pyparsing.QuotedString('"')

# A single comparison: "<identifier>" <operator> "<value>".
match_format = identifier + operator + value

# print(match_format.parseString('"foobar"=="123"'))


def list_to_dict(pos, tokens):
    """Parse action that prints the matched tokens and an operator-keyed dict.

    Args:
        pos: first argument supplied by pyparsing to the parse action
            (presumably the match location; it is not used here).
        tokens: the matched tokens, in order identifier, operator, value.

    Returns:
        ``tokens`` unchanged, so the parse result itself is not modified.
    """
    dic = {}
    lis = []
    print(tokens)
    # NOTE: the inner braces build a *set* of the two operands, not a list,
    # which is why the printed result has no defined operand order.
    abc = {tokens[1]: {tokens[2], tokens[0]}}
    print(abc)
    lis.append(abc)
    dic['bfeh'] = lis
    return tokens


# Rebuild the expression with the parse action attached.
match_format = (identifier + operator + value).setParseAction(list_to_dict)

print(match_format.parseString('"intent"=="serviceRequest"'))

This gives:

{'==': {'intent', 'serviceRequest'}}

Please help me solve this, either with pyparsing (Python) or with any alternative method you prefer.

Ajax1234 · Accepted Answer

You can create a simpler tokenizer to link with a parser:

import re
class Token:
    """One lexical unit of a logical query string.

    A token carries the matched text (``val``) and a type name (``_type``):
    ``'cond'`` for the keywords ``and``/``or``, ``'intent'`` for ``#``,
    ``'entity'`` for ``@``, ``'value'`` for ``:`` and ``'label'`` for any
    other run of word characters.
    """

    # The keywords are wrapped in word boundaries so that labels which merely
    # start with "and"/"or" (e.g. "order") are not split into keyword + rest.
    grammar = r'\b(?:and|or)\b|#|:|@|\w+'
    # (pattern, type) pairs tried in order against the *whole* token text;
    # the catch-all label pattern therefore has to stay last.
    _types = [('and', 'cond'), ('or', 'cond'), ('#', 'intent'), ('@', 'entity'), (':', 'value'), (r'\w+', 'label')]

    def __init__(self, val, _type):
        self.val, self._type = val, _type

    @property
    def is_cond(self):
        """True when the token is a logical operator (``and`` / ``or``)."""
        return self._type == 'cond'

    @property
    def is_desc(self):
        """True when the token is one of the prefixes ``#``, ``@`` or ``:``."""
        return self._type in {'intent', 'entity', 'value'}

    @property
    def var_name(self):
        """Return ``'intents'`` for an intent token, ``'entities'`` otherwise."""
        return f'{self._type}s' if self._type == 'intent' else 'entities'

    @classmethod
    def _classify(cls, text):
        """Return the type name of the first pattern that matches all of ``text``."""
        for pattern, kind in cls._types:
            # fullmatch, not a substring search: a label such as "information"
            # contains "or" but must not be classified as an operator.
            if re.fullmatch(pattern, text):
                return kind
        raise ValueError(f'unrecognised token: {text!r}')

    @classmethod
    def tokenize(cls, _input):
        """Split ``_input`` into a list of ``Token`` objects.

        Characters that the grammar does not match (such as whitespace) are
        skipped.
        """
        return [cls(text, cls._classify(text)) for text in re.findall(cls.grammar, _input)]

    def __repr__(self):
        return f'{self.__class__.__name__}(value={self.val}, type={self._type})'

Now, a simple parser can be created:

from itertools import groupby
class AST:
    """Turn grouped tokens into a nested dict of ``and``/``or``/``some`` nodes.

    The constructor takes an iterable of ``(is_operator, tokens)`` pairs as
    produced by ``_group``: runs of operator tokens alternate with runs of
    ordinary tokens. ``parse`` consumes that stream once.
    """

    def __init__(self, stream):
        self.stream = iter(stream)

    def p_parse(self, stream):
        """Convert one operator-free run of tokens into a condition node.

        Every label in the run yields one ``{'some': [...]}`` comparison.
        The compared field is the type of the prefix token directly before
        the label (``#`` or ``@``); after a ``:`` or with no prefix it is
        ``'value'``.

        Args:
            stream: iterable of tokens containing no operator tokens.

        Returns:
            The single comparison when the run has one label, otherwise an
            ``{'and': [...]}`` node holding all comparisons in order.

        Raises:
            ValueError: if the run contains no label at all.
        """
        clauses = []
        prefix = None
        for tok in stream:
            if tok._type == 'value':
                # ':' only separates an entity from its value; the label that
                # follows is then compared against the 'value' field.
                prefix = None
            elif tok.is_desc:
                prefix = tok
            else:
                # NOTE(review): the collection name is read from the label
                # token, not from the prefix, so an intent clause ends up with
                # 'entities' rather than 'intents'. This matches the output
                # published with this code; confirm before changing it.
                source = tok.var_name if prefix is not None else 'entities'
                field = 'value' if prefix is None else prefix._type
                clauses.append({'some': [{'var': source}, {'==': [{'var': field}, tok.val]}]})
                prefix = None
        if not clauses:
            raise ValueError('condition contains no label to compare against')
        if len(clauses) == 1:
            return clauses[0]
        # Keep every comparison instead of only the first two.
        return {'and': clauses}

    def parse(self, seen=None):
        """Consume the grouped stream and return the resulting tree.

        Operators are applied right-to-left as they are met: the node built
        so far becomes the left operand and the rest of the stream the right
        operand. Returns ``seen`` once the stream is exhausted.
        """
        is_operator, group = next(self.stream, (None, None))
        if is_operator is None:
            return seen
        if is_operator:
            return {group[0].val: [seen, self.parse()]}
        return self.parse(self.p_parse(group))

    @classmethod
    def _group(cls, _tokens):
        """Build an ``AST`` from a flat token list, grouped by ``is_cond``."""
        return cls([(flag, list(run)) for flag, run in groupby(_tokens, key=lambda t: t.is_cond)])

Now, combining the components:

# Tokenize the sample query, group the tokens by operator, then build the tree.
s = "#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
tokens = Token.tokenize(s)
result = AST._group(tokens).parse()

Output:

{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'intent'}, 'serviceRequest']}]}, {'or': [{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'charges']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'getRoamingCharges']}]}]}, {'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'plans']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'dataplans']}]}]}]}]}

No doubt there are shorter solutions to this problem; however, the goal of both the tokenizer and the parser is to make it easier for you to scale this solution in the future to handle input that cannot be processed by more "hackish" solutions.

Convert Logical String to JSON

Tags:

python

json

parsing

pyparsing

1 Answers

Ajax1234

Recent Activity

Donate For Us

Convert Logical String to JSON

Tags:

python

json

parsing

pyparsing

1 Answers

Ajax1234

Related questions

Recent Activity

Donate For Us