Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Convert Logical String to JSON

I want to convert from:

Input:

"#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"

where '#' marks an intent, '@' marks an entity, and ':' introduces the entity's value.

output:

{"and":[
{"some" : [ {"var":"intents"}, {"==":[{"var":"intent"}]}, 
"serviceRequest"]},
{"or":[
{"and":[{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, 
"charges"]} ]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"}, 
"getRoamingCharges"]} ]}]
},{"and":[
{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, "plans"]} 
]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"}, "data 
plans"]} ]}
]}
]}
]}

What I have tried:

import pyparsing

# Both operands are written as double-quoted strings in the input.
identifier = pyparsing.QuotedString('"')
operator = (
    pyparsing.Literal("==") |
    pyparsing.Literal("≠") |
    pyparsing.Literal("≥") |
    pyparsing.Literal("≤") |
    pyparsing.Literal("<") |
    pyparsing.Literal(">")
)
value = pyparsing.QuotedString('"')


def list_to_dict(pos, tokens):
    """Parse action that prints the matched comparison as a one-key dict.

    Args:
        pos: First argument pyparsing hands to a two-argument parse action
            (the match location); not used here.
        tokens: The matched tokens, indexed as ``[identifier, operator, value]``.

    Returns:
        ``tokens`` unchanged, so the parse result itself is not modified.
    """
    print(tokens)
    # NOTE: braces without colons build a *set*, so the operand order is not
    # preserved and the value cannot be serialized with ``json.dumps``.
    abc = {tokens[1]: {tokens[2], tokens[0]}}
    print(abc)
    return tokens


match_format = (identifier + operator + value).setParseAction(list_to_dict)

print(match_format.parseString('"intent"=="serviceRequest"'))

This gives:

{'==': {'intent', 'serviceRequest'}}

Please help me solve this, either with pyparsing (Python) or with any alternative method you prefer.


1 Answer

You can create a simpler tokenizer to link with a parser:

import re
class Token:
    """One lexical token of a logical query string.

    A token stores its text in ``val`` and its category in ``_type``, which is
    one of ``'cond'`` (``and``/``or``), ``'intent'`` (``#``), ``'entity'``
    (``@``), ``'value'`` (``:``) or ``'label'`` (any other word).
    """

    # Word boundaries keep 'and'/'or' from being cut out of longer words
    # such as 'android' or 'order'; those fall through to the \w+ alternative.
    grammar = r'\band\b|\bor\b|#|:|@|\w+'
    # Ordered (pattern, type) pairs; the first pattern matching the WHOLE
    # token decides its type, so the catch-all \w+ must stay last.
    _types = [('and', 'cond'), ('or', 'cond'), ('#', 'intent'), ('@', 'entity'), (':', 'value'), (r'\w+', 'label')]

    def __init__(self, val, _type):
        self.val, self._type = val, _type

    @property
    def is_cond(self):
        """True for the boolean connectives ``and`` / ``or``."""
        return self._type == 'cond'

    @property
    def is_desc(self):
        """True for the marker symbols ``#``, ``@`` and ``:``."""
        return self._type in {'intent', 'entity', 'value'}

    @property
    def var_name(self):
        """Return ``'intents'`` for an intent marker, ``'entities'`` otherwise."""
        return f'{self._type}s' if self._type == 'intent' else 'entities'

    @classmethod
    def _classify(cls, text):
        """Return the type of the first ``_types`` pattern matching all of ``text``."""
        # fullmatch (not a substring search) so that a label merely containing
        # 'and' or 'or' -- e.g. 'support' -- is not mistaken for a connective.
        return next(kind for pattern, kind in cls._types if re.fullmatch(pattern, text))

    @classmethod
    def tokenize(cls, _input):
        """Split ``_input`` into a list of ``Token`` objects.

        Characters that match no alternative of ``grammar`` (for example
        whitespace) are skipped. An empty string yields an empty list.
        """
        return [cls(text, cls._classify(text)) for text in re.findall(cls.grammar, _input)]

    def __repr__(self):
        return f'{self.__class__.__name__}(value={self.val}, type={self._type})'

Now, a simple parser can be created:

from itertools import groupby
class AST:
    """Builds a nested dict expression from grouped tokens.

    The constructor takes an iterable of ``(is_cond, tokens)`` pairs as
    produced by ``_group``. Tokens are duck-typed: only their ``val``,
    ``_type``, ``is_cond``, ``is_desc`` and ``var_name`` attributes are read.
    """

    def __init__(self, stream):
        self.stream = iter(stream)

    def p_parse(self, stream):
        """Turn one run of non-connective tokens into a ``some`` expression.

        Each label becomes
        ``{'some': [{'var': <collection>}, {'==': [{'var': <field>}, <label>]}]}``.
        A label directly after ``#`` or ``@`` takes the collection from the
        marker's ``var_name`` and the field from the marker's ``_type``; any
        other label (for example one following ``:``) is compared against the
        ``value`` field of ``entities``.

        Returns:
            The single clause, or ``{'and': [...]}`` over all clauses when the
            run produced more than one.

        Raises:
            ValueError: If the run contains no label at all.
        """
        clauses, marker = [], None
        for tok in stream:
            if tok.is_desc:
                # '#'/'@' qualify the next label; ':' clears any pending
                # marker so that the next label is treated as a value.
                marker = None if tok._type == 'value' else tok
                continue
            if marker is None:
                collection, field = 'entities', 'value'
            else:
                # Read the collection from the marker, not from the label
                # token, so that '#' clauses are looked up in 'intents'.
                collection, field = marker.var_name, marker._type
            clauses.append({'some': [{'var': collection}, {'==': [{'var': field}, tok.val]}]})
            marker = None
        if not clauses:
            raise ValueError('expected at least one label in the token group')
        return clauses[0] if len(clauses) == 1 else {'and': clauses}

    def parse(self, seen=None):
        """Consume the remaining groups and return the expression built from them.

        Connectives nest to the right: ``a and b or c`` becomes
        ``{'and': [a, {'or': [b, c]}]}``. Only the first token of a connective
        group is used as the operator. A missing operand is represented by
        ``None``, which is also the result for an empty stream.

        Args:
            seen: The operand parsed immediately before this call, if any.
        """
        is_cond, group = next(self.stream, (None, None))
        if is_cond is None:
            # Stream exhausted: whatever was parsed last is the result.
            return seen
        if is_cond:
            return {group[0].val: [seen, self.parse()]}
        return self.parse(self.p_parse(group))

    @classmethod
    def _group(cls, _tokens):
        """Build an ``AST`` from tokens, batching consecutive tokens by ``is_cond``."""
        return cls([(flag, list(run)) for flag, run in groupby(_tokens, key=lambda t: t.is_cond)])

Now, combining the components:

# Sample query: '#' prefixes an intent, '@' an entity, ':' an entity value.
s = "#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
# Tokenize first, then group the tokens and parse them into a nested dict.
tokens = Token.tokenize(s)
result = AST._group(tokens).parse()

Output:

{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'intent'}, 'serviceRequest']}]}, {'or': [{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'charges']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'getRoamingCharges']}]}]}, {'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'plans']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'dataplans']}]}]}]}]}

No doubt there are shorter solutions to this problem; however, the goal of both the tokenizer and the parser is to make it easier for you to scale this solution in the future to handle input that cannot be processed by more "hackish" solutions.

like image 155
Ajax1234 Avatar answered Jun 07 '26 07:06

Ajax1234