I want to convert from:
Input:
"#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
where '#' marks an intent, '@' marks an entity, and ':' introduces a value.
output:
{"and":[
{"some" : [ {"var":"intents"}, {"==":[{"var":"intent"},
"serviceRequest"]} ]},
{"or":[
{"and":[{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"},
"charges"]} ]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"},
"getRoamingCharges"]} ]}]
},{"and":[
{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, "plans"]}
]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"},
"dataplans"]} ]}
]}
]}
]}
What I have tried:
import pyparsing
# Grammar for one comparison written as "<identifier>" <operator> "<value>".
identifier = pyparsing.QuotedString('"')
operator = (
    pyparsing.Literal("==")
    | pyparsing.Literal("≠")
    | pyparsing.Literal("≥")
    | pyparsing.Literal("≤")
    | pyparsing.Literal("<")
    | pyparsing.Literal(">")
)
value = pyparsing.QuotedString('"')
match_format = identifier + operator + value
#print(match_format.parseString('"foobar"=="123"'))


def list_to_dict(pos, tokens):
    """Parse action that prints the matched tokens and a dict built from them.

    Args:
        pos: Unused by this function.
        tokens: Matched tokens, indexed as identifier, operator, value.

    Returns:
        ``tokens`` unchanged; the dict built here is only printed.
    """
    print(tokens)
    # The operator keys a *set* of {value, identifier}, so the order of the
    # two operands in the printed result is not guaranteed.
    mapping = {tokens[1]: {tokens[2], tokens[0]}}
    print(mapping)
    # Built but never returned or used afterwards.
    wrapper = {'bfeh': [mapping]}
    return tokens


# Rebind the grammar with the parse action attached, then try it out.
match_format = (identifier + operator + value).setParseAction(list_to_dict)
print(match_format.parseString('"intent"=="serviceRequest"'))
This gives:
{'==': {'intent', 'serviceRequest'}}
Please help me solve this, either with pyparsing (Python) or with any alternative method you prefer.
You can create a simpler tokenizer to link with a parser:
import re
class Token:
    """One lexical token of the intent/entity query language.

    A token stores its source text in ``val`` and its category in ``_type``.
    Categories are ``'cond'`` (the words ``and``/``or``), ``'intent'`` (``#``),
    ``'entity'`` (``@``), ``'value'`` (``:``) and ``'label'`` (any other word).
    """

    # Words are matched whole and classified afterwards, so a label that
    # merely contains a keyword ("android", "order") stays a single label
    # instead of being split into "and" + "roid" or typed as a condition.
    grammar = r'\w+|#|:|@'
    # Checked in order with a full match; the catch-all label pattern is last.
    _types = [
        ('and', 'cond'),
        ('or', 'cond'),
        ('#', 'intent'),
        ('@', 'entity'),
        (':', 'value'),
        (r'\w+', 'label'),
    ]

    def __init__(self, val, _type):
        self.val, self._type = val, _type

    @property
    def is_cond(self):
        """True when the token is a boolean connective (``and``/``or``)."""
        return self._type == 'cond'

    @property
    def is_desc(self):
        """True for the marker symbols ``#``, ``@`` and ``:``."""
        return self._type in {'intent', 'entity', 'value'}

    @property
    def var_name(self):
        """Collection name for this token: ``'intents'`` for intents, else ``'entities'``."""
        return f'{self._type}s' if self._type == 'intent' else 'entities'

    @classmethod
    def _classify(cls, text):
        """Return the category of the first ``_types`` pattern that fully matches ``text``."""
        for pattern, kind in cls._types:
            # fullmatch, not a substring search: "order" must not count as "or".
            if re.fullmatch(pattern, text):
                return kind
        raise ValueError(f'cannot classify token {text!r}')

    @classmethod
    def tokenize(cls, _input):
        """Split ``_input`` into a list of ``Token`` objects.

        Text that matches no part of ``grammar`` (whitespace, punctuation
        other than ``#``, ``@`` and ``:``) is skipped.
        """
        return [cls(text, cls._classify(text)) for text in re.findall(cls.grammar, _input)]

    def __repr__(self):
        return f'{self.__class__.__name__}(value={self.val}, type={self._type})'
Now, a simple parser can be created:
from itertools import groupby
class AST:
    """Turn grouped tokens into a nested JSON-logic style dictionary.

    ``stream`` is an iterable of ``(is_cond, tokens)`` pairs as produced by
    ``_group``: runs of connective tokens alternate with runs of operand
    tokens. Tokens are expected to expose ``val``, ``_type``, ``is_cond``,
    ``is_desc`` and ``var_name``.
    """

    def __init__(self, stream):
        self.stream = iter(stream)

    @staticmethod
    def _clause(desc, name):
        """Return the ``[collection, comparison]`` pair for one operand.

        ``desc`` is the pending ``#``/``@`` marker token, or ``None`` when the
        label follows a ``:`` and is therefore an entity value.
        """
        if desc is None:
            return [{'var': 'entities'}, {'==': [{'var': 'value'}, name]}]
        # The collection comes from the marker token, so an intent is looked
        # up in "intents" rather than in "entities".
        return [{'var': desc.var_name}, {'==': [{'var': desc._type}, name]}]

    def p_parse(self, stream):
        """Convert one run of operand tokens into a ``some`` / ``and`` node.

        A marker followed by a label yields one clause; a ``:`` closes the
        clause collected so far and the label after it becomes a value clause.
        One clause is returned as ``{'some': clause}``; otherwise the first
        two clauses are combined under ``'and'``.
        """
        clauses, desc, name = [], None, ''
        for tok in stream:
            if tok._type == 'value':
                if name:
                    clauses.append(self._clause(desc, name))
                    desc, name = None, ''
            elif tok.is_desc:
                desc = tok
            else:
                name = tok.val
        # Flush whatever was collected after the last ':' (or the only clause).
        clauses.append(self._clause(desc, name))
        if len(clauses) == 1:
            return {'some': clauses[0]}
        return {'and': [{'some': clauses[0]}, {'some': clauses[1]}]}

    def parse(self, seen=None):
        """Consume the stream and return the resulting tree.

        Connectives nest to the right: ``A and B or C`` becomes
        ``{'and': [A, {'or': [B, C]}]}``. An empty stream returns ``seen``
        (``None`` by default).
        """
        is_cond, tokens = next(self.stream, (None, None))
        if is_cond is None:
            return seen
        if is_cond:
            # Only the first token of a connective run names the operator.
            return {tokens[0].val: [seen, self.parse()]}
        return self.parse(self.p_parse(tokens))

    @classmethod
    def _group(cls, _tokens):
        """Build an ``AST`` from a flat token list, grouping runs by ``is_cond``."""
        return cls([(flag, list(run)) for flag, run in groupby(_tokens, key=lambda t: t.is_cond)])
Now, combining the components:
# Tokenize the query, group the tokens into operand/connective runs,
# then build the nested dictionary.
s = "#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
result = AST._group(
    Token.tokenize(s)
).parse()
Output:
{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'intent'}, 'serviceRequest']}]}, {'or': [{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'charges']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'getRoamingCharges']}]}]}, {'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'plans']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'dataplans']}]}]}]}]}
No doubt there are shorter solutions to this problem, however, the goal of both the tokenizer and the parser is to make it easier for you to scale this solution in the future to handle input which cannot be processed by more "hackish" solutions.
If you like this site, you can donate via PayPal or buy us a coffee so we can maintain and grow it. Thank you!
Donate Us With