# Inspired by https://github.com/jansorg/tezos-intellij/blob/master/grammar/michelson.bnf
import json
import re
from typing import List
from typing import Optional
from ply.lex import Lexer # type: ignore
from ply.lex import LexToken
from ply.lex import lex
from ply.yacc import yacc # type: ignore
from pymavryk.michelson.macros import expand_macro
from pymavryk.michelson.macros import expr as make_expr
from pymavryk.michelson.tags import prim_tags
def doc(docstring):
    """Build a decorator that replaces a function's ``__doc__``.

    The parser class in this file uses it to attach grammar-rule strings
    to its ``p_*`` methods.

    :param docstring: text to store as the decorated function's ``__doc__``
    :returns: decorator that returns the very same function object
    """

    def _attach(target):
        # Mutate in place: the caller gets back the original object, not a wrapper.
        setattr(target, '__doc__', docstring)
        return target

    return _attach
class MichelsonParserError(ValueError):
    """Raised when Michelson source cannot be parsed.

    Attributes:
        message: human-readable description of the failure.
        line: ``token.lineno`` of the offending token, or ``None`` when no
            token is available.
        pos: ``token.lexpos`` of the offending token, or ``None`` when no
            token is available.
    """

    def __init__(self, token: 'Optional[LexToken]', message=None):
        """Create the error from the offending token.

        :param token: token at which parsing failed; ``None`` is accepted
            because PLY reports a syntax error at end of input without a token
        :param message: custom message; a default is generated when falsy
        """
        if token is None:
            fallback = 'failed to parse expression: unexpected end of input'
        else:
            fallback = f'failed to parse expression {token}'
        message = message or fallback
        super().__init__(message)
        self.message = message
        # Without a token there is no location to report; previously this
        # path crashed with AttributeError instead of raising this error.
        self.line = None if token is None else token.lineno
        self.pos = None if token is None else token.lexpos
class Sequence(list):
    """Marker subclass of ``list`` with no added behavior.

    Parser actions in this file test ``type(x) is list`` (exact type), so a
    ``Sequence`` is appended as one nested element instead of being spliced
    into the surrounding instruction list.
    """
class SimpleMichelsonLexer(Lexer):
    """PLY token definitions for Michelson source text.

    The ``t_*`` attributes are regular expressions picked up by
    ``ply.lex.lex``; the built lexer is stored in ``self.lexer``.
    """

    tokens = (
        'INT',
        'BYTE',
        'STR',
        'ANNOT',
        'PRIM',
        'LEFT_CURLY',
        'RIGHT_CURLY',
        'LEFT_PAREN',
        'RIGHT_PAREN',
        'SEMI',
    )

    t_INT = r'-?[0-9]+'
    t_BYTE = r'0x[A-Fa-f0-9]*'
    t_STR = r'\"(\\.|[^\"])*\"'
    # Deliberately permissive: the name part may be empty or start with a digit.
    # A stricter alternative would be r'[:@%]+([_a-zA-Z][_0-9a-zA-Z\.]*)?'
    t_ANNOT = r'[:@%]+([_0-9a-zA-Z\.]*)?'
    t_PRIM = r'[A-Za-z][A-Za-z0-9_]+'
    t_LEFT_CURLY = r'\{'
    t_RIGHT_CURLY = r'\}'
    t_LEFT_PAREN = r'\('
    t_RIGHT_PAREN = r'\)'
    t_SEMI = r';'
    # Block comment. The previous pattern r'/\*[^*]*\*/' rejected any comment
    # with '*' in its body (e.g. "/* a * b */" or "/** doc */"); this form
    # allows interior asterisks and still stops at the first "*/".
    t_ignore_MULTI_COMMENT = r'/\*[^*]*\*+(?:[^/*][^*]*\*+)*/'
    t_ignore_COMMENT = r'#[^\n]*'
    t_ignore = ' \t\r\n\f'

    def __init__(self):
        super().__init__()
        # NOTE(review): reflags is passed explicitly; PLY's default is believed
        # to be re.VERBOSE, under which the literal '#' in t_ignore_COMMENT
        # would start a regex comment - confirm before changing these flags.
        self.lexer = lex(module=self, reflags=re.MULTILINE)

    def t_error(self, t):
        """Turn an unrecognised character into a one-character token.

        The token's type and value are both set to the first character of
        ``t.value``, the lexer advances by one character, and the token is
        returned to the caller.
        """
        bad_char = t.value[0]
        t.type = bad_char
        t.value = bad_char
        t.lexer.skip(1)
        return t
class MichelsonParser:
    """Customizable Michelson parser

    Grammar rules are attached to the ``p_*`` methods through ``@doc`` and
    consumed by ``ply.yacc``. The methods are kept in their original order
    because rule order is part of the grammar definition.
    """

    tokens = SimpleMichelsonLexer.tokens

    @doc(
        '''instr : expr
                 | empty'''
    )
    def p_instr(self, p):
        p[0] = p[1]

    @doc('instr : INT')
    def p_instr_int(self, p):
        p[0] = {'int': p[1]}

    @doc('instr : BYTE')
    def p_instr_byte(self, p):
        p[0] = {'bytes': p[1][2:]}  # strip 0x prefix

    @doc('instr : STR')
    def p_instr_str(self, p):
        # The token still carries its quotes and escapes; decode it as JSON.
        p[0] = {'string': json.loads(p[1])}

    @doc('instr : instr SEMI instr')
    def p_instr_list(self, p):
        merged = []
        for item in (p[1], p[3]):
            # Exact type check on purpose: a Sequence (nested block) must be
            # appended as a single element, only plain lists are spliced in.
            if type(item) is list:
                merged.extend(item)
            elif item is not None:
                merged.append(item)
        p[0] = merged

    @doc('instr : LEFT_CURLY instr RIGHT_CURLY')
    def p_instr_subseq(self, p):
        block = Sequence()
        inner = p[2]
        if type(inner) is list:
            block.extend(inner)
        elif inner is not None:
            block.append(inner)
        p[0] = block

    @doc('expr : PRIM annots args')
    def p_expr(self, p):
        prim = p[1]
        annots = p[2] or []
        args = p[3] or []
        if prim in prim_tags or prim in self.extra_primitives:
            expr = make_expr(prim=prim, annots=annots, args=args)
        else:
            # Anything that is not a known primitive is treated as a macro.
            try:
                expr = expand_macro(prim=prim, annots=annots, args=args)
            except AssertionError as e:
                raise MichelsonParserError(p.slice[1], str(e)) from e
        p[0] = Sequence(expr) if isinstance(expr, list) else expr

    @doc(
        '''annots : annot
                  | empty'''
    )
    def p_annots(self, p):
        # For the empty alternative the result is left unset.
        if p[1] is not None:
            p[0] = [p[1]]

    @doc('annots : annots annot')
    def p_annots_list(self, p):
        collected = []
        if type(p[1]) is list:
            collected.extend(p[1])
        if p[2] is not None:
            collected.append(p[2])
        p[0] = collected

    @doc('annot : ANNOT')
    def p_annot(self, p):
        p[0] = p[1]

    @doc(
        '''args : arg
                | empty'''
    )
    def p_args(self, p):
        p[0] = [] if p[1] is None else [p[1]]

    @doc('args : args arg')
    def p_args_list(self, p):
        collected = []
        if type(p[1]) is list:
            collected.extend(p[1])
        if p[2] is not None:
            collected.append(p[2])
        p[0] = collected

    @doc('arg : PRIM')
    def p_arg_prim(self, p):
        p[0] = {'prim': p[1]}

    @doc('arg : INT')
    def p_arg_int(self, p):
        p[0] = {'int': p[1]}

    @doc('arg : BYTE')
    def p_arg_byte(self, p):
        p[0] = {'bytes': p[1][2:]}  # strip 0x prefix

    @doc('arg : STR')
    def p_arg_str(self, p):
        p[0] = {'string': json.loads(p[1])}

    @doc('arg : LEFT_CURLY instr RIGHT_CURLY')
    def p_arg_subseq(self, p):
        body = p[2]
        # Exact type check on purpose: a Sequence is wrapped, not reused as-is.
        if type(body) is list:
            p[0] = body
        elif body is not None:
            p[0] = [body]
        else:
            p[0] = []

    @doc('arg : LEFT_PAREN expr RIGHT_PAREN')
    def p_arg_group(self, p):
        p[0] = p[2]

    @doc('empty :')
    def p_empty(self, p):
        ...

    def p_error(self, p):
        """Convert a PLY syntax error into :class:`MichelsonParserError`.

        :param p: offending token, or ``None`` when the error is detected at
            end of input
        :raises MichelsonParserError: always
        """
        if p is None:
            # No token exists at end of input; build a placeholder carrying
            # the lexer's final position so the error still has a location.
            p = LexToken()
            p.type = '$end'
            p.value = ''
            p.lineno = self.lexer.lexer.lineno
            p.lexpos = self.lexer.lexer.lexpos
            raise MichelsonParserError(p, 'failed to parse expression: unexpected end of input')
        raise MichelsonParserError(p)

    def __init__(self, debug=False, write_tables=False, extra_primitives: Optional[List[str]] = None):
        """Initialize Michelson parser

        :param debug: Verbose output
        :param write_tables: Store PLY output
        :param extra_primitives: Additional words accepted as primitives
            (kept as-is instead of being passed to macro expansion)
        """
        self.lexer = SimpleMichelsonLexer()
        self.parser = yacc(
            module=self,
            debug=debug,
            write_tables=write_tables,
        )
        self.extra_primitives = extra_primitives or []

    def parse(self, code):
        """Parse Michelson source.

        A single pair of parentheses wrapping the whole input is removed
        before parsing.

        :param code: Michelson source
        :returns: Micheline expression
        :raises MichelsonParserError: if the source cannot be parsed
        """
        if len(code) > 0 and code[0] == '(' and code[-1] == ')':
            code = code[1:-1]
        # Pass this parser's own lexer instead of relying on PLY's
        # module-global "most recently built" lexer.
        return self.parser.parse(code, lexer=self.lexer.lexer)
def michelson_to_micheline(data, parser=None):
    """Convert Michelson source text into a Micheline expression.

    :param data: Michelson string
    :param parser: parser object exposing ``parse(data)``; a fresh
        ``MichelsonParser`` is created when omitted
    :returns: whatever ``parser.parse(data)`` returns (the Micheline expression)
    """
    active_parser = MichelsonParser() if parser is None else parser
    return active_parser.parse(data)