Source code for pymavryk.michelson.parse

# Inspired by https://github.com/jansorg/tezos-intellij/blob/master/grammar/michelson.bnf
import json
import re
from typing import List
from typing import Optional

from ply.lex import Lexer  # type: ignore
from ply.lex import LexToken
from ply.lex import lex
from ply.yacc import yacc  # type: ignore

from pymavryk.michelson.macros import expand_macro
from pymavryk.michelson.macros import expr as make_expr
from pymavryk.michelson.tags import prim_tags


[docs]def doc(docstring): def decorate(func): func.__doc__ = docstring return func return decorate
[docs]class MichelsonParserError(ValueError): def __init__(self, token: LexToken, message=None): message = message or f'failed to parse expression {token}' super(MichelsonParserError, self).__init__(message) self.message = message self.line = token.lineno self.pos = token.lexpos
[docs] def format_stdout(self) -> str: return f'{self.line}:{self.pos}: {self.message}'
[docs]class Sequence(list): pass
[docs]class SimpleMichelsonLexer(Lexer): tokens = ('INT', 'BYTE', 'STR', 'ANNOT', 'PRIM', 'LEFT_CURLY', 'RIGHT_CURLY', 'LEFT_PAREN', 'RIGHT_PAREN', 'SEMI') t_INT = r'-?[0-9]+' t_BYTE = r'0x[A-Fa-f0-9]*' t_STR = r'\"(\\.|[^\"])*\"' t_ANNOT = r'[:@%]+([_0-9a-zA-Z\.]*)?' # r'[:@%]+([_a-zA-Z][_0-9a-zA-Z\.]*)?' t_PRIM = r'[A-Za-z][A-Za-z0-9_]+' t_LEFT_CURLY = r'\{' t_RIGHT_CURLY = r'\}' t_LEFT_PAREN = r'\(' t_RIGHT_PAREN = r'\)' t_SEMI = r';' t_ignore_MULTI_COMMENT = r'/\*[^*]*\*/' t_ignore_COMMENT = r'#[^\n]*' t_ignore = ' \t\r\n\f' def __init__(self): super(SimpleMichelsonLexer, self).__init__() self.lexer = lex(module=self, reflags=re.MULTILINE)
[docs] def t_error(self, t): t.type = t.value[0] t.value = t.value[0] t.lexer.skip(1) return t
[docs]class MichelsonParser: """Customizable Michelson parser""" tokens = SimpleMichelsonLexer.tokens
[docs] @doc( '''instr : expr | empty''' ) def p_instr(self, p): p[0] = p[1]
[docs] @doc('instr : INT') def p_instr_int(self, p): p[0] = {'int': p[1]}
[docs] @doc('instr : BYTE') def p_instr_byte(self, p): p[0] = {'bytes': p[1][2:]} # strip 0x prefix
[docs] @doc('instr : STR') def p_instr_str(self, p): p[0] = {'string': json.loads(p[1])}
[docs] @doc('instr : instr SEMI instr') def p_instr_list(self, p): p[0] = [] for i in [p[1], p[3]]: if type(i) is list: p[0].extend(i) elif i is not None: p[0].append(i)
[docs] @doc('instr : LEFT_CURLY instr RIGHT_CURLY') def p_instr_subseq(self, p): p[0] = Sequence() if type(p[2]) is list: p[0].extend(p[2]) elif p[2] is not None: p[0].append(p[2])
[docs] @doc('expr : PRIM annots args') def p_expr(self, p): prim = p[1] if prim in prim_tags or prim in self.extra_primitives: expr = make_expr( prim=prim, annots=p[2] or [], args=p[3] or [], ) else: try: expr = expand_macro( prim=prim, annots=p[2] or [], args=p[3] or [], ) except AssertionError as e: raise MichelsonParserError(p.slice[1], str(e)) from e p[0] = Sequence(expr) if isinstance(expr, list) else expr
[docs] @doc( '''annots : annot | empty''' ) def p_annots(self, p): if p[1] is not None: p[0] = [p[1]]
[docs] @doc('annots : annots annot') def p_annots_list(self, p): p[0] = [] if type(p[1]) == list: p[0].extend(p[1]) if p[2] is not None: p[0].append(p[2])
[docs] @doc('annot : ANNOT') def p_annot(self, p): p[0] = p[1]
[docs] @doc( '''args : arg | empty''' ) def p_args(self, p): p[0] = [] if p[1] is not None: p[0].append(p[1])
[docs] @doc('args : args arg') def p_args_list(self, p): p[0] = [] if type(p[1]) == list: p[0].extend(p[1]) if p[2] is not None: p[0].append(p[2])
[docs] @doc('arg : PRIM') def p_arg_prim(self, p): p[0] = {'prim': p[1]}
[docs] @doc('arg : INT') def p_arg_int(self, p): p[0] = {'int': p[1]}
[docs] @doc('arg : BYTE') def p_arg_byte(self, p): p[0] = {'bytes': p[1][2:]} # strip 0x prefix
[docs] @doc('arg : STR') def p_arg_str(self, p): p[0] = {'string': json.loads(p[1])}
[docs] @doc('arg : LEFT_CURLY instr RIGHT_CURLY') def p_arg_subseq(self, p): if type(p[2]) == list: p[0] = p[2] elif p[2] is not None: p[0] = [p[2]] else: p[0] = []
[docs] @doc('arg : LEFT_PAREN expr RIGHT_PAREN') def p_arg_group(self, p): p[0] = p[2]
[docs] @doc('empty :') def p_empty(self, p): ...
[docs] def p_error(self, p): raise MichelsonParserError(p)
def __init__(self, debug=False, write_tables=False, extra_primitives: Optional[List[str]] = None): """Initialize Michelson parser :param debug: Verbose output :param write_tables: Store PLY output :param extra_primitives: List of words to be ignored """ self.lexer = SimpleMichelsonLexer() self.parser = yacc( module=self, debug=debug, write_tables=write_tables, ) self.extra_primitives = extra_primitives or []
[docs] def parse(self, code): """Parse Michelson source. :param code: Michelson source :returns: Micheline expression """ if len(code) > 0 and code[0] == '(' and code[-1] == ')': code = code[1:-1] return self.parser.parse(code)
[docs]def michelson_to_micheline(data, parser=None): """Converts Michelson source text into a Micheline expression. :param data: Michelson string :param parser: custom Michelson parser (optional) :returns: Micheline expression """ if parser is None: parser = MichelsonParser() return parser.parse(data)