From aaadba738c6645df5050f166a1b8c77c223332b7 Mon Sep 17 00:00:00 2001 From: Joey Payne Date: Fri, 22 May 2015 12:55:40 -0600 Subject: [PATCH] Initial commit with python grammar and parser files. --- Python.grako | 677 +++++++++++++++++++++++++++++++++++++++++++++++ python_parser.py | 136 ++++++++++ test/test.py | 13 + 3 files changed, 826 insertions(+) create mode 100644 Python.grako create mode 100644 python_parser.py create mode 100644 test/test.py diff --git a/Python.grako b/Python.grako new file mode 100644 index 0000000..b93d157 --- /dev/null +++ b/Python.grako @@ -0,0 +1,677 @@ +@@comments :: /\#[^\n]*/ +@@eol_comments :: /\#[^\n]*/ + +@@whitespace :: /[\t \f]+/ + + +start + = + {newline|@:stmt}+ + ; + + + +single_input + = + newline | @:simple_stmt | @:compound_stmt {dedent} newline + ; + + +file_input + = + {NEWLINE | stmt}* $ + ; + + +eval_input + = + testlist {NEWLINE}* $ + ; + + +decorator + = + '@' name:dotted_name args:{'(' {arglist} ')'} newline + ; + + +decorators + = + decorators:{decorator}+ + ; + + +decorated + = + decorators (classdef | funcdef) + ; + +funcdef + = + type:'def' name:name params:parameters ':' body:suite + ; + + +parameters + = + '(' @:({varargslist}) ')' + ; + +varargslist= ({@:fpdef {'=' @:test} ','}* + ('*' @:name {',' '**' @:name} | '**' @:name) | + @:fpdef {'=' @:test} {',' @:fpdef {'=' @:test}}* {','}); + +fpdef = name | '(' fplist ')'; +fplist= fpdef {',' fpdef}* {','}; + +stmt + = + $ ~ | @:(compound_stmt | simple_stmt) + ; + +newline= + NEWLINE + ; + +eof = EOF ~; + +simple_stmt + = + statement:small_stmt {';' statement:small_stmt}* {';'} newline {dedent} {eof} + ; + + +small_stmt + = + (print_stmt + | expr_stmt + | del_stmt + | pass_stmt + | flow_stmt + | import_stmt + | global_stmt + | exec_stmt + | assert_stmt) + ; + + +expr_stmt + = + @:testlist ~ + ( + @:augassign @:(yield_expr | testlist) ~ + | {@:'=' @:(yield_expr | testlist)}* ~ + ) + ; + + +augassign + = + ( + '+=' + | '-=' + | '*=' + | '/=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '**=' + | '//=' + ) + ; + + +del_stmt + = + 'del' exprlist + ; + +print_stmt + = + @:'print' ( { @+:test {',' @+:test}* {','} } | + @:'>>' @+:test { {',' @+:test}+ {','}} ) + ; + + +pass_stmt + = + 'pass' + ; + + +flow_stmt + = + break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt + ; + + +break_stmt + = + 'break' + ; + + +continue_stmt + = + 'continue' + ; + + +return_stmt + = + 'return' {testlist} + ; + + +yield_stmt + = + yield_expr + ; + + +raise_stmt + = + 'raise' {test {'from' test}} + ; + + +import_stmt + = + import_name | import_from + ; + + +import_name + = + 'import' dotted_as_names + ; + +import_from= ('from' ({'.'}* dotted_name | {'.'}+) + 'import' ('*' | '(' import_as_names ')' | import_as_names)); + +import_as_name + = + name {'as' name} + ; + + +dotted_as_name + = + dotted_name {'as' name} + ; + + +import_as_names + = + import_as_name {',' import_as_name}* {','} + ; + + +dotted_as_names + = + dotted_as_name {',' dotted_as_name}* + ; + + +dotted_name + = + @:(NAME {'.' NAME}*) + ; + + +global_stmt + = + 'global' name {',' name}* + ; + + +exec_stmt + = + 'exec' expr {'in' test {',' test}} + ; + + +assert_stmt + = + 'assert' test {',' test} + ; + + +compound_stmt + = + statement:(if_stmt + | while_stmt + | for_stmt + | try_stmt + | with_stmt + | funcdef + | classdef + | decorated) + ; + + +if_stmt + = + 'if' test ':' suite {'elif' test ':' suite}* {'else' ':' suite} + ; + + +while_stmt + = + 'while' test ':' suite {'else' ':' suite} + ; + + +for_stmt + = + 'for' exprlist 'in' testlist ':' suite {'else' ':' suite} + ; + + +try_stmt + = + ( + 'try' + ':' + suite + ( + {except_clause ':' suite}+ {'else' ':' suite} {'finally' ':' suite} + | 'finally' ':' suite + ) + ) + ; + + +with_stmt + = + 'with' items:(with_item {',' with_item}*) ':' body:suite + ; + + +with_item + = + test {'as' expr} + ; + + +except_clause + = + 'except' {test {('as' | ',') test}} + ; + +suite + = + {newline} indent @:{stmt}+ {dedent} {eof} | @:simple_stmt + ; + +testlist_safe= old_test {{',' old_test}+ {','}}; +old_test= or_test | old_lambdef; +old_lambdef= 'lambda' {varargslist} ':' old_test; + +test + = + @:or_test {'if' @:or_test 'else' @:test} | @:lambdef + ; + +lambdef + = + 'lambda' {varargslist} ':' test + ; + +or_test + = + @:and_test {'or' @:and_test}* + ; + + +and_test + = + (@:not_test {'and' @:not_test}*) + ; + + +not_test + = + 'not' @:not_test | @:comparison + ; + + +comparison + = + @:expr {comp_op @:expr}* + ; + + +comp_op + = + '<' + | '>' + | '==' + | '>=' + | '<=' + | '<>' + | '!=' + | 'in' + | 'not' 'in' + | 'is' + | 'is' 'not' + ; + + +expr + = + @:xor_expr {'|' @:xor_expr}* + ; + + +xor_expr + = + @:and_expr {'^' @:and_expr}* + ; + + +and_expr + = + @:shift_expr {'&' @:shift_expr}* + ; + + +shift_expr + = + @:arith_expr {('<<' | '>>') @:arith_expr}* + ; + + +arith_expr + = + @:term {('+' | '-') @:term}* + ; + + +term + = + @:factor {('*' | '/' | '%' | '//') @:factor}* + ; + + +factor + = + ('+' | '-' | '~') @:factor | @:power + ; + + +power + = + @:atom {@:trailer} {'**' @:factor} + ; + +atom= ('(' @:{yield_expr|testlist_comp} ')' | + '[' @:{listmaker} ']' | + '{' @:{dictorsetmaker} '}' | + '`' @:testlist1 '`' | + @:name | @:NUMBER | @:{string}+); + +listmaker= test ( list_for | {',' test}* {','} ); + +testlist_comp + = + test ( comp_for | {',' test}* {','} ) + ; + + +trailer + = + '(' {@:arglist} ')' | '[' @:subscriptlist ']' | '.' @:NAME + ; + + +subscriptlist + = + subscript {',' subscript}* {','} + ; + +subscript= '.' '.' '.' | test | {test} ':' {test} {sliceop}; + + +sliceop + = + ':' {test} + ; + + +exprlist + = + @:expr {',' @:expr}* {','} + ; + + +testlist + = + @:test {',' @:test}* {','} + ; + + +dictorsetmaker + = + ( + (test ':' test (comp_for | {',' test ':' test}* {','})) + | (test (comp_for | {',' test}* {','})) + ) + ; + + +classdef + = + type:'class' name:name {'(' base_classes:{arglist} ')'} ':' body:suite + ; + + +arglist + = + {@+:argument ','}* (@+:argument {','} | '*' @+:test {',' @+:argument}* {',' '**' @+:test} | '**' @+:test) + ; + + +argument + = + @:test {@:comp_for} | @:test '=' @:test + ; + + +list_iter= list_for | list_if; +list_for= 'for' exprlist 'in' testlist_safe {list_iter}; +list_if= 'if' old_test {list_iter}; + +comp_iter + = + comp_for | comp_if + ; + + +comp_for + = + 'for' exprlist 'in' or_test {comp_iter} + ; + + +comp_if + = + 'if' old_test {comp_iter} + ; + +testlist1= test {',' test}*; + +encoding_decl + = + name + ; + + +yield_expr + = + 'yield' {testlist1} + ; + + +NEWLINE + = + /(\r?\n[\t ]*)+/ + ; + +NUMBER + = + number:(FLOAT_NUMBER | + DEC_NUMBER | + HEX_NUMBER | + OCT_NUMBER | + BIN_NUMBER | + IMAG_NUMBER) + ; + +term_symbol = STAR|SLASH|PERCENT|DOUBLESLASH ; +shift_symbol = LEFTSHIFT|RIGHTSHIFT; +add_symbol = PLUS|MINUS; +name = NAME; +string = STRING | LONG_STRING; + +I = /(?i)/; # Case insensitive +S = /(?s)/; # Dot matches newline +J = /([jJ])/; + +LONG_POSTFIX = /[lL]?/; +EXP_POSTFIX = /[eE][-+]?\d+/; +DEC_NUMBER = value:/[1-9]\d*(?![.0])/ postfix:LONG_POSTFIX; +HEX_NUMBER = value:/0[xX][\da-fA-F]*/ postfix:LONG_POSTFIX; +OCT_NUMBER = value:/0[oO]?(?![bBxX])[0-7]*/ postfix:LONG_POSTFIX; +FLOAT_NUMBER = value:/(\d+\.\d*|\.\d+)([eE][-+]?\d+)?|\d+[eE][-+]?\d+/; +IMAG_NUMBER = value:(/\d+/ | FLOAT_NUMBER) postfix:J; +BIN_NUMBER = value:/0[bB][01]+/ postfix:LONG_POSTFIX; +STRING_PREFIX = /(u|b|)r?/; +STRING_INTERNAL = /.\*?(?>='; +DOUBLESTAREQUAL = '**='; +DOUBLESLASHEQUAL = '//='; + +EQEQUAL = '=='; +NOTEQUAL = '!=|<>'; +LESSEQUAL = '<='; +LEFTSHIFT = '<<'; +GREATEREQUAL = '>='; +RIGHTSHIFT = '>>'; +PLUSEQUAL = '+='; +MINEQUAL = '-='; +DOUBLESTAR = '**'; +STAREQUAL = '*='; +DOUBLESLASH = '//'; +SLASHEQUAL = '/='; +VBAREQUAL = '|='; +PERCENTEQUAL = '%='; +AMPEREQUAL = '&='; +CIRCUMFLEXEQUAL = '^='; + +COLON = ':'; +COMMA = ','; +SEMI = ';'; +PLUS = '+'; +MINUS = '-'; +STAR = '*'; +SLASH = '/'; +VBAR = '|'; +AMPER = '&'; + +LESS = '<'; +GREATER = '>'; +EQUAL = '='; +DOT = '.'; +PERCENT = '%'; +BACKQUOTE = '`'; +CIRCUMFLEX = '^'; +TILDE = '~'; +AT = '@'; + +LPAR = '('; +RPAR = ')'; +LBRACE = '{'; +RBRACE = '}'; +LSQB = '['; +RSQB = ']'; + +PRINT = 'print'; +IMPORT = 'import'; +FROM = 'from'; +GLOBAL = 'global'; +EXEC = 'exec'; +ASSERT = 'assert'; +DEL = 'del'; +AS = 'as'; +LAMBDA = 'lambda'; + +# Definitions +DEF = 'def'; +CLASS = 'class'; + +# Flow Blocks +TRY = 'try'; +EXCEPT = 'except'; +FINALLY = 'finally'; +IF = 'if'; +ELIF = 'elif'; +ELSE = 'else'; +FOR = 'for'; +WHILE = 'while'; +WITH = 'with'; + +# Flow +BREAK = 'break'; +CONTINUE = 'continue'; +RETURN = 'return'; +YIELD = 'yield'; +RAISE = 'raise'; +PASS = 'pass'; + +# Operators +AND = 'and'; +OR = 'or'; +NOT = 'not'; +IS = 'is'; +IN = 'in'; + +NAME + = + /[a-zA-Z_]\w*/ + ; + +EOF + = + '' $ ~ + ; + +indent = + INDENT + ; +dedent = + DEDENT + ; + +INDENT + = + '' + ; + +DEDENT + = + '' + ; diff --git a/python_parser.py b/python_parser.py new file mode 100644 index 0000000..8580650 --- /dev/null +++ b/python_parser.py @@ -0,0 +1,136 @@ +from __future__ import print_function, division, absolute_import, unicode_literals +from grako.parsing import graken, Parser +from grako.buffering import Buffer, PosLine, LineInfo +from grako.util import re, RE_FLAGS + + +__version__ = (2015, 5, 20, 15, 52, 20, 2) + +from parser_class import PythonParser + +class MyBuffer(Buffer): + def __init__( + self, + text, + filename=None, + comments_re=None, + eol_comments_re=None, + whitespace=None, + **kwargs): + self.last_line = 0 + self.indent_stack = [] + super(MyBuffer, self).__init__( + text, + filename=filename, + memoize_lookaheads=False, + comment_recovery=True, + comments_re=comments_re, + whitespace=whitespace or '\t \f', + eol_comments_re=eol_comments_re, + **kwargs + ) + + def _dedent_from_stack(self, new_lines): + while self.indent_stack: + self.indent_stack.pop() + new_lines[-1] +='' + return new_lines + + def process_leading_spaces(self, line, leading_spaces): + if self.indent_stack: + if len(leading_spaces) > len(self.indent_stack[-1]): + line = ''+line + self.indent_stack.append(leading_spaces) + elif len(leading_spaces) < len(self.indent_stack[-1]): + line = line+'' + self.indent_stack.pop() + else: + line = ''+line + self.indent_stack.append(leading_spaces) + return line + + def dedent_and_end(self, new_lines): + new_lines = self._dedent_from_stack(new_lines) + new_lines[-1] += '' + + def indent_and_dedent_lines(self, lines): + new_lines = [] + for line in lines: + leading_spaces = re.search(u'^['+self.whitespace+']+', line) + if leading_spaces is not None: + leading_spaces = leading_spaces.group().strip('\n') + if leading_spaces: + line = self.process_leading_spaces(line, leading_spaces) + else: + new_lines = self._dedent_from_stack(new_lines) + else: + new_lines = self._dedent_from_stack(new_lines) + + new_lines.append(line) + return new_lines + + def process_lines(self, lines): + new_lines = self.indent_and_dedent_lines(lines) + self.dedent_and_end(new_lines) + return new_lines + + def process_block(self, name, lines, index, **kwargs): + new_lines = self.process_lines(lines) + return new_lines, index + + +def main(filename, startrule, trace=False, whitespace=None, nameguard=None): + import json + with open(filename) as f: + text = f.read() + parser = PythonParser(parseinfo=False) + buf = MyBuffer(text, whitespace=whitespace,nameguard=nameguard,trace=trace,filename=filename) + ast = parser.parse( + buf, + startrule, + filename=filename, + trace=trace, + whitespace=whitespace, + nameguard=nameguard) + print('AST:') + print(ast) + print() + print('JSON:') + print(json.dumps(ast, indent=2)) + print() + +if __name__ == '__main__': + import argparse + import string + import sys + + class ListRules(argparse.Action): + def __call__(self, parser, namespace, values, option_string): + print('Rules:') + for r in pythonParser.rule_list(): + print(r) + print() + sys.exit(0) + + parser = argparse.ArgumentParser(description="Simple parser for python.") + parser.add_argument('-l', '--list', action=ListRules, nargs=0, + help="list all rules and exit") + parser.add_argument('-n', '--no-nameguard', action='store_true', + dest='no_nameguard', + help="disable the 'nameguard' feature") + parser.add_argument('-t', '--trace', action='store_true', + help="output trace information") + parser.add_argument('-w', '--whitespace', type=str, default=None, + help="whitespace specification") + parser.add_argument('file', metavar="FILE", help="the input file to parse") + parser.add_argument('startrule', metavar="STARTRULE", + help="the start rule for parsing") + args = parser.parse_args() + + main( + args.file, + args.startrule, + trace=args.trace, + whitespace=args.whitespace, + nameguard=not args.no_nameguard + ) diff --git a/test/test.py b/test/test.py new file mode 100644 index 0000000..583b1c1 --- /dev/null +++ b/test/test.py @@ -0,0 +1,13 @@ +a = 8 +stuff = x + +class Test(object): + def stuff(self): + print 'stuff' + + + + + +x = 8 +c = u