From aaadba738c6645df5050f166a1b8c77c223332b7 Mon Sep 17 00:00:00 2001
From: Joey Payne <jyapayne@gmail.com>
Date: Fri, 22 May 2015 12:55:40 -0600
Subject: [PATCH] Initial commit with python grammar and parser files.

---
 Python.grako     | 677 +++++++++++++++++++++++++++++++++++++++++++++++
 python_parser.py | 136 ++++++++++
 test/test.py     |  13 +
 3 files changed, 826 insertions(+)
 create mode 100644 Python.grako
 create mode 100644 python_parser.py
 create mode 100644 test/test.py

diff --git a/Python.grako b/Python.grako
new file mode 100644
index 0000000..b93d157
--- /dev/null
+++ b/Python.grako
@@ -0,0 +1,677 @@
+@@comments :: /\#[^\n]*/
+@@eol_comments :: /\#[^\n]*/
+
+@@whitespace :: /[\t \f]+/
+
+
+start
+    =
+    {newline|@:stmt}+
+    ;  # entry rule: one or more newlines or statements; only the stmt results are captured (@:)
+
+
+
+single_input
+    =
+    newline | @:simple_stmt | @:compound_stmt {dedent} newline
+    ;
+
+
+file_input
+    =
+    {NEWLINE | stmt}* $
+    ;
+
+
+eval_input
+    =
+    testlist {NEWLINE}* $
+    ;
+
+
+decorator
+    =
+    '@' name:dotted_name args:{'(' {arglist} ')'} newline
+    ;
+
+
+decorators
+    =
+    decorators:{decorator}+
+    ;
+
+
+decorated
+    =
+    decorators (classdef | funcdef)
+    ;
+
+funcdef
+    =
+    type:'def' name:name params:parameters ':' body:suite
+    ;
+
+
+parameters
+    =
+    '(' @:({varargslist}) ')'
+    ;
+
+varargslist= ({@:fpdef {'=' @:test} ','}*
+              ('*' @:name {',' '**' @:name} | '**' @:name) |
+              @:fpdef {'=' @:test} {',' @:fpdef {'=' @:test}}* {','});
+
+fpdef = name | '(' fplist ')';
+fplist= fpdef {',' fpdef}* {','};
+
+stmt
+    =
+    $ ~ | @:(compound_stmt | simple_stmt)
+    ;  # end of input followed by a cut, otherwise a compound statement (tried first) or a simple statement
+
+newline=
+    NEWLINE
+    ;
+
+eof = EOF ~;
+
+simple_stmt
+    =
+    statement:small_stmt {';' statement:small_stmt}* {';'} newline {dedent} {eof}
+    ;  # ';'-separated small statements ended by a newline; also consumes any <DEDENT>/<EOF> markers that follow (inserted by MyBuffer in python_parser.py)
+
+
+small_stmt
+    =
+        (print_stmt
+        | expr_stmt
+        | del_stmt
+        | pass_stmt
+        | flow_stmt
+        | import_stmt
+        | global_stmt
+        | exec_stmt
+        | assert_stmt)
+    ;
+
+
+expr_stmt
+    =
+    @:testlist ~ 
+    (
+          @:augassign @:(yield_expr | testlist) ~
+        | {@:'=' @:(yield_expr | testlist)}* ~
+    )
+    ;
+
+
+augassign
+    =
+    (
+          '+='
+        | '-='
+        | '*='
+        | '/='
+        | '%='
+        | '&='
+        | '|='
+        | '^='
+        | '<<='
+        | '>>='
+        | '**='
+        | '//='
+    )
+    ;
+
+
+del_stmt
+    =
+    'del' exprlist
+    ;
+
+print_stmt
+    =
+    @:'print' ( @:'>>' @+:test { {',' @+:test}+ {','}} |
+                          { @+:test {',' @+:test}* {','} } )
+    ;  # the '>>' form must come first: the plain form is a closure that also matches nothing, so as first choice it always won and "print >>f, x" could never parse
+
+
+pass_stmt
+    =
+    'pass'
+    ;
+
+
+flow_stmt
+    =
+    break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
+    ;
+
+
+break_stmt
+    =
+    'break'
+    ;
+
+
+continue_stmt
+    =
+    'continue'
+    ;
+
+
+return_stmt
+    =
+    'return' {testlist}
+    ;
+
+
+yield_stmt
+    =
+    yield_expr
+    ;
+
+
+raise_stmt
+    =
+    'raise' {test {'from' test}}
+    ;
+
+
+import_stmt
+    =
+    import_name | import_from
+    ;
+
+
+import_name
+    =
+    'import' dotted_as_names
+    ;
+
+import_from= ('from' ({'.'}* dotted_name | {'.'}+)
+              'import' ('*' | '(' import_as_names ')' | import_as_names));
+
+import_as_name
+    =
+    name {'as' name}
+    ;
+
+
+dotted_as_name
+    =
+    dotted_name {'as' name}
+    ;
+
+
+import_as_names
+    =
+    import_as_name {',' import_as_name}* {','}
+    ;
+
+
+dotted_as_names
+    =
+    dotted_as_name {',' dotted_as_name}*
+    ;
+
+
+dotted_name
+    =
+    @:(NAME {'.' NAME}*)
+    ;
+
+
+global_stmt
+    =
+    'global' name {',' name}*
+    ;
+
+
+exec_stmt
+    =
+    'exec' expr {'in' test {',' test}}
+    ;
+
+
+assert_stmt
+    =
+    'assert' test {',' test}
+    ;
+
+
+compound_stmt
+    =
+    statement:(if_stmt
+    | while_stmt
+    | for_stmt
+    | try_stmt
+    | with_stmt
+    | funcdef
+    | classdef
+    | decorated)
+    ;
+
+
+if_stmt
+    =
+    'if' test ':' suite {'elif' test ':' suite}* {'else' ':' suite}
+    ;
+
+
+while_stmt
+    =
+    'while' test ':' suite {'else' ':' suite}
+    ;
+
+
+for_stmt
+    =
+    'for' exprlist 'in' testlist ':' suite {'else' ':' suite}
+    ;
+
+
+try_stmt
+    =
+    (
+        'try'
+        ':'
+        suite
+        (
+              {except_clause ':' suite}+ {'else' ':' suite} {'finally' ':' suite}
+            | 'finally' ':' suite
+        )
+    )
+    ;
+
+
+with_stmt
+    =
+    'with' items:(with_item {',' with_item}*) ':' body:suite
+    ;
+
+
+with_item
+    =
+    test {'as' expr}
+    ;
+
+
+except_clause
+    =
+    'except' {test {('as' | ',') test}}
+    ;
+
+suite
+    =
+    {newline} indent @:{stmt}+ {dedent} {eof} | @:simple_stmt 
+    ;  # either an <INDENT>-introduced block of one or more statements (with trailing <DEDENT>/<EOF> markers consumed), or a simple statement on the header line
+
+testlist_safe= old_test {{',' old_test}+ {','}};
+old_test= or_test | old_lambdef;
+old_lambdef= 'lambda' {varargslist} ':' old_test;
+
+test
+    =
+    @:or_test {'if' @:or_test 'else' @:test} | @:lambdef
+    ;
+
+lambdef
+    =
+    'lambda' {varargslist} ':' test
+    ;
+
+or_test
+    =
+    @:and_test {'or' @:and_test}*
+    ;
+
+
+and_test
+    =
+    (@:not_test {'and' @:not_test}*)
+    ;
+
+
+not_test
+    =
+    'not' @:not_test | @:comparison
+    ;
+
+
+comparison
+    =
+    @:expr {comp_op @:expr}*
+    ;
+
+
+comp_op
+    =
+      '<='
+    | '>='
+    | '<>'
+    | '=='
+    | '!='
+    | '<'
+    | '>'
+    | 'not' 'in'
+    | 'in'
+    | 'is' 'not'
+    | 'is'
+    ;  # longest operators first: PEG choice commits to the first alternative that matches, so '<', '>' and 'is' used to shadow '<=', '<>', '>=' and 'is' 'not'
+
+
+expr
+    =
+    @:xor_expr {'|' @:xor_expr}*
+    ;
+
+
+xor_expr
+    =
+    @:and_expr {'^' @:and_expr}*
+    ;
+
+
+and_expr
+    =
+    @:shift_expr {'&' @:shift_expr}*
+    ;
+
+
+shift_expr
+    =
+    @:arith_expr {('<<' | '>>') @:arith_expr}*
+    ;
+
+
+arith_expr
+    =
+    @:term {('+' | '-') @:term}*
+    ;
+
+
+term
+    =
+    @:factor {('*' | '//' | '/' | '%') @:factor}*
+    ;  # floor division is listed before true division: with ordered choice a leading single slash matched first and the double-slash operator was unreachable
+
+
+factor
+    =
+    ('+' | '-' | '~') @:factor | @:power
+    ;
+
+
+power
+    =
+    @:atom {@:trailer} {'**' @:factor}
+    ;
+
+atom= ('(' @:{yield_expr|testlist_comp} ')' |
+       '[' @:{listmaker} ']' |
+       '{' @:{dictorsetmaker} '}' |
+       '`' @:testlist1 '`' |
+       @:{string}+ | @:name | @:NUMBER);  # strings are tried before names: otherwise the prefix of a literal such as u'..' or r".." is consumed as the identifier u / r and the quoted part is left unparsed
+
+listmaker= test ( list_for | {',' test}* {','} );
+
+testlist_comp
+    =
+    test ( comp_for | {',' test}* {','} )
+    ;
+
+
+trailer
+    =
+    '(' {@:arglist} ')' | '[' @:subscriptlist ']' | '.' @:NAME
+    ;
+
+
+subscriptlist
+    =
+    subscript {',' subscript}* {','}
+    ;
+
+subscript= '.' '.' '.' | {test} ':' {test} {sliceop} | test;  # slice form before bare test: ordered choice accepted the lower bound as a complete subscript, so a[1:2] failed at the ':'
+
+
+sliceop
+    =
+    ':' {test}
+    ;
+
+
+exprlist
+    =
+    @:expr {',' @:expr}* {','}
+    ;
+
+
+testlist
+    =
+    @:test {',' @:test}* {','}
+    ;
+
+
+dictorsetmaker
+    =
+    (
+          (test ':' test (comp_for | {',' test ':' test}* {','}))
+        | (test (comp_for | {',' test}* {','}))
+    )
+    ;
+
+
+classdef
+    =
+    type:'class' name:name {'(' base_classes:{arglist} ')'} ':' body:suite
+    ;
+
+
+arglist
+    =
+    {@+:argument ','}* (@+:argument {','} | '*' @+:test {',' @+:argument}* {',' '**' @+:test} | '**' @+:test)
+    ;
+
+
+argument
+    =
+    @:test '=' @:test | @:test {@:comp_for}
+    ;  # keyword form first: the positional alternative succeeds on the keyword name alone, so with the old order "f(a=1)" stopped at the '=' and failed
+
+
+list_iter= list_for | list_if;
+list_for= 'for' exprlist 'in' testlist_safe {list_iter};
+list_if= 'if' old_test {list_iter};
+
+comp_iter
+    =
+    comp_for | comp_if
+    ;
+
+
+comp_for
+    =
+    'for' exprlist 'in' or_test {comp_iter}
+    ;
+
+
+comp_if
+    =
+    'if' old_test {comp_iter}
+    ;
+
+testlist1= test {',' test}*;
+
+encoding_decl
+    =
+    name
+    ;
+
+
+yield_expr
+    =
+    'yield' {testlist1}
+    ;
+
+
+NEWLINE
+    =
+    /(\r?\n[\t ]*)+/
+    ;
+
+NUMBER
+    =
+    number:(IMAG_NUMBER |
+     FLOAT_NUMBER |
+     DEC_NUMBER   |
+     HEX_NUMBER   |
+     OCT_NUMBER   |
+     BIN_NUMBER)
+    ;  # imaginary literals are tried first: otherwise FLOAT_NUMBER/DEC_NUMBER match the digits of "1j" and leave the j suffix unparsed
+
+term_symbol = STAR|SLASH|PERCENT|DOUBLESLASH ;
+shift_symbol = LEFTSHIFT|RIGHTSHIFT;
+add_symbol = PLUS|MINUS;
+name = NAME;
+string = STRING | LONG_STRING;
+
+I = /(?i)/; # Case insensitive
+S = /(?s)/; # Dot matches newline
+J = /([jJ])/;
+
+LONG_POSTFIX = /[lL]?/;
+EXP_POSTFIX = /[eE][-+]?\d+/;
+DEC_NUMBER = value:/[1-9]\d*(?![.0])/   postfix:LONG_POSTFIX;
+HEX_NUMBER = value:/0[xX][\da-fA-F]+/ postfix:LONG_POSTFIX;  # at least one hex digit is required; a bare "0x" is not a valid literal
+OCT_NUMBER = value:/0[oO]?(?![bBxX])[0-7]*/  postfix:LONG_POSTFIX;
+FLOAT_NUMBER = value:/(\d+\.\d*|\.\d+)([eE][-+]?\d+)?|\d+[eE][-+]?\d+/;
+IMAG_NUMBER = value:(FLOAT_NUMBER | /\d+/) postfix:J;  # float first: with ordered choice the integer pattern matched the "1" of "1.5j" and the rule then failed at the "."
+BIN_NUMBER = value:/0[bB][01]+/ postfix:LONG_POSTFIX;
+STRING_PREFIX = /(?i)(u|b|)r?/;  # case-insensitive: Python also accepts U, B, R and mixed-case combinations such as Ur or bR
+STRING_INTERNAL = /.\*?(?<!\\)(\\\\)*?/ ;
+QUOTE = "'";
+DBLQUOTE = '"';
+QUOTE3 = "'''";
+DBLQUOTE3 = '"""';
+
+STRING = STRING_PREFIX @:(/"(?!"").*?(?<!\\)(\\\\)*?"/ | /'(?!'').*?(?<!\\)(\\\\)*?'/);
+
+LONG_STRING = STRING_PREFIX @:(/(?s)""".*?(?<!\\)(\\\\)*?(\n)*?"""/ | /(?s)'''.*?(?<!\\)(\\\\)*?(\n)*?'''/);  # the (?s) flag lives inside each pattern: regex flags apply only to the pattern that contains them, so the separate S rule never made "." match newlines here
+
+LEFTSHIFTEQUAL = '<<=';
+RIGHTSHIFTEQUAL = '>>=';
+DOUBLESTAREQUAL = '**=';
+DOUBLESLASHEQUAL = '//=';
+
+EQEQUAL = '==';
+NOTEQUAL = '!=' | '<>';  # two alternative tokens; the previous single quoted string was a literal containing the "|" character, not an alternation
+LESSEQUAL = '<=';
+LEFTSHIFT = '<<';
+GREATEREQUAL = '>=';
+RIGHTSHIFT = '>>';
+PLUSEQUAL = '+=';
+MINEQUAL = '-=';
+DOUBLESTAR = '**';
+STAREQUAL = '*=';
+DOUBLESLASH = '//';
+SLASHEQUAL = '/=';
+VBAREQUAL = '|=';
+PERCENTEQUAL = '%=';
+AMPEREQUAL = '&=';
+CIRCUMFLEXEQUAL = '^=';
+
+COLON = ':';
+COMMA = ',';
+SEMI = ';';
+PLUS = '+';
+MINUS = '-';
+STAR = '*';
+SLASH = '/';
+VBAR = '|';
+AMPER = '&';
+
+LESS = '<';
+GREATER = '>';
+EQUAL = '=';
+DOT = '.';
+PERCENT = '%';
+BACKQUOTE = '`';
+CIRCUMFLEX = '^';
+TILDE = '~';
+AT = '@';
+
+LPAR = '(';
+RPAR = ')';
+LBRACE = '{';
+RBRACE = '}';
+LSQB = '[';
+RSQB = ']';
+
+PRINT = 'print';
+IMPORT = 'import';
+FROM = 'from';
+GLOBAL = 'global';
+EXEC = 'exec';
+ASSERT = 'assert';
+DEL = 'del';
+AS = 'as';
+LAMBDA = 'lambda';
+
+# Definitions
+DEF = 'def';
+CLASS = 'class';
+
+# Flow Blocks
+TRY = 'try';
+EXCEPT = 'except';
+FINALLY = 'finally';
+IF = 'if';
+ELIF = 'elif';
+ELSE = 'else';
+FOR = 'for';
+WHILE = 'while';
+WITH = 'with';
+
+# Flow
+BREAK = 'break';
+CONTINUE = 'continue';
+RETURN = 'return';
+YIELD = 'yield';
+RAISE = 'raise';
+PASS = 'pass';
+
+# Operators
+AND = 'and';
+OR = 'or';
+NOT = 'not';
+IS = 'is';
+IN = 'in';
+
+NAME
+    =
+    /[a-zA-Z_]\w*/
+    ;
+
+EOF
+    =
+    '<EOF>' $ ~
+    ;  # the literal <EOF> marker (appended by MyBuffer.dedent_and_end in python_parser.py), then real end of input, then a cut
+
+indent =
+    INDENT
+    ;
+dedent =
+    DEDENT
+    ;
+
+INDENT
+    =
+    '<INDENT>'
+    ;  # text marker that MyBuffer.process_leading_spaces (python_parser.py) prepends to a line that is indented deeper than the current level
+
+DEDENT
+    =
+    '<DEDENT>'
+    ;  # text marker that MyBuffer (python_parser.py) appends to a line when an indentation level is closed
diff --git a/python_parser.py b/python_parser.py
new file mode 100644
index 0000000..8580650
--- /dev/null
+++ b/python_parser.py
@@ -0,0 +1,136 @@
+from __future__ import print_function, division, absolute_import, unicode_literals
+from grako.parsing import graken, Parser
+from grako.buffering import Buffer, PosLine, LineInfo
+from grako.util import re, RE_FLAGS
+
+
+__version__ = (2015, 5, 20, 15, 52, 20, 2)
+
+from parser_class import PythonParser
+
+class MyBuffer(Buffer):  # grako Buffer that marks indentation changes in the text with <INDENT>/<DEDENT>/<EOF> markers
+    def __init__(
+            self,
+            text,
+            filename=None,
+            comments_re=None,
+            eol_comments_re=None,
+            whitespace=None,
+            **kwargs):
+        self.last_line = 0  # not read anywhere in this class
+        self.indent_stack = []  # leading-whitespace strings of the indent levels currently open
+        super(MyBuffer, self).__init__(  # NOTE(review): state is set before this call, presumably because Buffer.__init__ invokes process_block -- confirm
+            text,
+            filename=filename,
+            memoize_lookaheads=False,
+            comment_recovery=True,
+            comments_re=comments_re,
+            whitespace=whitespace or '\t \f',  # default: tab, space and form feed (no newline)
+            eol_comments_re=eol_comments_re,
+            **kwargs
+        )
+
+    def _dedent_from_stack(self, new_lines):  # closes every open indent level; returns the same list object
+        while self.indent_stack:
+            self.indent_stack.pop()
+            new_lines[-1] +='<DEDENT>'  # one marker per closed level, appended to the last line emitted so far
+        return new_lines
+
+    def process_leading_spaces(self, line, leading_spaces):
+        """Return `line` tagged with <INDENT>/<DEDENT> markers; updates `indent_stack`."""
+        width = len(leading_spaces)
+        if not self.indent_stack or width > len(self.indent_stack[-1]):
+            # First indented line, or deeper than the current level: open a block.
+            self.indent_stack.append(leading_spaces)
+            return '<INDENT>' + line
+        # Close every level deeper than this line (one marker each), not only the innermost one.
+        while self.indent_stack and width < len(self.indent_stack[-1]):
+            self.indent_stack.pop()
+            line += '<DEDENT>'  # NOTE(review): kept after the line as before; _dedent_from_stack puts it before -- confirm intent
+        return line
+
+    def dedent_and_end(self, new_lines):  # closes open indent levels and appends the <EOF> marker, mutating new_lines in place
+        new_lines[:] = self._dedent_from_stack(new_lines or [''])  # empty input gets one empty line to carry the markers instead of raising IndexError
+        new_lines[-1] += '<EOF>'
+
+    def indent_and_dedent_lines(self, lines):  # returns a new list of the lines with <INDENT>/<DEDENT> markers added
+        new_lines = []
+        for line in lines:
+            leading_spaces = re.search(u'^['+self.whitespace+']+', line)  # assumes self.whitespace is a string usable inside a character class -- TODO confirm against grako's Buffer
+            if leading_spaces is not None:
+                leading_spaces = leading_spaces.group().strip('\n')
+                if leading_spaces:
+                    line = self.process_leading_spaces(line, leading_spaces)
+                else:
+                    new_lines = self._dedent_from_stack(new_lines)
+            else:
+                new_lines = self._dedent_from_stack(new_lines)  # no leading whitespace matched: every open level is closed on the previous line
+
+            new_lines.append(line)
+        return new_lines
+
+    def process_lines(self, lines):  # full pipeline: add indentation markers, then close open levels and append <EOF>
+        new_lines = self.indent_and_dedent_lines(lines)
+        self.dedent_and_end(new_lines)  # mutates new_lines in place; its return value is not used
+        return new_lines
+
+    def process_block(self, name, lines, index, **kwargs):  # name and kwargs are accepted but unused here
+        new_lines = self.process_lines(lines)
+        return new_lines, index  # index is passed through unchanged
+
+
+def main(filename, startrule, trace=False, whitespace=None, nameguard=None):  # parses `filename` from rule `startrule` and prints the AST as text and as JSON
+    import json
+    with open(filename) as f:
+        text = f.read()
+    parser = PythonParser(parseinfo=False)
+    buf = MyBuffer(text, whitespace=whitespace,nameguard=nameguard,trace=trace,filename=filename)  # MyBuffer inserts the <INDENT>/<DEDENT>/<EOF> markers the grammar expects
+    ast = parser.parse(
+        buf,
+        startrule,
+        filename=filename,
+        trace=trace,
+        whitespace=whitespace,
+        nameguard=nameguard)
+    print('AST:')
+    print(ast)
+    print()
+    print('JSON:')
+    print(json.dumps(ast, indent=2))
+    print()
+
+if __name__ == '__main__':  # command-line entry point: parse FILE starting at STARTRULE
+    import argparse
+    import string
+    import sys
+
+    class ListRules(argparse.Action):  # argparse action behind -l/--list
+        def __call__(self, parser, namespace, values, option_string):
+            print('Rules:')
+            for r in PythonParser.rule_list():  # was `pythonParser`, an undefined name (NameError); the imported class is PythonParser
+                print(r)
+            print()
+            sys.exit(0)  # listing rules replaces parsing, so stop here
+
+    parser = argparse.ArgumentParser(description="Simple parser for python.")
+    parser.add_argument('-l', '--list', action=ListRules, nargs=0,
+                        help="list all rules and exit")
+    parser.add_argument('-n', '--no-nameguard', action='store_true',
+                        dest='no_nameguard',
+                        help="disable the 'nameguard' feature")
+    parser.add_argument('-t', '--trace', action='store_true',
+                        help="output trace information")
+    parser.add_argument('-w', '--whitespace', type=str, default=None,
+                        help="whitespace specification")
+    parser.add_argument('file', metavar="FILE", help="the input file to parse")
+    parser.add_argument('startrule', metavar="STARTRULE",
+                        help="the start rule for parsing")
+    args = parser.parse_args()
+
+    main(
+        args.file,
+        args.startrule,
+        trace=args.trace,
+        whitespace=args.whitespace,
+        nameguard=not args.no_nameguard  # the flag disables nameguard, so it is inverted here
+    )
diff --git a/test/test.py b/test/test.py
new file mode 100644
index 0000000..583b1c1
--- /dev/null
+++ b/test/test.py
@@ -0,0 +1,13 @@
+a = 8
+stuff = x
+
+class Test(object):
+    def stuff(self):
+        print 'stuff'
+
+
+
+
+
+x = 8
+c = u