diff --git a/docs/source/doc/functions.rst b/docs/source/doc/functions.rst index e56afe6..8922dd0 100644 --- a/docs/source/doc/functions.rst +++ b/docs/source/doc/functions.rst @@ -13,34 +13,29 @@ References to functions already present in a module can be retrieved via ``Function.get``. All functions in a module can be enumerated by iterating over ``module_obj.functions``. -{% highlight python %} # create a type, representing functions that take -an integer and return # a floating point value. ft = Type.function( -Type.float(), [ Type.int() ] ) -create a function of this type -============================== +.. code-block:: python -f1 = module\_obj.add\_function(ft, "func1") + # create a type, representing functions that take + an integer and return # a floating point value. ft = Type.function( + Type.float(), [ Type.int() ] ) + + # create a function of this type + f1 = module_obj.add_function(ft, "func1") + + # or equivalently, like this: + f2 = Function.new(module_obj, ft, "func2") + + # get a reference to an existing function + f3 = module_obj.get_function_named("func3") + + # or like this: + f4 = Function.get(module_obj, "func4") + + # list all function names in a module + for f in module_obj.functions: print f.name -or equivalently, like this: -=========================== -f2 = Function.new(module\_obj, ft, "func2") - -get a reference to an existing function -======================================= - -f3 = module\_obj.get\_function\_named("func3") - -or like this: -============= - -f4 = Function.get(module\_obj, "func4") - -list all function names in a module -=================================== - -for f in module\_obj.functions: print f.name {% endhighlight %} Intrinsic ========= @@ -52,13 +47,16 @@ called with a module object, an intrinsic ID (which is a numeric constant) and a list of the types of arguments (which LLVM uses to resolve overloaded intrinsic functions). 
-{% highlight python %} # get a reference to the llvm.bswap intrinsic -bswap = Function.intrinsic(mod, INTR\_BSWAP, [Type.int()]) -call it -======= +.. code-block:: python + + # get a reference to the llvm.bswap intrinsic + bswap = Function.intrinsic(mod, INTR_BSWAP, [Type.int()]) + + # call it + builder.call(bswap, [value]) + -builder.call(bswap, [value]) {% endhighlight %} Here, the constant ``INTR_BSWAP``, available from ``llvm.core``, represents the LLVM intrinsic @@ -111,13 +109,16 @@ The value objects corresponding to the arguments of a function can be got using the read-only property ``args``. These can be iterated over, and also be indexed via integers. An example: -{% highlight python %} # list all argument names and types for arg in -fn.args: print arg.name, "of type", arg.type -change the name of the first argument -===================================== +.. code-block:: python + + # list all argument names and types for arg in + fn.args: print arg.name, "of type", arg.type + + # change the name of the first argument + fn.args[0].name = "objptr" + -fn.args[0].name = "objptr" {% endhighlight %} Basic blocks (see later) are contained within functions. When newly created, a function has no basic blocks. They have to be added @@ -130,71 +131,19 @@ blocks can be got via ``basic_block_count`` method. Note that ``get_entry_basic_block`` is slightly faster than ``basic_blocks[0]`` and so is ``basic_block_count``, over ``len(f.basic_blocks)``. -{% highlight python %} # add a basic block b1 = -fn.append\_basic\_block("entry") -get the first one -================= +.. 
code-block:: python -b2 = fn.get\_entry\_basic\_block() b2 = fn.basic\_mdblocks[0] # slower -than previous method - -print names of all basic blocks -=============================== - -for b in fn.basic\_blocks: print b.name - -get number of basic blocks -========================== - -n = fn.basic\_block\_count n = len(fn.basic\_blocks) # slower than -previous method {% endhighlight %} - -Functions can be deleted using the method ``delete``. This deletes them -from their containing module. All references to the function object -should be dropped after ``delete`` has been called. - -Functions can be verified with the ``verify`` method. Note that this may -not work properly (aborts on errors). - -Function Attributes # {#fnattr} -=============================== - -Function attributes, as documented -`here `_, can be set on -functions using the methods ``add_attribute`` and ``remove_attribute``. -The following values may be used to refer to the LLVM attributes: - -Value \| Equivalent LLVM Assembly Keyword \| -------\|----------------------------------\| -``ATTR_ALWAYS_INLINE``\ \|\ ``alwaysinline`` \| -``ATTR_INLINE_HINT``\ \|\ ``inlinehint`` \| -``ATTR_NO_INLINE``\ \|\ ``noinline`` \| -``ATTR_OPTIMIZE_FOR_SIZE``\ \|\ ``optsize`` \| -``ATTR_NO_RETURN``\ \|\ ``noreturn`` \| -``ATTR_NO_UNWIND``\ \|\ ``nounwind`` \| -``ATTR_READ_NONE``\ \|\ ``readnone`` \| -``ATTR_READONLY``\ \|\ ``readonly`` \| -``ATTR_STACK_PROTECT``\ \|\ ``ssp`` \| -``ATTR_STACK_PROTECT_REQ``\ \|\ ``sspreq`` \| -``ATTR_NO_REDZONE``\ \|\ ``noredzone`` \| -``ATTR_NO_IMPLICIT_FLOAT``\ \|\ ``noimplicitfloat`` \| -``ATTR_NAKED``\ \|\ ``naked`` \| - -Here is how attributes can be set and removed: - -{% highlight python %} # create a function ti = Type.int(32) tf = -Type.function(ti, [ti, ti]) m = Module.new('mod') f = -m.add\_function(tf, 'sum') print f # declare i32 @sum(i32, i32) - -add a couple of attributes -========================== - -f.add\_attribute(ATTR\_NO\_UNWIND) f.add\_attribute(ATTR\_READONLY) 
-print f # declare i32 @sum(i32, i32) nounwind readonly {% endhighlight -%} - -**Related Links** - -`llvm.core.Function `_, -`llvm.core.Argument `_ + # add a basic block + b1 = fn.append_basic_block("entry") + + # get the first one + b2 = fn.get_entry_basic_block() + b2 = fn.basic_blocks[0] # slower than previous method + + # print names of all basic blocks + for b in fn.basic_blocks: print b.name + + # get number of basic blocks + n = fn.basic_block_count + n = len(fn.basic_blocks) # slower than previous method diff --git a/docs/source/doc/getting_started.rst b/docs/source/doc/getting_started.rst index 4d35074..d703785 100644 --- a/docs/source/doc/getting_started.rst +++ b/docs/source/doc/getting_started.rst @@ -78,8 +78,9 @@ object files be built with the ``-fPIC`` option (generate position independent code). Be sure to use the ``--enable-pic`` option while configuring LLVM (default is no PIC), like this: -{% highlight bash %} ~/llvm$ ./configure --enable-pic --enable-optimized -{% endhighlight %} +.. code-block:: bash + + ~/llvm$ ./configure --enable-pic --enable-optimized llvm-config ----------- @@ -103,51 +104,8 @@ LLVM's 'configure'. Get llvmpy and install it: -{% highlight bash %} $ git clone git@github.com:numba/llvmpy.git $ cd -llvmpy $ python setup.py install {% endhighlight %} -If you need to tell the build script where ``llvm-config`` is, do it -this way: +.. code-block:: bash -{% highlight bash %} $ python setup.py install --user ---llvm-config=/home/mdevan/llvm/Release/bin/llvm-config {% endhighlight -%} - -To build a debug version of llvmpy, that links against the debug -libraries of LLVM, use this: - -{% highlight bash %} $ python setup.py build -g ---llvm-config=/home/mdevan/llvm/Debug/bin/llvm-config $ python setup.py -install --user --llvm-config=/home/mdevan/llvm/Debug/bin/llvm-config {% -endhighlight %} - -Be warned that debug binaries will be huge (100MB+) ! They are required -only if you need to debug into LLVM also. 
- -``setup.py`` is a standard Python distutils script. See the Python -documentation regarding `Installing Python -Modules `_ and `Distributing -Python Modules `_ for more -information on such scripts. - - -Uninstall -============== - -If you'd installed llvmpy with the ``--user`` option, then llvmpy -would be present under ``~/.local/lib/python2.7/site-packages``. -Otherwise, it might be under ``/usr/lib/python2.7/site-packages`` or -``/usr/local/lib/python2.7/site-packages``. The directory would vary -with your Python version and OS flavour. Look around. - -Once you've located the site-packages directory, the modules and the -"egg" can be removed like so: - -{% highlight bash %} $ rm -rf /llvm /llvm\_py-.egg-info {% endhighlight -%} - -See the `Python -documentation `_ for more -information. - --------------- + $ git clone git@github.com:numba/llvmpy.git $ cd + llvmpy $ python setup.py install diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl1.rst b/docs/source/doc/kaleidoscope/PythonLangImpl1.rst index 3314187..9259d42 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl1.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl1.rst @@ -112,23 +112,36 @@ This gives the language a very nice and simple syntax. For example, the following simple example computes `Fibonacci numbers `_: -{% highlight python %} # Compute the x'th fibonacci number. def fib(x) -if x < 3 then 1 else fib(x-1)+fib(x-2) -This expression will compute the 40th number. -============================================= +.. code-block:: + + # Compute the x'th fibonacci number. + def fib(x): + if x < 3: + return 1 + else: + return fib(x-1)+fib(x-2) + + # This expression will compute the 40th number. + fib(40) + -fib(40) {% endhighlight %} We also allow Kaleidoscope to call into standard library functions (the LLVM JIT makes this completely trivial). This means that you can use the 'extern' keyword to define a function before you use it (this is also useful for mutually recursive functions). 
For example: -{% highlight python %} extern sin(arg); extern cos(arg); extern -atan2(arg1 arg2); -atan2(sin(0.4), cos(42)) {% endhighlight %} +.. code-block:: + + extern sin(arg); + extern cos(arg); + extern atan2(arg1 arg2); + + atan2(sin(0.4), cos(42)) + + A more interesting example is included in Chapter 6 where we write a little Kaleidoscope application that @@ -150,23 +163,32 @@ traditional way to do this is to use a the lexer includes a token type and potentially some metadata (e.g. the numeric value of a number). First, we define the possibilities: -{% highlight python %} # The lexer yields one of these types for each -token. class EOFToken(object): pass -class DefToken(object): pass +.. code-block:: python -class ExternToken(object): pass + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): + def __init__(self, name): + self.name = name + + class NumberToken(object): + def __init__(self, value): + self.value = value + + class CharacterToken(object): + def __init__(self, char): + self.char = char + def __eq__(self, other): + return isinstance(other, CharacterToken) and self.char == other.char + def __ne__(self, other): + return not self == other -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other {% endhighlight %} Each token yielded by our lexer will be of one of the above types. For simple tokens that are always the same, like the "def" keyword, the @@ -193,82 +215,109 @@ digits. 
Identifiers (and keywords) are alphanumeric string starting with a letter and comments are anything between a hash (``#``) and the end of the line. -{% highlight python %} import re -... +.. code-block:: python -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -{% endhighlight %} + import re + + ... + + # Regular expressions that match tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:\.[0-9]+)?') + REGEX_IDENTIFIER = re.compile('[a-zA-Z][a-zA-Z0-9]*') + REGEX_COMMENT = re.compile('#.*') + Next, let's start defining the ``Tokenize`` function itself. The first thing we need to do is set up a loop that scans the string, while ignoring whitespace between tokens: -{% highlight python %} def Tokenize(string): while string: # Skip -whitespace. if string[0].isspace(): string = string[1:] continue -:: +.. code-block:: python + + def Tokenize(string): + while string: # Skip whitespace. + if string[0].isspace(): + string = string[1:] + continue + + :: + + ... + + - ... -{% endhighlight %} Next we want to find out what the next token is. For this we run the regexes we defined above on the remainder of the string. To simplify the rest of the code, we run all three regexes each time. As mentioned above, inefficiencies are ignored for the purpose of this tutorial: -{% highlight python %} # Run regexes. comment\_match = -REGEX\_COMMENT.match(string) number\_match = REGEX\_NUMBER.match(string) -identifier\_match = REGEX\_IDENTIFIER.match(string) {% endhighlight %} -Now se check if any of the regexes matched. For comments, we simply +.. code-block:: python + + # Run regexes. 
+ comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + +Now we check if any of the regexes matched. For comments, we simply ignore the captured match: -{% highlight python %} # Check if any of the regexes matched and yield -the appropriate result. if comment\_match: comment = -comment\_match.group(0) string = string[len(comment):] {% endhighlight -python %} +.. code-block:: python + + # Check if any of the regexes matched and yield + # the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + For numbers, we yield the captured match, converted to a float and tagged with the appropriate token type: - -{% highlight python %} elif number\_match: number = -number\_match.group(0) yield NumberToken(float(number)) string = -string[len(number):] {% endhighlight %} + +.. code-block:: python + + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] The identifier case is a little more complex. We have to check for keywords to decide whether we have captured an identifier or a keyword: -{% highlight python %} elif identifier\_match: identifier = -identifier\_match.group(0) # Check if we matched a keyword. if -identifier == 'def': yield DefToken() elif identifier == 'extern': yield -ExternToken() else: yield IdentifierToken(identifier) string = -string[len(identifier):] {% endhighlight %} +.. code-block:: python + + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + Finally, if we haven't recognized a comment, a number of an identifier, we yield the current character as an "unknown character" token. 
This is used, for example, for operators like ``+`` or ``*``: -{% highlight python %} else: # Yield the unknown character. yield -CharacterToken(string[0]) string = string[1:] {% endhighlight %} + +.. code-block:: python + + else: # Yield the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + Once we're done with the loop, we return a final end-of-file token: -{% highlight python %} yield EOFToken() {% endhighlight %} -With this, we have the complete lexer for the basic Kaleidoscope -language (the `full code listing `_ for the -Lexer is available in the `next chapter `_ of the -tutorial). Next we'll `build a simple parser that uses this to build an -Abstract Syntax Tree `_. When we have that, we'll -include a driver so that you can use the lexer and parser together. +.. code-block:: python --------------- + yield EOFToken() -**`Next: Implementing a Parser and AST `_** diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl2.rst b/docs/source/doc/kaleidoscope/PythonLangImpl2.rst index f918633..3a05ff8 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl2.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl2.rst @@ -36,16 +36,19 @@ language, and the AST should closely model the language. In Kaleidoscope, we have expressions, a prototype, and a function object. We'll start with expressions first: -{% highlight python %} # Base class for all expression nodes. class -ExpressionNode(object): pass -Expression class for numeric literals like "1.0". -================================================= +.. code-block:: python + + # Base class for all expression nodes. class + ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". 
+ class NumberExpressionNode(ExpressionNode): def __init__(self, value): + self.value = value + + -class NumberExpressionNode(ExpressionNode): def **init**\ (self, value): -self.value = value -{% endhighlight %} The code above shows the definition of the base ExpressionNode class and one subclass which we use for numeric literals. The important thing to @@ -58,22 +61,23 @@ them. It would be very easy to add a virtual method to pretty print the code, for example. Here are the other expression AST node definitions that we'll use in the basic form of the Kaleidoscope language: -{% highlight python %} # Expression class for referencing a variable, -like "a". class VariableExpressionNode(ExpressionNode): def -**init**\ (self, name): self.name = name -Expression class for a binary operator. -======================================= +.. code-block:: python -class BinaryOperatorExpressionNode(ExpressionNode): def **init**\ (self, -operator, left, right): self.operator = operator self.left = left -self.right = right + # Expression class for referencing a variable, + like "a". class VariableExpressionNode(ExpressionNode): def + __init__(self, name): self.name = name + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): def __init__(self, + operator, left, right): self.operator = operator self.left = left + self.right = right + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): def __init__(self, callee, + args): self.callee = callee self.args = args -Expression class for function calls. -==================================== -class CallExpressionNode(ExpressionNode): def **init**\ (self, callee, -args): self.callee = callee self.args = args {% endhighlight %} This is all (intentionally) rather straight-forward: variables capture the variable name, binary operators capture their opcode (e.g. '+'), and @@ -89,17 +93,20 @@ Turing-complete; we'll fix that in a later installment. 
The two things we need next are a way to talk about the interface to a function, and a way to talk about functions themselves: -{% highlight python %} # This class represents the "prototype" for a -function, which captures its name, # and its argument names (thus -implicitly the number of arguments the function # takes). class -PrototypeNode(object): def **init**\ (self, name, args): self.name = -name self.args = args -This class represents a function definition itself. -=================================================== +.. code-block:: python + + # This class represents the "prototype" for a + function, which captures its name, # and its argument names (thus + implicitly the number of arguments the function # takes). class + PrototypeNode(object): def __init__(self, name, args): self.name = + name self.args = args + + # This class represents a function definition itself. + class FunctionNode(object): def __init__(self, prototype, body): + self.prototype = prototype self.body = body + -class FunctionNode(object): def **init**\ (self, prototype, body): -self.prototype = prototype self.body = body {% endhighlight %} In Kaleidoscope, functions are typed with just a count of their arguments. Since all values are double precision floating point, the @@ -120,22 +127,32 @@ build it. The idea here is that we want to parse something like ``x + y`` (which is returned as three tokens by the lexer) into an AST that could be generated with calls like this: -{% highlight python %} x = VariableExpressionNode('x') y = -VariableExpressionNode('y') result = BinaryOperatorExpressionNode('+', -x, y) {% endhighlight %} + +.. 
code-block:: python + + x = VariableExpressionNode('x') y = + VariableExpressionNode('y') result = BinaryOperatorExpressionNode('+', + x, y) + + In order to do this, we'll start by defining a lightweight ``Parser`` class with some basic helper routines: -{% highlight python %} class Parser(object): -def **init**\ (self, tokens, binop\_precedence): self.tokens = tokens -self.binop\_precedence = binop\_precedence self.Next() +.. code-block:: python + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + -# Provide a simple token buffer. Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. def Next(self): -self.current = self.tokens.next() {% endhighlight %} This implements a simple token buffer around the lexer. This allows us to look one token ahead at what the lexer is returning. Every function @@ -157,9 +174,14 @@ We start with numeric literals, because they are the simplest to process. For each production in our grammar, we'll define a function which parses that production. For numeric literals, we have: -{% highlight python %} # numberexpr ::= number def -ParseNumberExpr(self): result = NumberExpressionNode(self.current.value) -self.Next() # consume the number. return result {% endhighlight %} + +.. code-block:: python + + # numberexpr ::= number def + ParseNumberExpr(self): result = NumberExpressionNode(self.current.value) + self.Next() # consume the number. return result + + This method is very simple: it expects to be called when the current token is a ``NumberToken``. 
It takes the current number value, creates a @@ -173,20 +195,25 @@ not part of the grammar production) ready to go. This is a fairly standard way to go for recursive descent parsers. For a better example, the parenthesis operator is defined like this: -{% highlight python %} # parenexpr ::= '(' expression ')' def -ParseParenExpr(self): self.Next() # eat '('. -:: +.. code-block:: python - contents = self.ParseExpression() + # parenexpr ::= '(' expression ')' def + ParseParenExpr(self): self.Next() # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. - return contents - -{% endhighlight %} This function illustrates an interesting aspect of the parser. The function uses recursion by calling ``ParseExpression`` (we will soon see @@ -201,31 +228,36 @@ needed. The next simple production is for handling variable references and function calls: -{% highlight python %} # identifierexpr ::= identifier \| identifier '(' -expression\* ')' def ParseIdentifierExpr(self): identifier\_name = -self.current.name self.Next() # eat identifier. -:: +.. code-block:: python - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name); + # identifierexpr ::= identifier \| identifier '(' + expression\* ')' def ParseIdentifierExpr(self): identifier_name = + self.current.name self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name); + + # Call. + self.Next() # eat '('. 
+ args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + - # Call. - self.Next() # eat '('. - args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - self.Next() # eat ')'. - return CallExpressionNode(identifier_name, args) - -{% endhighlight %} This routine follows the same style as the other routines. It expects to be called if the current token is an ``IdentifierToken``. It also has @@ -243,13 +275,18 @@ that will become more clear `later in the tutorial `_. In order to parse an arbitrary primary expression, we need to determine what sort of expression it is: -{% highlight python %} # primary ::= identifierexpr \| numberexpr \| -parenexpr def ParsePrimary(self): if isinstance(self.current, -IdentifierToken): return self.ParseIdentifierExpr() elif -isinstance(self.current, NumberToken): return self.ParseNumberExpr(); -elif self.current == CharacterToken('('): return self.ParseParenExpr() -else: raise RuntimeError('Unknown token when expecting an expression.') -{% endhighlight %} + +.. 
code-block:: python + + # primary ::= identifierexpr \| numberexpr \| + parenexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr(); + elif self.current == CharacterToken('('): return self.ParseParenExpr() + else: raise RuntimeError('Unknown token when expecting an expression.') + + + Now that you see the definition of this function, it is more obvious why we can assume the state of ``Parser.current`` in the various functions. @@ -278,19 +315,24 @@ recursion. To start with, we need a table of precedences. Remember the ``binop_precedence`` parameter we passed to the ``Parser`` constructor? Now is the time to use it: -{% highlight python %} def main(): # Install standard binary operators. -# 1 is lowest possible precedence. 40 is the highest. -operator\_precedence = { '<': 10, '+': 20, '-': 20, '\*': 40 } -# Run the main ``interpreter loop``. while True: +.. code-block:: python -:: + def main(): # Install standard binary operators. + # 1 is lowest possible precedence. 40 is the highest. + operator_precedence = { '<': 10, '+': 20, '-': 20, '\*': 40 } + + # Run the main ``interpreter loop``. while True: + + :: + + ... + + parser = Parser(Tokenize(raw), operator_precedence) + + - ... - parser = Parser(Tokenize(raw), operator_precedence) - -{% endhighlight %} For the basic form of Kaleidoscope, we will only support 4 binary operators (this can obviously be extended by you, our brave and intrepid @@ -302,11 +344,16 @@ hardcode the comparisons. We also define a helper function to get the precedence of the current token, or -1 if the token is not a binary operator: -{% highlight python %} # Gets the precedence of the current token, or -1 -if the token is not a binary # operator. 
def -GetCurrentTokenPrecedence(self): if isinstance(self.current, -CharacterToken): return self.binop\_precedence.get(self.current.char, --1) else: return -1 {% endhighlight %} + +.. code-block:: python + + # Gets the precedence of the current token, or -1 + if the token is not a binary # operator. def + GetCurrentTokenPrecedence(self): if isinstance(self.current, + CharacterToken): return self.binop_precedence.get(self.current.char, + -1) else: return -1 + + With the helper above defined, we can now start parsing binary expressions. The basic idea of operator precedence parsing is to break @@ -322,9 +369,14 @@ doesn't need to worry about nested subexpressions like (c+d) at all. To start, an expression is a primary expression potentially followed by a sequence of ``[binop,primaryexpr]`` pairs: -{% highlight python %} # expression ::= primary binoprhs def -ParseExpression(self): left = self.ParsePrimary() return -self.ParseBinOpRHS(left, 0) {% endhighlight %} + +.. code-block:: python + + # expression ::= primary binoprhs def + ParseExpression(self): left = self.ParsePrimary() return + self.ParseBinOpRHS(left, 0) + + ``ParseBinOpRHS`` is the function that parses the sequence of pairs for us. It takes a precedence and a pointer to an expression for the part @@ -341,19 +393,24 @@ is passed in a precedence of 40, it will not consume any tokens (because the precedence of '+' is only 20). With this in mind, ``ParseBinOpRHS`` starts with: -{% highlight python %} # binoprhs ::= (operator primary)\* def -ParseBinOpRHS(self, left, left\_precedence): # If this is a binary -operator, find its precedence. while True: precedence = -self.GetCurrentTokenPrecedence() -:: +.. code-block:: python + + # binoprhs ::= (operator primary)\* def + ParseBinOpRHS(self, left, left_precedence): # If this is a binary + operator, find its precedence. 
while True: precedence = + self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left -{% endhighlight %} This code gets the precedence of the current token and checks to see if if is too low. Because we defined invalid tokens to have a precedence of @@ -362,15 +419,20 @@ stream runs out of binary operators. If this check succeeds, we know that the token is a binary operator and that it will be included in this expression: -{% highlight python %} binary\_operator = self.current.char self.Next() -# eat the operator. -:: +.. code-block:: python + + binary_operator = self.current.char self.Next() + # eat the operator. + + :: + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + - # Parse the primary expression after the binary operator. - right = self.ParsePrimary() -{% endhighlight %} As such, this code eats (and remembers) the binary operator and then parses the primary expression that follows. This builds up the whole @@ -383,10 +445,15 @@ In particular, we could have ``(a+b) binop unparsed`` or ``binop`` to determine its precedence and compare it to BinOp's precedence (which is '+' in this case): -{% highlight python %} # If binary\_operator binds less tightly with -right than the operator after # right, let the pending operator take -right as its left. next\_precedence = self.GetCurrentTokenPrecedence() -if precedence < next\_precedence: {% endhighlight %} + +.. code-block:: python + + # If binary_operator binds less tightly with + right than the operator after # right, let the pending operator take + right as its left. 
next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + + If the precedence of the binop to the right of ``RHS`` is lower or equal to the precedence of our current operator, then we know that the @@ -395,15 +462,20 @@ current operator is ``+`` and the next operator is ``+``, we know that they have the same precedence. In this case we'll create the AST node for ``a+b``, and then continue parsing: -{% highlight python %} if precedence < next\_precedence: ... if body -omitted ... -:: +.. code-block:: python + + if precedence < next_precedence: ... if body + omitted ... + + :: + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right); + + - # Merge left/right. - left = BinaryOperatorExpressionNode(binary_operator, left, right); -{% endhighlight %} In our example above, this will turn ``a+b+`` into ``(a+b)`` and execute the next iteration of the loop, with ``+`` as the current token. The @@ -420,18 +492,23 @@ all of ``( c + d ) * e * f`` as the RHS expression variable. The code to do this is surprisingly simple (code from the above two blocks duplicated for context): -{% highlight python %} # If binary\_operator binds less tightly with -right than the operator after # right, let the pending operator take -right as its left. next\_precedence = self.GetCurrentTokenPrecedence() -if precedence < next\_precedence: right = self.ParseBinOpRHS(right, -precedence + 1) -:: +.. code-block:: python + + # If binary_operator binds less tightly with + right than the operator after # right, let the pending operator take + right as its left. next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: right = self.ParseBinOpRHS(right, + precedence + 1) + + :: + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + - # Merge left/right. 
- left = BinaryOperatorExpressionNode(binary_operator, left, right) -{% endhighlight %} At this point, we know that the binary operator to the RHS of our primary has higher precedence than the binop we are currently parsing. @@ -466,57 +543,77 @@ well as function body definitions. The code to do this is straight-forward and not very interesting (once you've survived expressions): -{% highlight python %} # prototype ::= id '(' id\* ')' def -ParsePrototype(self): if not isinstance(self.current, IdentifierToken): -raise RuntimeError('Expected function name in prototype.') -:: +.. code-block:: python - function_name = self.current.name - self.Next() # eat function name. + # prototype ::= id '(' id\* ')' def + ParsePrototype(self): if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. 
- - return PrototypeNode(function_name, arg_names) - -{% endhighlight %} Given this, a function definition is very simple, just a prototype plus an expression to implement the body: -{% highlight python %} # definition ::= 'def' prototype expression def -ParseDefinition(self): self.Next() # eat def. proto = -self.ParsePrototype() body = self.ParseExpression() return -FunctionNode(proto, body) {% endhighlight %} + +.. code-block:: python + + # definition ::= 'def' prototype expression def + ParseDefinition(self): self.Next() # eat def. proto = + self.ParsePrototype() body = self.ParseExpression() return + FunctionNode(proto, body) + + In addition, we support 'extern' to declare functions like 'sin' and 'cos' as well as to support forward declaration of user functions. These 'extern's are just prototypes with no body: -{% highlight python %} # external ::= 'extern' prototype def -ParseExtern(self): self.Next() # eat extern. return -self.ParsePrototype() {% endhighlight %} + +.. code-block:: python + + # external ::= 'extern' prototype def + ParseExtern(self): self.Next() # eat extern. return + self.ParsePrototype() + + Finally, we'll also let the user type in arbitrary top-level expressions and evaluate them on the fly. We will handle this by defining anonymous nullary (zero argument) functions for them: -{% highlight python %} # toplevelexpr ::= expression def -ParseTopLevelExpr(self): proto = PrototypeNode('', []) return -FunctionNode(proto, self.ParseExpression()) {% endhighlight %} + +.. code-block:: python + + # toplevelexpr ::= expression def + ParseTopLevelExpr(self): proto = PrototypeNode('', []) return + FunctionNode(proto, self.ParseExpression()) + + Now that we have all the pieces, let's build a little driver that will let us actually *execute* this code we've built! @@ -530,24 +627,29 @@ The driver for this simply invokes all of the parsing pieces with a top-level dispatch loop. There isn't much interesting here, so I'll just include the top-level loop. 
See `below <#code>`_ for full code. -{% highlight python %} # Run the main "interpreter loop". while True: -print 'ready>', try: raw = raw\_input() except KeyboardInterrupt: return -:: +.. code-block:: python + + # Run the main "interpreter loop". while True: + print 'ready>', try: raw = raw_input() except KeyboardInterrupt: return + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + - parser = Parser(Tokenize(raw), operator_precedence) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() -{% endhighlight %} Here we create a new ``Parser`` for each line read, and try to parse out all the expressions, declarations and definitions in the line. We also @@ -564,12 +666,17 @@ lexer, parser, and AST builder. With this done, the executable will validate Kaleidoscope code and tell us if it is grammatically invalid. For example, here is a sample interaction: -{% highlight python %} $ python kaleidoscope.py ready> def foo(x y) -x+foo(y, 4.0) Parsed a function definition. ready> def foo(x y) x+y y -Parsed a function definition. Parsed a top-level expression. ready> def -foo(x y) x+y ) Parsed a function definition. Error: Unknown token when -expecting an expression. ready> extern sin(a); Parsed an extern. ready> -^C $ {% endhighlight %} + +.. code-block:: python + + $ python kaleidoscope.py ready> def foo(x y) + x+foo(y, 4.0) Parsed a function definition. ready> def foo(x y) x+y y + Parsed a function definition. 
Parsed a top-level expression. ready> def + foo(x y) x+y ) Parsed a function definition. Error: Unknown token when + expecting an expression. ready> extern sin(a); Parsed an extern. ready> + ^C $ + + There is a lot of room for extension here. You can define new AST nodes, extend the language in many ways, etc. In the `next @@ -585,296 +692,272 @@ Here is the complete code listing for this and the previous chapter. Note that it is fully self-contained: you don't need LLVM or any external libraries at all for this. -{% highlight python %} #!/usr/bin/env python -import re - -Lexer ------ - -The lexer yields one of these types for each token. -=================================================== - -class EOFToken(object): pass - -class DefToken(object): pass - -class ExternToken(object): pass - -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other - -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -def Tokenize(string): while string: # Skip whitespace. if -string[0].isspace(): string = string[1:] continue - -:: - - # Run regexes. - comment_match = REGEX_COMMENT.match(string) - number_match = REGEX_NUMBER.match(string) - identifier_match = REGEX_IDENTIFIER.match(string) - - # Check if any of the regexes matched and yield the appropriate result. 
- if comment_match: - comment = comment_match.group(0) - string = string[len(comment):] - elif number_match: - number = number_match.group(0) - yield NumberToken(float(number)) - string = string[len(number):] - elif identifier_match: - identifier = identifier_match.group(0) - # Check if we matched a keyword. - if identifier == 'def': - yield DefToken() - elif identifier == 'extern': - yield ExternToken() - else: - yield IdentifierToken(identifier) - string = string[len(identifier):] - else: - # Yield the ASCII value of the unknown character. - yield CharacterToken(string[0]) - string = string[1:] - -yield EOFToken() - -Abstract Syntax Tree (aka Parse Tree) -------------------------------------- - -Base class for all expression nodes. -==================================== - -class ExpressionNode(object): pass - -Expression class for numeric literals like "1.0". -================================================= - -class NumberExpressionNode(ExpressionNode): def **init**\ (self, value): -self.value = value - -Expression class for referencing a variable, like "a". -====================================================== - -class VariableExpressionNode(ExpressionNode): def **init**\ (self, -name): self.name = name - -Expression class for a binary operator. -======================================= - -class BinaryOperatorExpressionNode(ExpressionNode): def **init**\ (self, -operator, left, right): self.operator = operator self.left = left -self.right = right - -Expression class for function calls. 
-==================================== - -class CallExpressionNode(ExpressionNode): def **init**\ (self, callee, -args): self.callee = callee self.args = args - -This class represents the "prototype" for a function, which captures its name, -============================================================================== - -and its argument names (thus implicitly the number of arguments the function -============================================================================ - -takes). -======= - -class PrototypeNode(object): def **init**\ (self, name, args): self.name -= name self.args = args - -This class represents a function definition itself. -=================================================== - -class FunctionNode(object): def **init**\ (self, prototype, body): -self.prototype = prototype self.body = body - -Parser ------- - -class Parser(object): - -def **init**\ (self, tokens, binop\_precedence): self.tokens = tokens -self.binop\_precedence = binop\_precedence self.Next() - -# Provide a simple token buffer. Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. def Next(self): -self.current = self.tokens.next() - -# Gets the precedence of the current token, or -1 if the token is not a -binary # operator. def GetCurrentTokenPrecedence(self): if -isinstance(self.current, CharacterToken): return -self.binop\_precedence.get(self.current.char, -1) else: return -1 - -# identifierexpr ::= identifier \| identifier '(' expression\* ')' def -ParseIdentifierExpr(self): identifier\_name = self.current.name -self.Next() # eat identifier. - -:: - - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name) - - # Call. - self.Next() # eat '('. 
- args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - - self.Next() # eat ')'. - return CallExpressionNode(identifier_name, args) - -# numberexpr ::= number def ParseNumberExpr(self): result = -NumberExpressionNode(self.current.value) self.Next() # consume the -number. return result - -# parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() -# eat '('. - -:: - - contents = self.ParseExpression() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. - - return contents - -# primary ::= identifierexpr \| numberexpr \| parenexpr def -ParsePrimary(self): if isinstance(self.current, IdentifierToken): return -self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): -return self.ParseNumberExpr() elif self.current == CharacterToken('('): -return self.ParseParenExpr() else: raise RuntimeError('Unknown token -when expecting an expression.') - -# binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, -left\_precedence): # If this is a binary operator, find its precedence. -while True: precedence = self.GetCurrentTokenPrecedence() - -:: - - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left - - binary_operator = self.current.char - self.Next() # eat the operator. - - # Parse the primary expression after the binary operator. - right = self.ParsePrimary() - - # If binary_operator binds less tightly with right than the operator after - # right, let the pending operator take right as its left. 
- next_precedence = self.GetCurrentTokenPrecedence() - if precedence < next_precedence: - right = self.ParseBinOpRHS(right, precedence + 1) - - # Merge left/right. - left = BinaryOperatorExpressionNode(binary_operator, left, right) - -# expression ::= primary binoprhs def ParseExpression(self): left = -self.ParsePrimary() return self.ParseBinOpRHS(left, 0) - -# prototype ::= id '(' id\* ')' def ParsePrototype(self): if not -isinstance(self.current, IdentifierToken): raise RuntimeError('Expected -function name in prototype.') - -:: - - function_name = self.current.name - self.Next() # eat function name. - - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. - - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. - - return PrototypeNode(function_name, arg_names) - -# definition ::= 'def' prototype expression def ParseDefinition(self): -self.Next() # eat def. proto = self.ParsePrototype() body = -self.ParseExpression() return FunctionNode(proto, body) - -# toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = -PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) - -# external ::= 'extern' prototype def ParseExtern(self): self.Next() # -eat extern. return self.ParsePrototype() - -# Top-Level parsing def HandleDefinition(self): -self.Handle(self.ParseDefinition, 'Parsed a function definition.') - -def HandleExtern(self): self.Handle(self.ParseExtern, 'Parsed an -extern.') - -def HandleTopLevelExpression(self): self.Handle(self.ParseTopLevelExpr, -'Parsed a top-level expression.') - -def Handle(self, function, message): try: function() print message -except Exception, e: print 'Error:', e try: self.Next() # Skip for error -recovery. except: pass - -Main driver code. 
------------------ - -def main(): # Install standard binary operators. # 1 is lowest possible -precedence. 40 is the highest. operator\_precedence = { '<': 10, '+': -20, '-': 20, '\*': 40 } - -# Run the main "interpreter loop". while True: print 'ready>', try: raw -= raw\_input() except KeyboardInterrupt: return - -:: - - parser = Parser(Tokenize(raw), operator_precedence) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() - -if **name** == '**main**\ ': main() {% endhighlight %} - --------------- - -**`Next: Implementing Code Generation to LLVM -IR `_** +.. code-block:: python + + #!/usr/bin/env python + + import re + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. 
+ comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): def **init**\ (self, value): + self.value = value + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): def **init**\ (self, + name): self.name = name + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): def **init**\ (self, + operator, left, right): self.operator = operator self.left = left + self.right = right + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): def **init**\ (self, callee, + args): self.callee = callee self.args = args + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). 
+ class PrototypeNode(object): def **init**\ (self, name, args): self.name + = name self.args = args + + # This class represents a function definition itself. + class FunctionNode(object): def **init**\ (self, prototype, body): + self.prototype = prototype self.body = body + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. 
+ + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # primary ::= identifierexpr \| numberexpr \| parenexpr def + ParsePrimary(self): if isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. 
+ + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Parsed a function definition.') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Parsed an + extern.') + + def HandleTopLevelExpression(self): self.Handle(self.ParseTopLevelExpr, + 'Parsed a top-level expression.') + + def Handle(self, function, message): try: function() print message + except Exception, e: print 'Error:', e try: self.Next() # Skip for error + recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Install standard binary operators. # 1 is lowest possible + precedence. 40 is the highest. operator_precedence = { '<': 10, '+': + 20, '-': 20, '\*': 40 } + + # Run the main "interpreter loop". 
while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: return + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + if **name** == '**main**\ ': main() diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl3.rst b/docs/source/doc/kaleidoscope/PythonLangImpl3.rst index 562f0f3..549cb7b 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl3.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl3.rst @@ -31,23 +31,26 @@ Code Generation Setup # {#basics} In order to generate LLVM IR, we want some simple setup to get started. First we define code generation methods in each AST node class: -{% highlight python %} # Expression class for numeric literals like -"1.0". class NumberExpressionNode(ExpressionNode): -def **init**\ (self, value): self.value = value +.. code-block:: python -def CodeGen(self): ... + # Expression class for numeric literals like + "1.0". class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): ... + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): ... + + ... -Expression class for referencing a variable, like "a". -====================================================== -class VariableExpressionNode(ExpressionNode): - -def **init**\ (self, name): self.name = name - -def CodeGen(self): ... - -... {% endhighlight %} The ``CodeGen`` method says to emit IR for that AST node along with all the things it depends on, and they all return an LLVM Value object. 
@@ -64,21 +67,20 @@ Assignment `_ We will also need to define some global variables which we will be used during code generation: -{% highlight python %} # The LLVM module, which holds all the IR code. -g\_llvm\_module = Module.new('my cool jit') -The LLVM instruction builder. Created whenever a new function is entered. -========================================================================= +.. code-block:: python -g\_llvm\_builder = None + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} -A dictionary that keeps track of which values are defined in the current scope -============================================================================== -and what their LLVM representation is. -====================================== - -g\_named\_values = {} {% endhighlight %} ``g_llvm_module`` is the LLVM construct that contains all of the functions and global variables in a chunk of code. In many ways, it is @@ -112,8 +114,13 @@ Generating LLVM code for expression nodes is very straightforward: less than 35 lines of commented code for all four of our expression nodes. First we'll do numeric literals: -{% highlight python %} def CodeGen(self): return -Constant.real(Type.double(), self.value) {% endhighlight %} + +.. code-block:: python + + def CodeGen(self): return + Constant.real(Type.double(), self.value) + + In llvmpy, floating point numeric constants are represented with the ``llvm.core.ConstantFP`` class. To create one, we can use the static @@ -123,9 +130,14 @@ LLVM IR constants are all uniqued together and shared. For this reason, we create the constant through a factory method instead of instantiating one directly. 
-{% highlight python %} def CodeGen(self): if self.name in -g\_named\_values: return g\_named\_values[self.name] else: raise -RuntimeError('Unknown variable name: ' + self.name) {% endhighlight %} + +.. code-block:: python + + def CodeGen(self): if self.name in + g_named_values: return g_named_values[self.name] else: raise + RuntimeError('Unknown variable name: ' + self.name) + + References to variables are also quite simple using LLVM. In the simple version of Kaleidoscope, we assume that the variable has already been @@ -137,25 +149,30 @@ the value for it. In future chapters, we'll add support for `loop induction variables `_ in the symbol table, and for `local variables `_. -{% highlight python %} def CodeGen(self): left = self.left.CodeGen() -right = self.right.CodeGen() -:: +.. code-block:: python + + def CodeGen(self): left = self.left.CodeGen() + right = self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. - return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - raise RuntimeError('Unknown binary operator.') -{% endhighlight %} Binary operators start to get more interesting. 
The basic idea here is that we recursively emit code for the left-hand side of the expression, @@ -193,21 +210,26 @@ treating the input as an unsigned value. In contrast, if we used the the Kaleidoscope ``<`` operator would return 0.0 and -1.0, depending on the input value. -{% highlight python %} def CodeGen(self): # Look up the name in the -global module table. callee = -g\_llvm\_module.get\_function\_named(self.callee) -:: +.. code-block:: python - # Check for argument mismatch error. - if len(callee.args) != len(self.args): - raise RuntimeError('Incorrect number of arguments passed.') + def CodeGen(self): # Look up the name in the + global module table. callee = + g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + - arg_values = [i.CodeGen() for i in self.args] - return g_llvm_builder.call(callee, arg_values, 'calltmp') - -{% endhighlight %} Code generation for function calls is quite straightforward with LLVM. The code above initially does a function name lookup in the LLVM @@ -242,15 +264,20 @@ let's talk about code generation for prototypes: they are used both for function bodies and external function declarations. The code starts with: -{% highlight python %} def CodeGen(self): # Make the function type, eg. -double(double,double). funct\_type = Type.function( Type.double(), -[Type.double()] \* len(self.args), False) -:: +.. code-block:: python + + def CodeGen(self): # Make the function type, eg. + double(double,double). 
funct_type = Type.function( Type.double(), + [Type.double()] \* len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + - function = Function.new(g_llvm_module, funct_type, self.name) -{% endhighlight %} The call to ``Type.function`` creates the ``FunctionType`` that should be used for a given Prototype. Since all function arguments in @@ -272,11 +299,16 @@ the name the user specified: since ``g_llvm_module`` is specified, this name is registered in ``g_llvm_module``'s symbol table, which is used by the function call code above. -{% highlight python %} # If the name conflicted, there was already -something with the same name. # If it has a body, don't allow -redefinition or reextern. if function.name != self.name: -function.delete() function = -g\_llvm\_module.get\_function\_named(self.name) {% endhighlight %} + +.. code-block:: python + + # If the name conflicted, there was already + something with the same name. # If it has a body, don't allow + redefinition or reextern. if function.name != self.name: + function.delete() function = + g_llvm_module.get_function_named(self.name) + + The Module symbol table works just like the Function symbol table when it comes to name conflicts: if a new function is created with a name was @@ -298,18 +330,23 @@ function we just created (by calling ``delete``) and then calling ``get_function_named`` to get the existing function with the specified name. -{% highlight python %} # If the function already has a body, reject -this. if not function.is\_declaration: raise RuntimeError('Redefinition -of function.') -:: +.. code-block:: python + + # If the function already has a body, reject + this. if not function.is_declaration: raise RuntimeError('Redefinition + of function.') + + :: + + # If F took a different number of args, reject. 
+ if len(callee.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + - # If F took a different number of args, reject. - if len(callee.args) != len(self.args): - raise RuntimeError('Redeclaration of a function with different number ' - 'of args.') -{% endhighlight %} In order to verify the logic above, we first check to see if the pre-existing function is a forward declaration. Since we don't allow @@ -318,16 +355,21 @@ case. If the previous reference to a function was an 'extern', we simply verify that the number of arguments for that definition and this one match up. If not, we emit an error. -{% highlight python %} # Set names for all arguments and add them to the -variables symbol table. for arg, arg\_name in zip(function.args, -self.args): arg.name = arg\_name # Add arguments to variable symbol -table. g\_named\_values[arg\_name] = arg -:: +.. code-block:: python + + # Set names for all arguments and add them to the + variables symbol table. for arg, arg_name in zip(function.args, + self.args): arg.name = arg_name # Add arguments to variable symbol + table. g_named_values[arg_name] = arg + + :: + + return function + + - return function -{% endhighlight %} The last bit of code for prototypes loops over all of the arguments in the function, setting the name of the LLVM Argument objects to match, @@ -338,15 +380,20 @@ would be very straight-forward with the mechanics we have already used above. Once this is all set up, it returns the Function object to the caller. -{% highlight python %} def CodeGen(self): # Clear scope. -g\_named\_values.clear() -:: +.. code-block:: python + + def CodeGen(self): # Clear scope. + g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + - # Create a function object. 
- function = self.prototype.CodeGen() -{% endhighlight %} Code generation for function definitions starts out simply enough: we just clear out the ``g_named_values`` dictionary to make sure that there @@ -354,32 +401,37 @@ isn't anything in it from the last function we compiled and codegen the prototype. Code generation of the prototype ensures that there is an LLVM Function object that is ready to go for us. -{% highlight python %} # Create a new basic block to start insertion -into. block = function.append\_basic\_block('entry') global -g\_llvm\_builder g\_llvm\_builder = Builder.new(block) {% endhighlight -%} -Now we get to the point where ``g_llvm_builder`` is set up. The first -line creates a new `basic -block `_ (named "entry"), -which is inserted into the function. The second line declares that the -global ``g_llvm_builder`` object is to be changed. The last line creates -a new builder that is set up to insert new instructions into the basic -block we just created. Basic blocks in LLVM are an important part of -functions that define the `Control Flow -Graph `_. Since we -don't have any control flow, our functions will only contain one block -at this point. We'll fix this in `Chapter 5 `_ :). +.. code-block:: python -{% highlight python %} # Finish off the function. try: return\_value = -self.body.CodeGen() g\_llvm\_builder.ret(return\_value) + # Create a new basic block to start insertion + into. block = function.append_basic_block('entry') global + g_llvm_builder g_llvm_builder = Builder.new(block) {% endhighlight + %} + + Now we get to the point where ``g_llvm_builder`` is set up. The first + line creates a new `basic + block `_ (named "entry"), + which is inserted into the function. The second line declares that the + global ``g_llvm_builder`` object is to be changed. The last line creates + a new builder that is set up to insert new instructions into the basic + block we just created. 
Basic blocks in LLVM are an important part of + functions that define the `Control Flow + Graph `_. Since we + don't have any control flow, our functions will only contain one block + at this point. We'll fix this in `Chapter 5 `_ :). + + {% highlight python %} # Finish off the function. try: return_value = + self.body.CodeGen() g_llvm_builder.ret(return_value) + + :: + + # Validate the generated code, checking for consistency. + function.verify() + + -:: - # Validate the generated code, checking for consistency. - function.verify() - -{% endhighlight %} Once the insertion point is set up, we call the ``CodeGen`` method for the root expression of the function. If no error happens, this emits @@ -392,13 +444,18 @@ checks on the generated code, to determine if our compiler is doing everything right. Using this is important: it can catch a lot of bugs. Once the function is finished and validated, we return it. -{% highlight python %} except: function.delete() raise -:: +.. code-block:: python + + except: function.delete() raise + + :: + + return function + + - return function -{% endhighlight %} The only piece left here is handling of the error case. For simplicity, we handle this by merely deleting the function we produced with the @@ -411,9 +468,14 @@ can return a previously defined forward declaration, our code can actually delete a forward declaration. There are a number of ways to fix this bug; see what you can come up with! Here is a testcase: -{% highlight python %} extern foo(a b) # ok, defines foo. def foo(a b) c -# error, 'c' is invalid. def bar() foo(1, 2) # error, unknown function -"foo" {% endhighlight %} + +.. code-block:: python + + extern foo(a b) # ok, defines foo. def foo(a b) c + # error, 'c' is invalid. def bar() foo(1, 2) # error, unknown function + "foo" + + -------------- @@ -426,8 +488,13 @@ CodeGen into the ``Handle*`` functions, and then dumps out the LLVM IR. This gives a nice way to look at the LLVM IR for simple functions. 
For example: -{% highlight bash %} ready> 4+5 Read a top-level expression: define -double @0() { entry: ret double 9.000000e+00 } {% endhighlight %} + +.. code-block:: bash + + ready> 4+5 Read a top-level expression: define + double @0() { entry: ret double 9.000000e+00 } + + Note how the parser turns the top-level expression into anonymous functions for us. This will be handy when we add JIT support in the next @@ -435,56 +502,76 @@ chapter. Also note that the code is very literally transcribed, no optimizations are being performed except simple constant folding done by the Builder. We will add optimizations explicitly in the next chapter. -{% highlight bash %} ready> def foo(a b) a\ *a + 2*\ a\ *b + b*\ b Read -a function definition: define double @foo(double %a, double %b) { entry: -%multmp = fmul double %a, %a ; [#uses=1] %multmp1 = fmul double -2.000000e+00, %a ; [#uses=1] %multmp2 = fmul double %multmp1, %b ; -[#uses=1] %addtmp = fadd double %multmp, %multmp2 ; [#uses=1] %multmp3 = -fmul double %b, %b ; [#uses=1] %addtmp4 = fadd double %addtmp, %multmp3 -; [#uses=1] ret double %addtmp4 } {% endhighlight %} + +.. code-block:: bash + + ready> def foo(a b) a\ *a + 2*\ a\ *b + b*\ b Read + a function definition: define double @foo(double %a, double %b) { entry: + %multmp = fmul double %a, %a ; [#uses=1] %multmp1 = fmul double + 2.000000e+00, %a ; [#uses=1] %multmp2 = fmul double %multmp1, %b ; + [#uses=1] %addtmp = fadd double %multmp, %multmp2 ; [#uses=1] %multmp3 = + fmul double %b, %b ; [#uses=1] %addtmp4 = fadd double %addtmp, %multmp3 + ; [#uses=1] ret double %addtmp4 } + + This shows some simple arithmetic. Notice the striking similarity to the LLVM builder calls that we use to create the instructions. 
-{% highlight bash %} ready> def bar(a) foo(a, 4.0) + bar(31337) Read a -function definition: define double @bar(double %a) { entry: %calltmp = -call double @foo(double %a, double 4.000000e+00) ; [#uses=1] %calltmp1 = -call double @bar(double 3.133700e+04) ; [#uses=1] %addtmp = fadd double -%calltmp, %calltmp1 ; [#uses=1] ret double %addtmp } {% endhighlight %} + +.. code-block:: bash + + ready> def bar(a) foo(a, 4.0) + bar(31337) Read a + function definition: define double @bar(double %a) { entry: %calltmp = + call double @foo(double %a, double 4.000000e+00) ; [#uses=1] %calltmp1 = + call double @bar(double 3.133700e+04) ; [#uses=1] %addtmp = fadd double + %calltmp, %calltmp1 ; [#uses=1] ret double %addtmp } + + This shows some function calls. Note that this function will take a long time to execute if you call it. In the future we'll add conditional control flow to actually make recursion useful :). -{% highlight bash %} ready> extern cos(x) Read extern: declare double -@cos(double) -ready> cos(1.234) Read a top-level expression: define double @1() { -entry: %calltmp = call double @cos(double 1.234000e+00) ; [#uses=1] ret -double %calltmp } {% endhighlight %} +.. code-block:: bash + + ready> extern cos(x) Read extern: declare double + @cos(double) + + ready> cos(1.234) Read a top-level expression: define double @1() { + entry: %calltmp = call double @cos(double 1.234000e+00) ; [#uses=1] ret + double %calltmp } + + This shows an extern for the libm "cos" function, and a call to it. -{% highlight bash %} ready> ^C ; ModuleID = 'my cool jit' -define double @0() { entry: ret double 9.000000e+00 } +.. 
code-block:: bash -define double @foo(double %a, double %b) { entry: %multmp = fmul double -%a, %a ; [#uses=1] %multmp1 = fmul double 2.000000e+00, %a ; [#uses=1] -%multmp2 = fmul double %multmp1, %b ; [#uses=1] %addtmp = fadd double -%multmp, %multmp2 ; [#uses=1] %multmp3 = fmul double %b, %b ; [#uses=1] -%addtmp4 = fadd double %addtmp, %multmp3 ; [#uses=1] ret double %addtmp4 -} + ready> ^C ; ModuleID = 'my cool jit' + + define double @0() { entry: ret double 9.000000e+00 } + + define double @foo(double %a, double %b) { entry: %multmp = fmul double + %a, %a ; [#uses=1] %multmp1 = fmul double 2.000000e+00, %a ; [#uses=1] + %multmp2 = fmul double %multmp1, %b ; [#uses=1] %addtmp = fadd double + %multmp, %multmp2 ; [#uses=1] %multmp3 = fmul double %b, %b ; [#uses=1] + %addtmp4 = fadd double %addtmp, %multmp3 ; [#uses=1] ret double %addtmp4 + } + + define double @bar(double %a) { entry: %calltmp = call double + @foo(double %a, double 4.000000e+00) ; [#uses=1] %calltmp1 = call double + @bar(double 3.133700e+04) ; [#uses=1] %addtmp = fadd double %calltmp, + %calltmp1 ; [#uses=1] ret double %addtmp } + + declare double @cos(double) + + define double @1() { entry: %calltmp = call double @cos(double + 1.234000e+00) ; [#uses=1] ret double %calltmp } -define double @bar(double %a) { entry: %calltmp = call double -@foo(double %a, double 4.000000e+00) ; [#uses=1] %calltmp1 = call double -@bar(double 3.133700e+04) ; [#uses=1] %addtmp = fadd double %calltmp, -%calltmp1 ; [#uses=1] ret double %addtmp } -declare double @cos(double) - -define double @1() { entry: %calltmp = call double @cos(double -1.234000e+00) ; [#uses=1] ret double %calltmp } {% endhighlight %} When you quit the current demo, it dumps out the IR for the entire module generated. Here you can see the big picture with all the @@ -505,419 +592,388 @@ the LLVM code generator. Because this uses the llvmpy libraries, you need to `download <../download.html>`_ and `install <../userguide.html#install>`_ them. 
-{% highlight python %} #!/usr/bin/env python -import re from llvm.core import Module, Constant, Type, Function, -Builder, FCMP\_ULT - -Globals -------- - -The LLVM module, which holds all the IR code. -============================================= - -g\_llvm\_module = Module.new('my cool jit') - -The LLVM instruction builder. Created whenever a new function is entered. -========================================================================= - -g\_llvm\_builder = None - -A dictionary that keeps track of which values are defined in the current scope -============================================================================== - -and what their LLVM representation is. -====================================== - -g\_named\_values = {} - -Lexer ------ - -The lexer yields one of these types for each token. -=================================================== - -class EOFToken(object): pass - -class DefToken(object): pass - -class ExternToken(object): pass - -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other - -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -def Tokenize(string): while string: # Skip whitespace. if -string[0].isspace(): string = string[1:] continue - -:: - - # Run regexes. - comment_match = REGEX_COMMENT.match(string) - number_match = REGEX_NUMBER.match(string) - identifier_match = REGEX_IDENTIFIER.match(string) - - # Check if any of the regexes matched and yield the appropriate result. 
- if comment_match: - comment = comment_match.group(0) - string = string[len(comment):] - elif number_match: - number = number_match.group(0) - yield NumberToken(float(number)) - string = string[len(number):] - elif identifier_match: - identifier = identifier_match.group(0) - # Check if we matched a keyword. - if identifier == 'def': - yield DefToken() - elif identifier == 'extern': - yield ExternToken() - else: - yield IdentifierToken(identifier) - string = string[len(identifier):] - else: - # Yield the ASCII value of the unknown character. - yield CharacterToken(string[0]) - string = string[1:] - -yield EOFToken() - -Abstract Syntax Tree (aka Parse Tree) -------------------------------------- - -Base class for all expression nodes. -==================================== - -class ExpressionNode(object): pass - -Expression class for numeric literals like "1.0". -================================================= - -class NumberExpressionNode(ExpressionNode): - -def **init**\ (self, value): self.value = value - -def CodeGen(self): return Constant.real(Type.double(), self.value) - -Expression class for referencing a variable, like "a". -====================================================== - -class VariableExpressionNode(ExpressionNode): - -def **init**\ (self, name): self.name = name - -def CodeGen(self): if self.name in g\_named\_values: return -g\_named\_values[self.name] else: raise RuntimeError('Unknown variable -name: ' + self.name) - -Expression class for a binary operator. 
-======================================= - -class BinaryOperatorExpressionNode(ExpressionNode): - -def **init**\ (self, operator, left, right): self.operator = operator -self.left = left self.right = right - -def CodeGen(self): left = self.left.CodeGen() right = -self.right.CodeGen() - -:: - - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. - return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - raise RuntimeError('Unknown binary operator.') - -Expression class for function calls. -==================================== - -class CallExpressionNode(ExpressionNode): - -def **init**\ (self, callee, args): self.callee = callee self.args = -args - -def CodeGen(self): # Look up the name in the global module table. callee -= g\_llvm\_module.get\_function\_named(self.callee) - -:: - - # Check for argument mismatch error. - if len(callee.args) != len(self.args): - raise RuntimeError('Incorrect number of arguments passed.') - - arg_values = [i.CodeGen() for i in self.args] - - return g_llvm_builder.call(callee, arg_values, 'calltmp') - -This class represents the "prototype" for a function, which captures its name, -============================================================================== - -and its argument names (thus implicitly the number of arguments the function -============================================================================ - -takes). -======= - -class PrototypeNode(object): - -def **init**\ (self, name, args): self.name = name self.args = args - -def CodeGen(self): # Make the function type, eg. double(double,double). 
-funct\_type = Type.function( Type.double(), [Type.double()] \* -len(self.args), False) - -:: - - function = Function.new(g_llvm_module, funct_type, self.name) - - # If the name conflicted, there was already something with the same name. - # If it has a body, don't allow redefinition or reextern. - if function.name != self.name: - function.delete() - function = g_llvm_module.get_function_named(self.name) - - # If the function already has a body, reject this. - if not function.is_declaration: - raise RuntimeError('Redefinition of function.') - - # If F took a different number of args, reject. - if len(callee.args) != len(self.args): - raise RuntimeError('Redeclaration of a function with different number ' - 'of args.') - - # Set names for all arguments and add them to the variables symbol table. - for arg, arg_name in zip(function.args, self.args): - arg.name = arg_name - # Add arguments to variable symbol table. - g_named_values[arg_name] = arg - - return function - -This class represents a function definition itself. -=================================================== - -class FunctionNode(object): - -def **init**\ (self, prototype, body): self.prototype = prototype -self.body = body - -def CodeGen(self): # Clear scope. g\_named\_values.clear() - -:: - - # Create a function object. - function = self.prototype.CodeGen() - - # Create a new basic block to start insertion into. - block = function.append_basic_block('entry') - global g_llvm_builder - g_llvm_builder = Builder.new(block) - - # Finish off the function. - try: - return_value = self.body.CodeGen() - g_llvm_builder.ret(return_value) - - # Validate the generated code, checking for consistency. - function.verify() - except: - function.delete() - raise - - return function - -Parser ------- - -class Parser(object): - -def **init**\ (self, tokens, binop\_precedence): self.tokens = tokens -self.binop\_precedence = binop\_precedence self.Next() - -# Provide a simple token buffer. 
Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. def Next(self): -self.current = self.tokens.next() - -# Gets the precedence of the current token, or -1 if the token is not a -binary # operator. def GetCurrentTokenPrecedence(self): if -isinstance(self.current, CharacterToken): return -self.binop\_precedence.get(self.current.char, -1) else: return -1 - -# identifierexpr ::= identifier \| identifier '(' expression\* ')' def -ParseIdentifierExpr(self): identifier\_name = self.current.name -self.Next() # eat identifier. - -:: - - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name) - - # Call. - self.Next() # eat '('. - args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - - self.Next() # eat ')'. - return CallExpressionNode(identifier_name, args) - -# numberexpr ::= number def ParseNumberExpr(self): result = -NumberExpressionNode(self.current.value) self.Next() # consume the -number. return result - -# parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() -# eat '('. - -:: - - contents = self.ParseExpression() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. 
- - return contents - -# primary ::= identifierexpr \| numberexpr \| parenexpr def -ParsePrimary(self): if isinstance(self.current, IdentifierToken): return -self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): -return self.ParseNumberExpr() elif self.current == CharacterToken('('): -return self.ParseParenExpr() else: raise RuntimeError('Unknown token -when expecting an expression.') - -# binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, -left\_precedence): # If this is a binary operator, find its precedence. -while True: precedence = self.GetCurrentTokenPrecedence() - -:: - - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left - - binary_operator = self.current.char - self.Next() # eat the operator. - - # Parse the primary expression after the binary operator. - right = self.ParsePrimary() - - # If binary_operator binds less tightly with right than the operator after - # right, let the pending operator take right as its left. - next_precedence = self.GetCurrentTokenPrecedence() - if precedence < next_precedence: - right = self.ParseBinOpRHS(right, precedence + 1) - - # Merge left/right. - left = BinaryOperatorExpressionNode(binary_operator, left, right) - -# expression ::= primary binoprhs def ParseExpression(self): left = -self.ParsePrimary() return self.ParseBinOpRHS(left, 0) - -# prototype ::= id '(' id\* ')' def ParsePrototype(self): if not -isinstance(self.current, IdentifierToken): raise RuntimeError('Expected -function name in prototype.') - -:: - - function_name = self.current.name - self.Next() # eat function name. - - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. 
- - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. - - return PrototypeNode(function_name, arg_names) - -# definition ::= 'def' prototype expression def ParseDefinition(self): -self.Next() # eat def. proto = self.ParsePrototype() body = -self.ParseExpression() return FunctionNode(proto, body) - -# toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = -PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) - -# external ::= 'extern' prototype def ParseExtern(self): self.Next() # -eat extern. return self.ParsePrototype() - -# Top-Level parsing def HandleDefinition(self): -self.Handle(self.ParseDefinition, 'Read a function definition:') - -def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') - -def HandleTopLevelExpression(self): self.Handle(self.ParseTopLevelExpr, -'Read a top-level expression:') - -def Handle(self, function, message): try: print message, -function().CodeGen() except Exception, e: print 'Error:', e try: -self.Next() # Skip for error recovery. except: pass - -Main driver code. ------------------ - -def main(): # Install standard binary operators. # 1 is lowest possible -precedence. 40 is the highest. operator\_precedence = { '<': 10, '+': -20, '-': 20, '\*': 40 } - -# Run the main "interpreter loop". while True: print 'ready>', try: raw -= raw\_input() except KeyboardInterrupt: break - -:: - - parser = Parser(Tokenize(raw), operator_precedence) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() - -# Print out all of the generated code. 
print '', g\_llvm\_module - -if **name** == '**main**\ ': main() {% endhighlight %} - --------------- - -**`Next: Adding JIT and Optimizer Support `_** +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder, FCMP_ULT + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. 
+ if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. 
+ class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): + + def **init**\ (self, name, args): self.name = name self.args = args + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. 
+ if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If F took a different number of args, reject. + if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. + class FunctionNode(object): + + def __init__(self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + except: + function.delete() + raise + + return function + + Parser + ------ + + class Parser(object): + + def __init__(self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. 
def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # primary ::= identifierexpr \| numberexpr \| parenexpr def + ParsePrimary(self): if isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. 
+ if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. 
return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): self.Handle(self.ParseTopLevelExpr, + 'Read a top-level expression:') + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Install standard binary operators. # 1 is lowest possible + precedence. 40 is the highest. operator_precedence = { '<': 10, '+': + 20, '-': 20, '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl4.rst b/docs/source/doc/kaleidoscope/PythonLangImpl4.rst index d9e32da..da6a5d8 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl4.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl4.rst @@ -25,17 +25,27 @@ Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately, it does not produce wonderful code. 
The LLVM Builder, however, does give us obvious optimizations when compiling simple code: -{% highlight bash %} ready> def test(x) 1+2+x Read function definition: -define double @test(double %x) { entry: %addtmp = fadd double -3.000000e+00, %x ret double %addtmp } {% endhighlight %} + +.. code-block:: bash + + ready> def test(x) 1+2+x Read function definition: + define double @test(double %x) { entry: %addtmp = fadd double + 3.000000e+00, %x ret double %addtmp } + + This code is not a literal transcription of the AST built by parsing the input. That would be: -{% highlight bash %} ready> def test(x) 1+2+x Read function definition: -define double @test(double %x) { entry: %addtmp = fadd double -2.000000e+00, 1.000000e+00 %addtmp1 = fadd double %addtmp, %x ret double -%addtmp1 } {% endhighlight %} + +.. code-block:: bash + + ready> def test(x) 1+2+x Read function definition: + define double @test(double %x) { entry: %addtmp = fadd double + 2.000000e+00, 1.000000e+00 %addtmp1 = fadd double %addtmp, %x ret double + %addtmp1 } + + Constant folding, as seen above, in particular, is a very common and very important optimization: so much so that many language implementors @@ -58,11 +68,16 @@ On the other hand, the ``Builder`` is limited by the fact that it does all of its analysis inline with the code as it is built. If you take a slightly more complex example: -{% highlight bash %} ready> def test(x) (1+2+x)\*(x+(1+2)) Read a -function definition: define double @test(double %x) { entry: %addtmp = -fadd double 3.000000e+00, %x ; [#uses=1] %addtmp1 = fadd double %x, -3.000000e+00 ; [#uses=1] %multmp = fmul double %addtmp, %addtmp1 ; -[#uses=1] ret double %multmp } {% endhighlight %} + +.. 
code-block:: bash + + ready> def test(x) (1+2+x)\*(x+(1+2)) Read a + function definition: define double @test(double %x) { entry: %addtmp = + fadd double 3.000000e+00, %x ; [#uses=1] %addtmp1 = fadd double %x, + 3.000000e+00 ; [#uses=1] %multmp = fmul double %addtmp, %addtmp1 ; + [#uses=1] ret double %multmp } + + In this case, the LHS and RHS of the multiplication are the same value. We'd really like to see this generate"``tmp = x+3; result = tmp*tmp;`` @@ -112,27 +127,30 @@ to hold and organize the LLVM optimizations that we want to run. Once we have that, we can add a set of optimizations to run. The code looks like this: -{% highlight python %} # The function optimization passes manager. -g\_llvm\_pass\_manager = FunctionPassManager.new(g\_llvm\_module) -The LLVM execution engine. -========================== +.. code-block:: python -g\_llvm\_executor = ExecutionEngine.new(g\_llvm\_module) + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + ... + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() -... -def main(): # Set up the optimizer pipeline. Start with registering info -about how the # target lays out data structures. -g\_llvm\_pass\_manager.add(g\_llvm\_executor.target\_data) # Do simple -"peephole" optimizations and bit-twiddling optzns. 
-g\_llvm\_pass\_manager.add(PASS\_INSTRUCTION\_COMBINING) # Reassociate -expressions. g\_llvm\_pass\_manager.add(PASS\_REASSOCIATE) # Eliminate -Common SubExpressions. g\_llvm\_pass\_manager.add(PASS\_GVN) # Simplify -the control flow graph (deleting unreachable blocks, etc). -g\_llvm\_pass\_manager.add(PASS\_CFG\_SIMPLIFICATION) - -g\_llvm\_pass\_manager.initialize() {% endhighlight %} This code defines a ``FunctionPassManager``, ``g_llvm_pass_manager``. Once it is set up, we use a series of "add" calls to add a bunch of LLVM @@ -149,28 +167,38 @@ Once the pass manager is set up, we need to make use of it. We do this by running it after our newly created function is constructed (in ``FunctionNode.CodeGen``), but before it is returned to the client: -{% highlight python %} return\_value = self.body.CodeGen() -g\_llvm\_builder.ret(return\_value) -:: +.. code-block:: python - # Validate the generated code, checking for consistency. - function.verify() + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + :: + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + + - # Optimize the function. - g_llvm_pass_manager.run(function) -{% endhighlight %} As you can see, this is pretty straightforward. The ``FunctionPassManager`` optimizes and updates the LLVM Function in place, improving (hopefully) its body. With this in place, we can try our test above again: -{% highlight bash %} ready> def test(x) (1+2+x)\*(x+(1+2)) Read a -function definition: define double @test(double %x) { entry: %addtmp = -fadd double %x, 3.000000e+00 ; [#uses=2] %multmp = fmul double %addtmp, -%addtmp ; [#uses=1] ret double %multmp } {% endhighlight %} + +.. 
code-block:: bash + + ready> def test(x) (1+2+x)\*(x+(1+2)) Read a + function definition: define double @test(double %x) { entry: %addtmp = + fadd double %x, 3.000000e+00 ; [#uses=2] %multmp = fmul double %addtmp, + %addtmp ; [#uses=1] ret double %multmp } + + As expected, we now get our nicely optimized code, saving a floating point add instruction from every execution of this function. @@ -208,8 +236,13 @@ be able to call it from the command line. In order to do this, we first declare and initialize the JIT. This is done by adding and initializing a global variable: -{% highlight python %} # The LLVM execution engine. g\_llvm\_executor = -ExecutionEngine.new(g\_llvm\_module) {% endhighlight %} + +.. code-block:: python + + # The LLVM execution engine. g_llvm_executor = + ExecutionEngine.new(g_llvm_module) + + This creates an abstract "Execution Engine" which can be either a JIT compiler or the LLVM interpreter. LLVM will automatically pick a JIT @@ -222,38 +255,48 @@ compiled function and get its return value. In our case, this means that we can change the code that parses a top-level expression to look like this: -{% highlight python %} def HandleTopLevelExpression(self): try: function -= self.ParseTopLevelExpr().CodeGen() result = -g\_llvm\_executor.run\_function(function, []) print 'Evaluated to:', -result.as\_real(Type.double()) except Exception, e: print 'Error:', e -try: self.Next() # Skip for error recovery. except: pass {% endhighlight -%} -Recall that we compile top-level expressions into a self-contained LLVM -function that takes no arguments and returns the computed double. +.. code-block:: python -With just these two changes, lets see how Kaleidoscope works now! + def HandleTopLevelExpression(self): try: function + = self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. 
except: pass + + +Recall that we compile top-level expressions into a self-contained LLVM +function that takes no arguments and returns the computed double. + +With just these two changes, let's see how Kaleidoscope works now! + +.. code-block:: python + + ready> 4+5 Read a top level expression: define + double @0() { entry: ret double 9.000000e+00 } + + Evaluated to: 9.0 + -{% highlight python %} ready> 4+5 Read a top level expression: define -double @0() { entry: ret double 9.000000e+00 } -Evaluated to: 9.0 {% endhighlight %} Well this looks like it is basically working. The dump of the function shows the "no argument function that always returns double" that we synthesize for each top-level expression that is typed in. This demonstrates very basic functionality, but can we do more? -{% highlight python %} ready> def testfunc(x y) x + y\*2 Read a function -definition: define double @testfunc(double %x, double %y) { entry: -%multmp = fmul double %y, 2.000000e+00 ; [#uses=1] %addtmp = fadd double -%multmp, %x ; [#uses=1] ret double %addtmp } -ready> testfunc(4, 10) Read a top level expression: define double @0() { -entry: %calltmp = call double @testfunc(double 4.000000e+00, double -1.000000e+01) ; [#uses=1] ret double %calltmp } +.. code-block:: python + + ready> def testfunc(x y) x + y*2 Read a function + definition: define double @testfunc(double %x, double %y) { entry: + %multmp = fmul double %y, 2.000000e+00 ; [#uses=1] %addtmp = fadd double + %multmp, %x ; [#uses=1] ret double %addtmp } + + ready> testfunc(4, 10) Read a top level expression: define double @0() { + entry: %calltmp = call double @testfunc(double 4.000000e+00, double + 1.000000e+01) ; [#uses=1] ret double %calltmp } + + *Evaluated to: 24.0* + -*Evaluated to: 24.0* {% endhighlight %} This illustrates that we can now call user code, but there is something a bit subtle going on here. Note that we only invoke the JIT on the @@ -269,23 +312,28 @@ etc. 
However, even with this simple code, we get some surprisingly powerful capabilities - check this out (I removed the dump of the anonymous functions, you should get the idea by now :) : -{% highlight bash %} ready> extern sin(x) Read an extern: declare double -@sin(double) -ready> extern cos(x) Read an extern: declare double @cos(double) +.. code-block:: bash -ready> sin(1.0) *Evaluated to: 0.841470984808* + ready> extern sin(x) Read an extern: declare double + @sin(double) + + ready> extern cos(x) Read an extern: declare double @cos(double) + + ready> sin(1.0) *Evaluated to: 0.841470984808* + + ready> def foo(x) sin(x)\ *sin(x) + cos(x)*\ cos(x) Read a function + definition: define double @foo(double %x) { entry: %calltmp = call + double @sin(double %x) ; [#uses=1] %calltmp1 = call double @sin(double + %x) ; [#uses=1] %multmp = fmul double %calltmp, %calltmp1 ; [#uses=1] + %calltmp2 = call double @cos(double %x) ; [#uses=1] %calltmp3 = call + double @cos(double %x) ; [#uses=1] %multmp4 = fmul double %calltmp2, + %calltmp3 ; [#uses=1] %addtmp = fadd double %multmp, %multmp4 ; + [#uses=1] ret double %addtmp } + + ready> foo(4.0) *Evaluated to: 1.000000* -ready> def foo(x) sin(x)\ *sin(x) + cos(x)*\ cos(x) Read a function -definition: define double @foo(double %x) { entry: %calltmp = call -double @sin(double %x) ; [#uses=1] %calltmp1 = call double @sin(double -%x) ; [#uses=1] %multmp = fmul double %calltmp, %calltmp1 ; [#uses=1] -%calltmp2 = call double @cos(double %x) ; [#uses=1] %calltmp3 = call -double @cos(double %x) ; [#uses=1] %multmp4 = fmul double %calltmp2, -%calltmp3 ; [#uses=1] %addtmp = fadd double %multmp, %multmp4 ; -[#uses=1] ret double %addtmp } -ready> foo(4.0) *Evaluated to: 1.000000* {% endhighlight %} Whoa, how does the JIT know about sin and cos? 
The answer is surprisingly simple: in this example, the JIT started execution of a @@ -301,27 +349,32 @@ One interesting application of this is that we can now extend the language by writing arbitrary C++ code to implement operations. For example, we can create a C file with the following simple function: -{% highlight c %} #include -double putchard(double x) { putchar((char)x); return 0; } {% -endhighlight %} +.. code-block:: c -We can then compile this into a shared library with GCC: + #include <stdio.h> + + double putchard(double x) { putchar((char)x); return 0; } + + +We can then compile this into a shared library with GCC: + +.. code-block:: bash + + gcc -shared -fPIC -o putchard.so putchard.c + + +Now we can load this library into the Python process using +``llvm.core.load_library_permanently`` and access it from Kaleidoscope +to produce simple output to the console: + +.. code-block:: python + + >>> import llvm.core >>> + llvm.core.load_library_permanently('/home/max/llvmpy-tutorial/putchard.so') + >>> import kaleidoscope >>> kaleidoscope.main() ready> extern + putchard(x) Read an extern: declare double @putchard(double) + + ready> putchard(65) + putchard(66) + putchard(67) + putchard(10) *ABC* + Evaluated to: 0.0 + -{% highlight bash %} gcc -shared -fPIC -o putchard.so putchard.c {% -endhighlight %} -Now we can load this library into the Python process using -``llvm.core.load_library_permanently`` and access it from Kaleidoscope -to produce simple output to the console: - -{% highlight python %} >>> import llvm.core >>> -llvm.core.load\_library\_permanently('/home/max/llvmpy-tutorial/putchard.so') ->>> import kaleidoscope >>> kaleidoscope.main() ready> extern -putchard(x) Read an extern: declare double @putchard(double) - -ready> putchard(65) + putchard(66) + putchard(67) + putchard(10) *ABC* -Evaluated to: 0.0 {% endhighlight %} Similar code could be used to implement file I/O, console input, and many other capabilities in 
Kaleidoscope. @@ -341,450 +394,415 @@ Full Code Listing # {#code} Here is the complete code listing for our running example, enhanced with the LLVM JIT and optimizer: -{% highlight python %} #!/usr/bin/env python -import re from llvm.core import Module, Constant, Type, Function, -Builder, FCMP\_ULT from llvm.ee import ExecutionEngine, TargetData from -llvm.passes import FunctionPassManager from llvm.passes import -(PASS\_INSTRUCTION\_COMBINING, PASS\_REASSOCIATE, PASS\_GVN, -PASS\_CFG\_SIMPLIFICATION) - -Globals -------- - -The LLVM module, which holds all the IR code. -============================================= - -g\_llvm\_module = Module.new('my cool jit') - -The LLVM instruction builder. Created whenever a new function is entered. -========================================================================= - -g\_llvm\_builder = None - -A dictionary that keeps track of which values are defined in the current scope -============================================================================== - -and what their LLVM representation is. -====================================== - -g\_named\_values = {} - -The function optimization passes manager. -========================================= - -g\_llvm\_pass\_manager = FunctionPassManager.new(g\_llvm\_module) - -The LLVM execution engine. -========================== - -g\_llvm\_executor = ExecutionEngine.new(g\_llvm\_module) - -Lexer ------ - -The lexer yields one of these types for each token. 
-=================================================== - -class EOFToken(object): pass - -class DefToken(object): pass - -class ExternToken(object): pass - -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other - -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -def Tokenize(string): while string: # Skip whitespace. if -string[0].isspace(): string = string[1:] continue - -:: - - # Run regexes. - comment_match = REGEX_COMMENT.match(string) - number_match = REGEX_NUMBER.match(string) - identifier_match = REGEX_IDENTIFIER.match(string) - - # Check if any of the regexes matched and yield the appropriate result. - if comment_match: - comment = comment_match.group(0) - string = string[len(comment):] - elif number_match: - number = number_match.group(0) - yield NumberToken(float(number)) - string = string[len(number):] - elif identifier_match: - identifier = identifier_match.group(0) - # Check if we matched a keyword. - if identifier == 'def': - yield DefToken() - elif identifier == 'extern': - yield ExternToken() - else: - yield IdentifierToken(identifier) - string = string[len(identifier):] - else: - # Yield the ASCII value of the unknown character. - yield CharacterToken(string[0]) - string = string[1:] - -yield EOFToken() - -Abstract Syntax Tree (aka Parse Tree) -------------------------------------- - -Base class for all expression nodes. 
-==================================== - -class ExpressionNode(object): pass - -Expression class for numeric literals like "1.0". -================================================= - -class NumberExpressionNode(ExpressionNode): - -def **init**\ (self, value): self.value = value - -def CodeGen(self): return Constant.real(Type.double(), self.value) - -Expression class for referencing a variable, like "a". -====================================================== - -class VariableExpressionNode(ExpressionNode): - -def **init**\ (self, name): self.name = name - -def CodeGen(self): if self.name in g\_named\_values: return -g\_named\_values[self.name] else: raise RuntimeError('Unknown variable -name: ' + self.name) - -Expression class for a binary operator. -======================================= - -class BinaryOperatorExpressionNode(ExpressionNode): - -def **init**\ (self, operator, left, right): self.operator = operator -self.left = left self.right = right - -def CodeGen(self): left = self.left.CodeGen() right = -self.right.CodeGen() - -:: - - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. - return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - raise RuntimeError('Unknown binary operator.') - -Expression class for function calls. -==================================== - -class CallExpressionNode(ExpressionNode): - -def **init**\ (self, callee, args): self.callee = callee self.args = -args - -def CodeGen(self): # Look up the name in the global module table. callee -= g\_llvm\_module.get\_function\_named(self.callee) - -:: - - # Check for argument mismatch error. 
- if len(callee.args) != len(self.args): - raise RuntimeError('Incorrect number of arguments passed.') - - arg_values = [i.CodeGen() for i in self.args] - - return g_llvm_builder.call(callee, arg_values, 'calltmp') - -This class represents the "prototype" for a function, which captures its name, -============================================================================== - -and its argument names (thus implicitly the number of arguments the function -============================================================================ - -takes). -======= - -class PrototypeNode(object): - -def **init**\ (self, name, args): self.name = name self.args = args - -def CodeGen(self): # Make the function type, eg. double(double,double). -funct\_type = Type.function( Type.double(), [Type.double()] \* -len(self.args), False) - -:: - - function = Function.new(g_llvm_module, funct_type, self.name) - - # If the name conflicted, there was already something with the same name. - # If it has a body, don't allow redefinition or reextern. - if function.name != self.name: - function.delete() - function = g_llvm_module.get_function_named(self.name) - - # If the function already has a body, reject this. - if not function.is_declaration: - raise RuntimeError('Redefinition of function.') - - # If F took a different number of args, reject. - if len(callee.args) != len(self.args): - raise RuntimeError('Redeclaration of a function with different number ' - 'of args.') - - # Set names for all arguments and add them to the variables symbol table. - for arg, arg_name in zip(function.args, self.args): - arg.name = arg_name - # Add arguments to variable symbol table. - g_named_values[arg_name] = arg - - return function - -This class represents a function definition itself. -=================================================== - -class FunctionNode(object): - -def **init**\ (self, prototype, body): self.prototype = prototype -self.body = body - -def CodeGen(self): # Clear scope. 
g\_named\_values.clear() - -:: - - # Create a function object. - function = self.prototype.CodeGen() - - # Create a new basic block to start insertion into. - block = function.append_basic_block('entry') - global g_llvm_builder - g_llvm_builder = Builder.new(block) - - # Finish off the function. - try: - return_value = self.body.CodeGen() - g_llvm_builder.ret(return_value) - - # Validate the generated code, checking for consistency. - function.verify() - - # Optimize the function. - g_llvm_pass_manager.run(function) - except: - function.delete() - raise - - return function - -Parser ------- - -class Parser(object): - -def **init**\ (self, tokens, binop\_precedence): self.tokens = tokens -self.binop\_precedence = binop\_precedence self.Next() - -# Provide a simple token buffer. Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. def Next(self): -self.current = self.tokens.next() - -# Gets the precedence of the current token, or -1 if the token is not a -binary # operator. def GetCurrentTokenPrecedence(self): if -isinstance(self.current, CharacterToken): return -self.binop\_precedence.get(self.current.char, -1) else: return -1 - -# identifierexpr ::= identifier \| identifier '(' expression\* ')' def -ParseIdentifierExpr(self): identifier\_name = self.current.name -self.Next() # eat identifier. - -:: - - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name) - - # Call. - self.Next() # eat '('. - args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - - self.Next() # eat ')'. 
- return CallExpressionNode(identifier_name, args) - -# numberexpr ::= number def ParseNumberExpr(self): result = -NumberExpressionNode(self.current.value) self.Next() # consume the -number. return result - -# parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() -# eat '('. - -:: - - contents = self.ParseExpression() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. - - return contents - -# primary ::= identifierexpr \| numberexpr \| parenexpr def -ParsePrimary(self): if isinstance(self.current, IdentifierToken): return -self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): -return self.ParseNumberExpr() elif self.current == CharacterToken('('): -return self.ParseParenExpr() else: raise RuntimeError('Unknown token -when expecting an expression.') - -# binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, -left\_precedence): # If this is a binary operator, find its precedence. -while True: precedence = self.GetCurrentTokenPrecedence() - -:: - - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left - - binary_operator = self.current.char - self.Next() # eat the operator. - - # Parse the primary expression after the binary operator. - right = self.ParsePrimary() - - # If binary_operator binds less tightly with right than the operator after - # right, let the pending operator take right as its left. - next_precedence = self.GetCurrentTokenPrecedence() - if precedence < next_precedence: - right = self.ParseBinOpRHS(right, precedence + 1) - - # Merge left/right. 
- left = BinaryOperatorExpressionNode(binary_operator, left, right) - -# expression ::= primary binoprhs def ParseExpression(self): left = -self.ParsePrimary() return self.ParseBinOpRHS(left, 0) - -# prototype ::= id '(' id\* ')' def ParsePrototype(self): if not -isinstance(self.current, IdentifierToken): raise RuntimeError('Expected -function name in prototype.') - -:: - - function_name = self.current.name - self.Next() # eat function name. - - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. - - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. - - return PrototypeNode(function_name, arg_names) - -# definition ::= 'def' prototype expression def ParseDefinition(self): -self.Next() # eat def. proto = self.ParsePrototype() body = -self.ParseExpression() return FunctionNode(proto, body) - -# toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = -PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) - -# external ::= 'extern' prototype def ParseExtern(self): self.Next() # -eat extern. return self.ParsePrototype() - -# Top-Level parsing def HandleDefinition(self): -self.Handle(self.ParseDefinition, 'Read a function definition:') - -def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') - -def HandleTopLevelExpression(self): try: function = -self.ParseTopLevelExpr().CodeGen() result = -g\_llvm\_executor.run\_function(function, []) print 'Evaluated to:', -result.as\_real(Type.double()) except Exception, e: print 'Error:', e -try: self.Next() # Skip for error recovery. except: pass - -def Handle(self, function, message): try: print message, -function().CodeGen() except Exception, e: print 'Error:', e try: -self.Next() # Skip for error recovery. 
except: pass - -Main driver code. ------------------ - -def main(): # Set up the optimizer pipeline. Start with registering info -about how the # target lays out data structures. -g\_llvm\_pass\_manager.add(g\_llvm\_executor.target\_data) # Do simple -"peephole" optimizations and bit-twiddling optzns. -g\_llvm\_pass\_manager.add(PASS\_INSTRUCTION\_COMBINING) # Reassociate -expressions. g\_llvm\_pass\_manager.add(PASS\_REASSOCIATE) # Eliminate -Common SubExpressions. g\_llvm\_pass\_manager.add(PASS\_GVN) # Simplify -the control flow graph (deleting unreachable blocks, etc). -g\_llvm\_pass\_manager.add(PASS\_CFG\_SIMPLIFICATION) - -g\_llvm\_pass\_manager.initialize() - -# Install standard binary operators. # 1 is lowest possible precedence. -40 is the highest. operator\_precedence = { '<': 10, '+': 20, '-': 20, -'\*': 40 } - -# Run the main "interpreter loop". while True: print 'ready>', try: raw -= raw\_input() except KeyboardInterrupt: break - -:: - - parser = Parser(Tokenize(raw), operator_precedence) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() - -# Print out all of the generated code. print '', g\_llvm\_module - -if **name** == '**main**\ ': main() {% endhighlight %} - --------------- - -**`Next: Extending the language: control flow `_** +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder, FCMP_ULT from llvm.ee import ExecutionEngine, TargetData from + llvm.passes import FunctionPassManager from llvm.passes import + (PASS_INSTRUCTION_COMBINING, PASS_REASSOCIATE, PASS_GVN, + PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. 
+ g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + # Expression class for function calls. 
+ class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): + + def **init**\ (self, name, args): self.name = name self.args = args + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If F took a different number of args, reject. + if len(callee.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. 
+ class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. 
+ args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # primary ::= identifierexpr \| numberexpr \| parenexpr def + ParsePrimary(self): if isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. 
+ next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. 
except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. operator_precedence = { '<': 10, '+': 20, '-': 20, + '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl5.rst b/docs/source/doc/kaleidoscope/PythonLangImpl5.rst index e66944e..d4d01fa 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl5.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl5.rst @@ -34,8 +34,13 @@ Before we get going on "how" we add this extension, lets talk about "what" we want. 
The basic idea is that we want to be able to write this sort of thing: -{% highlight python %} def fib(x) if x < 3 then 1 else fib(x-1) + -fib(x-2) {% endhighlight %} + +.. code-block:: python + + def fib(x) if x < 3 then 1 else fib(x-1) + + fib(x-2) + + In Kaleidoscope, every construct is an expression: there are no statements. As such, the if/then/else expression needs to return a value @@ -61,31 +66,46 @@ Lexer Extensions for If/Then/Else ## {#iflexer} The lexer extensions are straightforward. First we add new token classes for the relevant tokens: -{% highlight python %} class IfToken(object): pass class -ThenToken(object): pass class ElseToken(object): pass {% endhighlight %} + +.. code-block:: python + + class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass + + Once we have that, we recognize the new keywords in the lexer. This is pretty simple stuff: -{% highlight python %} ... if identifier == 'def': yield DefToken() elif -identifier == 'extern': yield ExternToken() elif identifier == 'if': -yield IfToken() elif identifier == 'then': yield ThenToken() elif -identifier == 'else': yield ElseToken() else: yield -IdentifierToken(identifier) {% endhighlight %} + +.. code-block:: python + + ... if identifier == 'def': yield DefToken() elif + identifier == 'extern': yield ExternToken() elif identifier == 'if': + yield IfToken() elif identifier == 'then': yield ThenToken() elif + identifier == 'else': yield ElseToken() else: yield + IdentifierToken(identifier) + + AST Extensions for If/Then/Else ## {#ifast} ------------------------------------------- To represent the new expression we add a new AST node for it: -{% highlight python %} # Expression class for if/then/else. class -IfExpressionNode(ExpressionNode): -def **init**\ (self, condition, then\_branch, else\_branch): -self.condition = condition self.then\_branch = then\_branch -self.else\_branch = else\_branch +.. code-block:: python + + # Expression class for if/then/else. 
class + IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): ... + -def CodeGen(self): ... {% endhighlight %} The AST node just has pointers to the various subexpressions. @@ -96,39 +116,49 @@ Now that we have the relevant tokens coming from the lexer and we have the AST node to build, our parsing logic is relatively straightforward. First we define a new parsing function: -{% highlight python %} # ifexpr ::= 'if' expression 'then' expression -'else' expression def ParseIfExpr(self): self.Next() # eat the if. -:: +.. code-block:: python - # condition. - condition = self.ParseExpression() + # ifexpr ::= 'if' expression 'then' expression + 'else' expression def ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + - if not isinstance(self.current, ThenToken): - raise RuntimeError('Expected "then".') - self.Next() # eat the then. - then_branch = self.ParseExpression() - - if not isinstance(self.current, ElseToken): - raise RuntimeError('Expected "else".') - self.Next() # eat the else. 
- - else_branch = self.ParseExpression() - - return IfExpressionNode(condition, then_branch, else_branch) - -{% endhighlight %} Next we hook it up as a primary expression: -{% highlight python %} def ParsePrimary(self): if -isinstance(self.current, IdentifierToken): return -self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): -return self.ParseNumberExpr(); elif isinstance(self.current, IfToken): -return self.ParseIfExpr() elif self.current == CharacterToken('('): -return self.ParseParenExpr() else: raise RuntimeError('Unknown token -when expecting an expression.') {% endhighlight %} + +.. code-block:: python + + def ParsePrimary(self): if + isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr(); elif isinstance(self.current, IfToken): + return self.ParseIfExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + LLVM IR for If/Then/Else ## {#ifir} ----------------------------------- @@ -142,19 +172,29 @@ described in previous chapters. To motivate the code we want to produce, lets take a look at a simple example. Consider: -{% highlight python %} extern foo(); extern bar(); def baz(x) if x then -foo() else bar(); {% endhighlight %} + +.. 
code-block:: python + + extern foo(); extern bar(); def baz(x) if x then + foo() else bar(); + + If you disable optimizations, the code you'll (soon) get from Kaleidoscope looks something like this: -{% highlight llvm %} declare double @foo() declare double @bar() define -double @baz(double %x) { entry: %ifcond = fcmp one double %x, -0.000000e+00 br i1 %ifcond, label %then, label %else then: ; preds = -%entry %calltmp1 = call double @bar() else: ; preds = %entry %calltmp1 = -call double @bar() br label %ifcont ifcont: ; preds = %else, %then -%iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ] ret double -%iftmp } {% endhighlight %} + +.. code-block:: llvm + + declare double @foo() declare double @bar() define + double @baz(double %x) { entry: %ifcond = fcmp one double %x, + 0.000000e+00 br i1 %ifcond, label %then, label %else then: ; preds = + %entry %calltmp1 = call double @bar() else: ; preds = %entry %calltmp1 = + call double @bar() br label %ifcont ifcont: ; preds = %else, %then + %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ] ret double + %iftmp } + + To visualize the control flow graph, you can use a nifty feature of the LLVM `opt `_ tool. If you put this LLVM @@ -226,34 +266,44 @@ Code Generation for If/Then/Else ## {#ifcodegen} In order to generate code for this, we implement the ``Codegen`` method for ``IfExpressionNode``: -{% highlight python %} def CodeGen(self): condition = -self.condition.CodeGen() -:: +.. code-block:: python + + def CodeGen(self): condition = + self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + - # Convert condition to a bool by comparing equal to 0.0. - condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') -{% endhighlight %} This code is straightforward and similar to what we saw before. 
We emit the expression for the condition, then compare that value to zero to get a truth value as a 1-bit (bool) value. -{% highlight python %} function = g\_llvm\_builder.basic\_block.function -:: +.. code-block:: python - # Create blocks for the then and else cases. Insert the 'then' block at the - # end of the function. - then_block = function.append_basic_block('then') - else_block = function.append_basic_block('else') - merge_block = function.append_basic_block('ifcond') + function = g_llvm_builder.basic_block.function + + :: + + # Create blocks for the then and else cases. Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + - g_llvm_builder.cbranch(condition_bool, then_block, else_block) -{% endhighlight %} This code creates the basic blocks that are related to the if/then/else statement, and correspond directly to the blocks in the example above. @@ -268,17 +318,22 @@ can emit the conditional branch that chooses between them. Note that creating new blocks does not implicitly affect the Builder, so it is still inserting into the block that the condition went into. -{% highlight python %} # Emit then value. -g\_llvm\_builder.position\_at\_end(then\_block) then\_value = -self.then\_branch.CodeGen() g\_llvm\_builder.branch(merge\_block) -:: +.. code-block:: python + + # Emit then value. + g_llvm_builder.position_at_end(then_block) then_value = + self.then_branch.CodeGen() g_llvm_builder.branch(merge_block) + + :: + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + - # Codegen of 'Then' can change the current block; update then_block for the - # PHI node. 
- then_block = g_llvm_builder.basic_block -{% endhighlight %} After the conditional branch is inserted, we move the builder to start inserting into the "then" block. Strictly speaking, this call moves the @@ -310,17 +365,22 @@ expression. Because calling Codegen recursively could arbitrarily change the notion of the current block, we are required to get an up-to-date value for code that will set up the Phi node. -{% highlight python %} # Emit else block. -g\_llvm\_builder.position\_at\_end(else\_block) else\_value = -self.else\_branch.CodeGen() g\_llvm\_builder.branch(merge\_block) -:: +.. code-block:: python + + # Emit else block. + g_llvm_builder.position_at_end(else_block) else_value = + self.else_branch.CodeGen() g_llvm_builder.branch(merge_block) + + :: + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + - # Codegen of 'Else' can change the current block, update else_block for the - # PHI node. - else_block = g_llvm_builder.basic_block -{% endhighlight %} Code generation for the 'else' block is basically identical to codegen for the 'then' block. The only significant difference is the first line, @@ -329,17 +389,22 @@ which adds the 'else' block to the function. Recall previously that the 'then' and 'else' blocks are emitted, we can finish up with the merge code: -{% highlight python %} # Emit merge block. -g\_llvm\_builder.position\_at\_end(merge\_block) phi = -g\_llvm\_builder.phi(Type.double(), 'iftmp') -phi.add\_incoming(then\_value, then\_block) -phi.add\_incoming(else\_value, else\_block) -:: +.. code-block:: python + + # Emit merge block. 
+ g_llvm_builder.position_at_end(merge_block) phi = + g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + :: + + return phi + + - return phi -{% endhighlight %} The first line changes the insertion point so that newly created code will go into the "merge" block. Once that is done, we need to create the @@ -365,15 +430,20 @@ Now that we know how to add basic control flow constructs to the language, we have the tools to add more powerful things. Lets add something more aggressive, a 'for' expression: -{% highlight python %} extern putchard(char) def printstar(n) for i = 1, -i < n, 1.0 in putchard(42) # ascii 42 = '\*' -:: +.. code-block:: python + + extern putchard(char) def printstar(n) for i = 1, + i < n, 1.0 in putchard(42) # ascii 42 = '\*' + + :: + + # print 100 '*' characters + printstar(100) + + - # print 100 '*' characters - printstar(100) -{% endhighlight %} This expression defines a new variable (``i`` in this case) which iterates from a starting value, while the condition (``i < n`` in this @@ -391,29 +461,34 @@ Lexer Extensions for the 'for' Loop ## {#forlexer} The lexer extensions are the same sort of thing as for if/then/else: -{% highlight python %} ... -class ThenToken(object): pass class ElseToken(object): pass class -ForToken(object): pass class InToken(object): pass +.. code-block:: python -... + ... + + class ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass + + ... + + def Tokenize(string): + + :: + + ... + + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + else: + yield IdentifierToken(identifier) + + -def Tokenize(string): -:: - - ... 
- - elif identifier == 'else': - yield ElseToken() - elif identifier == 'for': - yield ForToken() - elif identifier == 'in': - yield InToken() - else: - yield IdentifierToken(identifier) - -{% endhighlight %} AST Extensions for the 'for' Loop ## {#forast} ---------------------------------------------- @@ -421,14 +496,19 @@ AST Extensions for the 'for' Loop ## {#forast} The AST node is just as simple. It basically boils down to capturing the variable name and the constituent expressions in the node. -{% highlight python %} # Expression class for for/in. class -ForExpressionNode(ExpressionNode): -def **init**\ (self, loop\_variable, start, end, step, body): -self.loop\_variable = loop\_variable self.start = start self.end = end -self.step = step self.body = body +.. code-block:: python + + # Expression class for for/in. class + ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): ... + -def CodeGen(self): ... {% endhighlight %} Parser Extensions for the 'for' Loop ## {#forparser} ---------------------------------------------------- @@ -438,46 +518,51 @@ is handling of the optional step value. The parser code handles it by checking to see if the second comma is present. If not, it sets the step value to null in the AST node: -{% highlight python %} # forexpr ::= 'for' identifier '=' expr ',' expr -(',' expr)? 'in' expression def ParseForExpr(self): self.Next() # eat -the for. -:: +.. code-block:: python - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after for.') + # forexpr ::= 'for' identifier '=' expr ',' expr + (',' expr)? 'in' expression def ParseForExpr(self): self.Next() # eat + the for. 
+ + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + - loop_variable = self.current.name - self.Next() # eat the identifier. - if self.current != CharacterToken('='): - raise RuntimeError('Expected "=" after for variable.') - self.Next() # eat the '='. - - start = self.ParseExpression() - - if self.current != CharacterToken(','): - raise RuntimeError('Expected "," after for start value.') - self.Next() # eat the ','. - - end = self.ParseExpression() - - # The step value is optional. - if self.current == CharacterToken(','): - self.Next() # eat the ','. - step = self.ParseExpression() - else: - step = None - - if not isinstance(self.current, InToken): - raise RuntimeError('Expected "in" after for variable specification.') - self.Next() # eat 'in'. - - body = self.ParseExpression() - - return ForExpressionNode(loop_variable, start, end, step, body) - -{% endhighlight %} LLVM IR for the 'for' Loop ## {#forir} -------------------------------------- @@ -486,16 +571,21 @@ Now we get to the good part: the LLVM IR we want to generate for this thing. 
With the simple example above, we get this LLVM IR (note that this dump is generated with optimizations disabled for clarity): -{% highlight llvm %} declare double @putchard(double) define double -@printstar(double %n) { entry: ; initial value = 1.0 (inlined into phi) -br label %loop loop: ; preds = %loop, %entry %i = phi double [ -1.000000e+00, %entry ], [ %nextvar, %loop ] ; body %calltmp = call -double @putchard(double 4.200000e+01) ; increment %nextvar = fadd double -%i, 1.000000e+00 ; termination test %cmptmp = fcmp ult double %i, %n -%booltmp = uitofp i1 %cmptmp to double %loopcond = fcmp one double -%booltmp, 0.000000e+00 br i1 %loopcond, label %loop, label %afterloop -afterloop: ; preds = %loop ; loop always returns 0.0 ret double -0.000000e+00 } {% endhighlight %} + +.. code-block:: llvm + + declare double @putchard(double) define double + @printstar(double %n) { entry: ; initial value = 1.0 (inlined into phi) + br label %loop loop: ; preds = %loop, %entry %i = phi double [ + 1.000000e+00, %entry ], [ %nextvar, %loop ] ; body %calltmp = call + double @putchard(double 4.200000e+01) ; increment %nextvar = fadd double + %i, 1.000000e+00 ; termination test %cmptmp = fcmp ult double %i, %n + %booltmp = uitofp i1 %cmptmp to double %loopcond = fcmp one double + %booltmp, 0.000000e+00 br i1 %loopcond, label %loop, label %afterloop + afterloop: ; preds = %loop ; loop always returns 0.0 ret double + 0.000000e+00 } + + This loop contains all the same constructs we saw before: a phi node, several expressions, and some basic blocks. Lets see how this fits @@ -507,28 +597,33 @@ Code Generation for the 'for' Loop ## {#forcodegen} The first part of Codegen is very simple: we just output the start expression for the loop value: -{% highlight python %} def CodeGen(self): # Emit the start code first, -without 'variable' in scope. 
start\_value = self.start.CodeGen() {% -endhighlight %} -With this out of the way, the next step is to set up the LLVM basic -block for the start of the loop body. In the case above, the whole loop -body is one block, but remember that the body code itself could consist -of multiple blocks (e.g. if it contains an if/then/else or a for/in -expression). +.. code-block:: python -{% highlight python %} # Make the new basic block for the loop header, -inserting after current # block. function = -g\_llvm\_builder.basic\_block.function pre\_header\_block = -g\_llvm\_builder.basic\_block loop\_block = -function.append\_basic\_block('loop') + def CodeGen(self): # Emit the start code first, + without 'variable' in scope. start_value = self.start.CodeGen() {% + endhighlight %} + + With this out of the way, the next step is to set up the LLVM basic + block for the start of the loop body. In the case above, the whole loop + body is one block, but remember that the body code itself could consist + of multiple blocks (e.g. if it contains an if/then/else or a for/in + expression). + + {% highlight python %} # Make the new basic block for the loop header, + inserting after current # block. function = + g_llvm_builder.basic_block.function pre_header_block = + g_llvm_builder.basic_block loop_block = + function.append_basic_block('loop') + + :: + + # Insert an explicit fallthrough from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + -:: - # Insert an explicit fallthrough from the current block to the loop_block. - g_llvm_builder.branch(loop_block) - -{% endhighlight %} This code is similar to what we saw for if/then/else. Because we will need it to create the Phi node, we remember the block that falls through @@ -536,16 +631,21 @@ into the loop. Once we have that, we create the actual block that starts the loop and create an unconditional branch for the fall-through between the two blocks. -{% highlight python %} # Start insertion in loop\_block. 
-g\_llvm\_builder.position\_at\_end(loop\_block); -:: +.. code-block:: python + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block); + + :: + + # Start the PHI node with an entry for start. + variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) + variable_phi.add_incoming(start_value, pre_header_block) + + - # Start the PHI node with an entry for start. - variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) - variable_phi.add_incoming(start_value, pre_header_block) -{% endhighlight %} Now that the "pre\_header\_block" for the loop is set up, we switch to emitting code for the loop body. To begin with, we move the insertion @@ -554,19 +654,24 @@ already know the incoming value for the starting value, we add it to the Phi node. Note that the Phi will eventually get a second value for the backedge, but we can't set it up yet (because it doesn't exist!). -{% highlight python %} # Within the loop, the variable is defined equal -to the PHI node. If it # shadows an existing variable, we have to -restore it, so save it now. old\_value = -g\_named\_values.get(self.loop\_variable, None) -g\_named\_values[self.loop\_variable] = variable\_phi -:: +.. code-block:: python + + # Within the loop, the variable is defined equal + to the PHI node. If it # shadows an existing variable, we have to + restore it, so save it now. old_value = + g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = variable_phi + + :: + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + - # Emit the body of the loop. This, like any other expr, can change the - # current BB. Note that we ignore the value computed by the body. - self.body.CodeGen() -{% endhighlight %} Now the code starts to get more interesting. Our 'for' loop introduces a new variable to the symbol table. 
This means that our symbol table can @@ -585,43 +690,58 @@ recursively codegen's the body. This allows the body to use the loop variable: any references to it will naturally find it in the symbol table. -{% highlight python %} # Emit the step value. if self.step: step\_value -= self.step.CodeGen() else: # If not specified, use 1.0. step\_value = -Constant.real(Type.double(), 1) -:: +.. code-block:: python + + # Emit the step value. if self.step: step_value + = self.step.CodeGen() else: # If not specified, use 1.0. step_value = + Constant.real(Type.double(), 1) + + :: + + next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') + + - next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') -{% endhighlight %} Now that the body is emitted, we compute the next value of the iteration variable by adding the step value, or 1.0 if it isn't present. ``next_value`` will be the value of the loop variable on the next iteration of the loop. -{% highlight python %} # Compute the end condition and convert it to a -bool by comparing to 0.0. end\_condition = self.end.CodeGen() -end\_condition\_bool = g\_llvm\_builder.fcmp( FCMP\_ONE, end\_condition, -Constant.real(Type.double(), 0), 'loopcond') {% endhighlight %} + +.. code-block:: python + + # Compute the end condition and convert it to a + bool by comparing to 0.0. end_condition = self.end.CodeGen() + end_condition_bool = g_llvm_builder.fcmp( FCMP_ONE, end_condition, + Constant.real(Type.double(), 0), 'loopcond') + + Finally, we evaluate the exit value of the loop, to determine whether the loop should exit. This mirrors the condition evaluation for the if/then/else statement. -{% highlight python %} # Create the "after loop" block and insert it. -loop\_end\_block = g\_llvm\_builder.basic\_block after\_block = -function.append\_basic\_block('afterloop') -:: +.. code-block:: python - # Insert the conditional branch into the end of loop_end_block. 
- g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + # Create the "after loop" block and insert it. + loop_end_block = g_llvm_builder.basic_block after_block = + function.append_basic_block('afterloop') + + :: + + # Insert the conditional branch into the end of loop_end_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + - # Any new code will be inserted in after_block. - g_llvm_builder.position_at_end(after_block) -{% endhighlight %} With the code for the body of the loop complete, we just need to finish up the control flow for it. This code remembers the end block (for the @@ -631,21 +751,26 @@ chooses between executing the loop again and exiting the loop. Any future code is emitted in the "afterloop" block, so it sets the insertion position to it. -{% highlight python %} # Add a new entry to the PHI node for the -backedge. variable\_phi.add\_incoming(next\_value, loop\_end\_block) -:: +.. code-block:: python - # Restore the unshadowed variable. - if old_value: - g_named_values[self.loop_variable] = old_value - else: - del g_named_values[self.loop_variable] + # Add a new entry to the PHI node for the + backedge. variable_phi.add_incoming(next_value, loop_end_block) + + :: + + # Restore the unshadowed variable. + if old_value: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + - # for expr always returns 0.0. - return Constant.real(Type.double(), 0) -{% endhighlight %} The final code handles various cleanups: now that we have the "next\_value", we can add the incoming value to the loop PHI node. 
After @@ -669,658 +794,618 @@ Full Code Listing # {#code} Here is the complete code listing for our running example, enhanced with the if/then/else and for expressions: -{% highlight python %} #!/usr/bin/env python -import re from llvm.core import Module, Constant, Type, Function, -Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes -import FunctionPassManager - -from llvm.core import FCMP\_ULT, FCMP\_ONE from llvm.passes import -(PASS\_INSTRUCTION\_COMBINING, PASS\_REASSOCIATE, PASS\_GVN, -PASS\_CFG\_SIMPLIFICATION) - -Globals -------- - -The LLVM module, which holds all the IR code. -============================================= - -g\_llvm\_module = Module.new('my cool jit') - -The LLVM instruction builder. Created whenever a new function is entered. -========================================================================= - -g\_llvm\_builder = None - -A dictionary that keeps track of which values are defined in the current scope -============================================================================== - -and what their LLVM representation is. -====================================== - -g\_named\_values = {} - -The function optimization passes manager. -========================================= - -g\_llvm\_pass\_manager = FunctionPassManager.new(g\_llvm\_module) - -The LLVM execution engine. -========================== - -g\_llvm\_executor = ExecutionEngine.new(g\_llvm\_module) - -Lexer ------ - -The lexer yields one of these types for each token. 
-=================================================== - -class EOFToken(object): pass class DefToken(object): pass class -ExternToken(object): pass class IfToken(object): pass class -ThenToken(object): pass class ElseToken(object): pass class -ForToken(object): pass class InToken(object): pass - -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other - -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -def Tokenize(string): while string: # Skip whitespace. if -string[0].isspace(): string = string[1:] continue - -:: - - # Run regexes. - comment_match = REGEX_COMMENT.match(string) - number_match = REGEX_NUMBER.match(string) - identifier_match = REGEX_IDENTIFIER.match(string) - - # Check if any of the regexes matched and yield the appropriate result. - if comment_match: - comment = comment_match.group(0) - string = string[len(comment):] - elif number_match: - number = number_match.group(0) - yield NumberToken(float(number)) - string = string[len(number):] - elif identifier_match: - identifier = identifier_match.group(0) - # Check if we matched a keyword. 
- if identifier == 'def': - yield DefToken() - elif identifier == 'extern': - yield ExternToken() - elif identifier == 'if': - yield IfToken() - elif identifier == 'then': - yield ThenToken() - elif identifier == 'else': - yield ElseToken() - elif identifier == 'for': - yield ForToken() - elif identifier == 'in': - yield InToken() - else: - yield IdentifierToken(identifier) - string = string[len(identifier):] - else: - # Yield the ASCII value of the unknown character. - yield CharacterToken(string[0]) - string = string[1:] - -yield EOFToken() - -Abstract Syntax Tree (aka Parse Tree) -------------------------------------- - -Base class for all expression nodes. -==================================== - -class ExpressionNode(object): pass - -Expression class for numeric literals like "1.0". -================================================= - -class NumberExpressionNode(ExpressionNode): - -def **init**\ (self, value): self.value = value - -def CodeGen(self): return Constant.real(Type.double(), self.value) - -Expression class for referencing a variable, like "a". -====================================================== - -class VariableExpressionNode(ExpressionNode): - -def **init**\ (self, name): self.name = name - -def CodeGen(self): if self.name in g\_named\_values: return -g\_named\_values[self.name] else: raise RuntimeError('Unknown variable -name: ' + self.name) - -Expression class for a binary operator. 
-======================================= - -class BinaryOperatorExpressionNode(ExpressionNode): - -def **init**\ (self, operator, left, right): self.operator = operator -self.left = left self.right = right - -def CodeGen(self): left = self.left.CodeGen() right = -self.right.CodeGen() - -:: - - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. - return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - raise RuntimeError('Unknown binary operator.') - -Expression class for function calls. -==================================== - -class CallExpressionNode(ExpressionNode): - -def **init**\ (self, callee, args): self.callee = callee self.args = -args - -def CodeGen(self): # Look up the name in the global module table. callee -= g\_llvm\_module.get\_function\_named(self.callee) - -:: - - # Check for argument mismatch error. - if len(callee.args) != len(self.args): - raise RuntimeError('Incorrect number of arguments passed.') - - arg_values = [i.CodeGen() for i in self.args] - - return g_llvm_builder.call(callee, arg_values, 'calltmp') - -Expression class for if/then/else. -================================== - -class IfExpressionNode(ExpressionNode): - -def **init**\ (self, condition, then\_branch, else\_branch): -self.condition = condition self.then\_branch = then\_branch -self.else\_branch = else\_branch - -def CodeGen(self): condition = self.condition.CodeGen() - -:: - - # Convert condition to a bool by comparing equal to 0.0. - condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') - - function = g_llvm_builder.basic_block.function - - # Create blocks for the then and else cases. 
Insert the 'then' block at the - # end of the function. - then_block = function.append_basic_block('then') - else_block = function.append_basic_block('else') - merge_block = function.append_basic_block('ifcond') - - g_llvm_builder.cbranch(condition_bool, then_block, else_block) - - # Emit then value. - g_llvm_builder.position_at_end(then_block) - then_value = self.then_branch.CodeGen() - g_llvm_builder.branch(merge_block) - - # Codegen of 'Then' can change the current block; update then_block for the - # PHI node. - then_block = g_llvm_builder.basic_block - - # Emit else block. - g_llvm_builder.position_at_end(else_block) - else_value = self.else_branch.CodeGen() - g_llvm_builder.branch(merge_block) - - # Codegen of 'Else' can change the current block, update else_block for the - # PHI node. - else_block = g_llvm_builder.basic_block - - # Emit merge block. - g_llvm_builder.position_at_end(merge_block) - phi = g_llvm_builder.phi(Type.double(), 'iftmp') - phi.add_incoming(then_value, then_block) - phi.add_incoming(else_value, else_block) - - return phi - -Expression class for for/in. -============================ - -class ForExpressionNode(ExpressionNode): - -def **init**\ (self, loop\_variable, start, end, step, body): -self.loop\_variable = loop\_variable self.start = start self.end = end -self.step = step self.body = body - -def CodeGen(self): # Output this as: # ... # start = startexpr # goto -loop # loop: # variable = phi [start, loopheader], [nextvariable, -loopend] # ... # bodyexpr # ... # loopend: # step = stepexpr # -nextvariable = variable + step # endcond = endexpr # br endcond, loop, -endloop # outloop: - -:: - - # Emit the start code first, without 'variable' in scope. - start_value = self.start.CodeGen() - - # Make the new basic block for the loop header, inserting after current - # block. 
- function = g_llvm_builder.basic_block.function - pre_header_block = g_llvm_builder.basic_block - loop_block = function.append_basic_block('loop') - - # Insert an explicit fallthrough from the current block to the loop_block. - g_llvm_builder.branch(loop_block) - - # Start insertion in loop_block. - g_llvm_builder.position_at_end(loop_block) - - # Start the PHI node with an entry for start. - variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) - variable_phi.add_incoming(start_value, pre_header_block) - - # Within the loop, the variable is defined equal to the PHI node. If it - # shadows an existing variable, we have to restore it, so save it now. - old_value = g_named_values.get(self.loop_variable, None) - g_named_values[self.loop_variable] = variable_phi - - # Emit the body of the loop. This, like any other expr, can change the - # current BB. Note that we ignore the value computed by the body. - self.body.CodeGen() - - # Emit the step value. - if self.step: - step_value = self.step.CodeGen() - else: - # If not specified, use 1.0. - step_value = Constant.real(Type.double(), 1) - - next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') - - # Compute the end condition and convert it to a bool by comparing to 0.0. - end_condition = self.end.CodeGen() - end_condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') - - # Create the "after loop" block and insert it. - loop_end_block = g_llvm_builder.basic_block - after_block = function.append_basic_block('afterloop') - - # Insert the conditional branch into the end of loop_end_block. - g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) - - # Any new code will be inserted in after_block. - g_llvm_builder.position_at_end(after_block) - - # Add a new entry to the PHI node for the backedge. - variable_phi.add_incoming(next_value, loop_end_block) - - # Restore the unshadowed variable. 
- if old_value: - g_named_values[self.loop_variable] = old_value - else: - del g_named_values[self.loop_variable] - - # for expr always returns 0.0. - return Constant.real(Type.double(), 0) - -This class represents the "prototype" for a function, which captures its name, -============================================================================== - -and its argument names (thus implicitly the number of arguments the function -============================================================================ - -takes). -======= - -class PrototypeNode(object): - -def **init**\ (self, name, args): self.name = name self.args = args - -def CodeGen(self): # Make the function type, eg. double(double,double). -funct\_type = Type.function( Type.double(), [Type.double()] \* -len(self.args), False) - -:: - - function = Function.new(g_llvm_module, funct_type, self.name) - - # If the name conflicted, there was already something with the same name. - # If it has a body, don't allow redefinition or reextern. - if function.name != self.name: - function.delete() - function = g_llvm_module.get_function_named(self.name) - - # If the function already has a body, reject this. - if not function.is_declaration: - raise RuntimeError('Redefinition of function.') - - # If the function took a different number of args, reject. - if len(function.args) != len(self.args): - raise RuntimeError('Redeclaration of a function with different number ' - 'of args.') - - # Set names for all arguments and add them to the variables symbol table. - for arg, arg_name in zip(function.args, self.args): - arg.name = arg_name - # Add arguments to variable symbol table. - g_named_values[arg_name] = arg - - return function - -This class represents a function definition itself. -=================================================== - -class FunctionNode(object): - -def **init**\ (self, prototype, body): self.prototype = prototype -self.body = body - -def CodeGen(self): # Clear scope. 
g\_named\_values.clear() - -:: - - # Create a function object. - function = self.prototype.CodeGen() - - # Create a new basic block to start insertion into. - block = function.append_basic_block('entry') - global g_llvm_builder - g_llvm_builder = Builder.new(block) - - # Finish off the function. - try: - return_value = self.body.CodeGen() - g_llvm_builder.ret(return_value) - - # Validate the generated code, checking for consistency. - function.verify() - - # Optimize the function. - g_llvm_pass_manager.run(function) - except: - function.delete() - raise - - return function - -Parser ------- - -class Parser(object): - -def **init**\ (self, tokens, binop\_precedence): self.tokens = tokens -self.binop\_precedence = binop\_precedence self.Next() - -# Provide a simple token buffer. Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. def Next(self): -self.current = self.tokens.next() - -# Gets the precedence of the current token, or -1 if the token is not a -binary # operator. def GetCurrentTokenPrecedence(self): if -isinstance(self.current, CharacterToken): return -self.binop\_precedence.get(self.current.char, -1) else: return -1 - -# identifierexpr ::= identifier \| identifier '(' expression\* ')' def -ParseIdentifierExpr(self): identifier\_name = self.current.name -self.Next() # eat identifier. - -:: - - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name) - - # Call. - self.Next() # eat '('. - args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - - self.Next() # eat ')'. 
- return CallExpressionNode(identifier_name, args) - -# numberexpr ::= number def ParseNumberExpr(self): result = -NumberExpressionNode(self.current.value) self.Next() # consume the -number. return result - -# parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() -# eat '('. - -:: - - contents = self.ParseExpression() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. - - return contents - -# ifexpr ::= 'if' expression 'then' expression 'else' expression def -ParseIfExpr(self): self.Next() # eat the if. - -:: - - # condition. - condition = self.ParseExpression() - - if not isinstance(self.current, ThenToken): - raise RuntimeError('Expected "then".') - self.Next() # eat the then. - - then_branch = self.ParseExpression() - - if not isinstance(self.current, ElseToken): - raise RuntimeError('Expected "else".') - self.Next() # eat the else. - - else_branch = self.ParseExpression() - - return IfExpressionNode(condition, then_branch, else_branch) - -# forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' -expression def ParseForExpr(self): self.Next() # eat the for. - -:: - - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after for.') - - loop_variable = self.current.name - self.Next() # eat the identifier. - - if self.current != CharacterToken('='): - raise RuntimeError('Expected "=" after for variable.') - self.Next() # eat the '='. - - start = self.ParseExpression() - - if self.current != CharacterToken(','): - raise RuntimeError('Expected "," after for start value.') - self.Next() # eat the ','. - - end = self.ParseExpression() - - # The step value is optional. - if self.current == CharacterToken(','): - self.Next() # eat the ','. - step = self.ParseExpression() - else: - step = None - - if not isinstance(self.current, InToken): - raise RuntimeError('Expected "in" after for variable specification.') - self.Next() # eat 'in'. 
- - body = self.ParseExpression() - - return ForExpressionNode(loop_variable, start, end, step, body) - -# primary ::= identifierexpr \| numberexpr \| parenexpr \| ifexpr \| -forexpr def ParsePrimary(self): if isinstance(self.current, -IdentifierToken): return self.ParseIdentifierExpr() elif -isinstance(self.current, NumberToken): return self.ParseNumberExpr() -elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif -isinstance(self.current, ForToken): return self.ParseForExpr() elif -self.current == CharacterToken('('): return self.ParseParenExpr() else: -raise RuntimeError('Unknown token when expecting an expression.') - -# binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, -left\_precedence): # If this is a binary operator, find its precedence. -while True: precedence = self.GetCurrentTokenPrecedence() - -:: - - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left - - binary_operator = self.current.char - self.Next() # eat the operator. - - # Parse the primary expression after the binary operator. - right = self.ParsePrimary() - - # If binary_operator binds less tightly with right than the operator after - # right, let the pending operator take right as its left. - next_precedence = self.GetCurrentTokenPrecedence() - if precedence < next_precedence: - right = self.ParseBinOpRHS(right, precedence + 1) - - # Merge left/right. - left = BinaryOperatorExpressionNode(binary_operator, left, right) - -# expression ::= primary binoprhs def ParseExpression(self): left = -self.ParsePrimary() return self.ParseBinOpRHS(left, 0) - -# prototype ::= id '(' id\* ')' def ParsePrototype(self): if not -isinstance(self.current, IdentifierToken): raise RuntimeError('Expected -function name in prototype.') - -:: - - function_name = self.current.name - self.Next() # eat function name. 
- - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. - - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. - - return PrototypeNode(function_name, arg_names) - -# definition ::= 'def' prototype expression def ParseDefinition(self): -self.Next() # eat def. proto = self.ParsePrototype() body = -self.ParseExpression() return FunctionNode(proto, body) - -# toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = -PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) - -# external ::= 'extern' prototype def ParseExtern(self): self.Next() # -eat extern. return self.ParsePrototype() - -# Top-Level parsing def HandleDefinition(self): -self.Handle(self.ParseDefinition, 'Read a function definition:') - -def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') - -def HandleTopLevelExpression(self): try: function = -self.ParseTopLevelExpr().CodeGen() result = -g\_llvm\_executor.run\_function(function, []) print 'Evaluated to:', -result.as\_real(Type.double()) except Exception, e: print 'Error:', e -try: self.Next() # Skip for error recovery. except: pass - -def Handle(self, function, message): try: print message, -function().CodeGen() except Exception, e: print 'Error:', e try: -self.Next() # Skip for error recovery. except: pass - -Main driver code. ------------------ - -def main(): # Set up the optimizer pipeline. Start with registering info -about how the # target lays out data structures. -g\_llvm\_pass\_manager.add(g\_llvm\_executor.target\_data) # Do simple -"peephole" optimizations and bit-twiddling optzns. -g\_llvm\_pass\_manager.add(PASS\_INSTRUCTION\_COMBINING) # Reassociate -expressions. 
g\_llvm\_pass\_manager.add(PASS\_REASSOCIATE) # Eliminate -Common SubExpressions. g\_llvm\_pass\_manager.add(PASS\_GVN) # Simplify -the control flow graph (deleting unreachable blocks, etc). -g\_llvm\_pass\_manager.add(PASS\_CFG\_SIMPLIFICATION) - -g\_llvm\_pass\_manager.initialize() - -# Install standard binary operators. # 1 is lowest possible precedence. -40 is the highest. operator\_precedence = { '<': 10, '+': 20, '-': 20, -'\*': 40 } - -# Run the main "interpreter loop". while True: print 'ready>', try: raw -= raw\_input() except KeyboardInterrupt: break - -:: - - parser = Parser(Tokenize(raw), operator_precedence) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() - -# Print out all of the generated code. print '', g\_llvm\_module - -if **name** == '**main**\ ': main() {% endhighlight %} - --------------- - -**`Next: Extending the language: user-defined -operators `_** +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes + import FunctionPassManager + + from llvm.core import FCMP_ULT, FCMP_ONE from llvm.passes import + (PASS_INSTRUCTION_COMBINING, PASS_REASSOCIATE, PASS_GVN, + PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. 
+ g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass class DefToken(object): pass class + ExternToken(object): pass class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + elif identifier == 'if': + yield IfToken() + elif identifier == 'then': + yield ThenToken() + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. 
+ return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # Expression class for if/then/else. + class IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): condition = self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + function = g_llvm_builder.basic_block.function + + # Create blocks for the then and else cases. Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + # Emit then value. + g_llvm_builder.position_at_end(then_block) + then_value = self.then_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + # Emit else block. 
+ g_llvm_builder.position_at_end(else_block) + else_value = self.else_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) + phi = g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + return phi + + # Expression class for for/in. + class ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): # Output this as: # ... # start = startexpr # goto + loop # loop: # variable = phi [start, loopheader], [nextvariable, + loopend] # ... # bodyexpr # ... # loopend: # step = stepexpr # + nextvariable = variable + step # endcond = endexpr # br endcond, loop, + endloop # outloop: + + :: + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Make the new basic block for the loop header, inserting after current + # block. + function = g_llvm_builder.basic_block.function + pre_header_block = g_llvm_builder.basic_block + loop_block = function.append_basic_block('loop') + + # Insert an explicit fallthrough from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block) + + # Start the PHI node with an entry for start. + variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) + variable_phi.add_incoming(start_value, pre_header_block) + + # Within the loop, the variable is defined equal to the PHI node. If it + # shadows an existing variable, we have to restore it, so save it now. 
+ old_value = g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = variable_phi + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + # Emit the step value. + if self.step: + step_value = self.step.CodeGen() + else: + # If not specified, use 1.0. + step_value = Constant.real(Type.double(), 1) + + next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') + + # Compute the end condition and convert it to a bool by comparing to 0.0. + end_condition = self.end.CodeGen() + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + + # Create the "after loop" block and insert it. + loop_end_block = g_llvm_builder.basic_block + after_block = function.append_basic_block('afterloop') + + # Insert the conditional branch into the end of loop_end_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + # Add a new entry to the PHI node for the backedge. + variable_phi.add_incoming(next_value, loop_end_block) + + # Restore the unshadowed variable. + if old_value: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): + + def **init**\ (self, name, args): self.name = name self.args = args + + def CodeGen(self): # Make the function type, eg. double(double,double). 
+ funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If the function took a different number of args, reject. + if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. + class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. 
Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # ifexpr ::= 'if' expression 'then' expression 'else' expression def + ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. 
+ + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + # forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' + expression def ParseForExpr(self): self.Next() # eat the for. + + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. 
+ + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + # primary ::= identifierexpr \| numberexpr \| parenexpr \| ifexpr \| + forexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr() + elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif + isinstance(self.current, ForToken): return self.ParseForExpr() elif + self.current == CharacterToken('('): return self.ParseParenExpr() else: + raise RuntimeError('Unknown token when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. 
+ + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. 
g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. operator_precedence = { '<': 10, '+': 20, '-': 20, + '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl6.rst b/docs/source/doc/kaleidoscope/PythonLangImpl6.rst index c7e354d..dfe95b6 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl6.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl6.rst @@ -50,23 +50,22 @@ The two specific features we'll add are programmable unary operators (right now, Kaleidoscope has no unary operators at all) as well as binary operators. An example of this is: -{% highlight python %} # Logical unary not. def unary!(v) if v then 0 -else 1 -Define > with the same precedence as <. -======================================= +.. code-block:: python -def binary> 10 (LHS RHS) RHS < LHS + # Logical unary not. def unary!(v) if v then 0 + else 1 + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) RHS < LHS + + # Binary "logical or", (note that it does not "short circuit"). 
+ def binary\| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0 + + # Define = with slightly lower precedence than relationals. + def binary= 9 (LHS RHS) !(LHS < RHS \| LHS > RHS) -Binary "logical or", (note that it does not "short circuit"). -============================================================= -def binary\| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0 - -Define = with slightly lower precedence than relationals. -========================================================= - -def binary= 9 (LHS RHS) !(LHS < RHS \| LHS > RHS) {% endhighlight %} Many languages aspire to being able to implement their standard runtime library in the language itself. In Kaleidoscope, we can implement @@ -85,41 +84,46 @@ Adding support for user-defined binary operators is pretty simple with our current framework. We'll first add support for the unary/binary keywords: -{% highlight python %} class InToken(object): pass class -BinaryToken(object): pass class UnaryToken(object): pass ... def -Tokenize(string): ... elif identifier == 'in': yield InToken() elif -identifier == 'binary': yield BinaryToken() elif identifier == 'unary': -yield UnaryToken() else: yield IdentifierToken(identifier) {% -endhighlight %} -This just adds lexer support for the unary and binary keywords, like we -did in `previous chapters `_. One nice -thing about our current AST, is that we represent binary operators with -full generalisation by using their ASCII code as the opcode. For our -extended operators, we'll use this same representation, so we don't need -any new AST or parser support. +.. code-block:: python -On the other hand, we have to be able to represent the definitions of -these new operators, in the "def binary\| 5" part of the function -definition. In our grammar so far, the "name" for the function -definition is parsed as the "prototype" production and into the -``PrototypeNode``. 
To represent our new user-defined operators as -prototypes, we have to extend the ``PrototypeNode`` like this: + class InToken(object): pass class + BinaryToken(object): pass class UnaryToken(object): pass ... def + Tokenize(string): ... elif identifier == 'in': yield InToken() elif + identifier == 'binary': yield BinaryToken() elif identifier == 'unary': + yield UnaryToken() else: yield IdentifierToken(identifier) {% + endhighlight %} + + This just adds lexer support for the unary and binary keywords, like we + did in `previous chapters `_. One nice + thing about our current AST, is that we represent binary operators with + full generalisation by using their ASCII code as the opcode. For our + extended operators, we'll use this same representation, so we don't need + any new AST or parser support. + + On the other hand, we have to be able to represent the definitions of + these new operators, in the "def binary\| 5" part of the function + definition. In our grammar so far, the "name" for the function + definition is parsed as the "prototype" production and into the + ``PrototypeNode``. To represent our new user-defined operators as + prototypes, we have to extend the ``PrototypeNode`` like this: + + {% highlight python %} # This class represents the "prototype" for a + function, which captures its name, # and its argument names (thus + implicitly the number of arguments the function # takes), as well as if + it is an operator. class PrototypeNode(object): + + def **init**\ (self, name, args, is_operator=False, precedence=0): + self.name = name self.args = args self.is_operator = is_operator + self.precedence = precedence + + def IsBinaryOp(self): return self.is_operator and len(self.args) == 2 + + def GetOperatorName(self): assert self.is_operator return self.name[-1] + + def CodeGen(self): ... 
-{% highlight python %} # This class represents the "prototype" for a -function, which captures its name, # and its argument names (thus -implicitly the number of arguments the function # takes), as well as if -it is an operator. class PrototypeNode(object): -def **init**\ (self, name, args, is\_operator=False, precedence=0): -self.name = name self.args = args self.is\_operator = is\_operator -self.precedence = precedence - -def IsBinaryOp(self): return self.is\_operator and len(self.args) == 2 - -def GetOperatorName(self): assert self.is\_operator return self.name[-1] - -def CodeGen(self): ... {% endhighlight %} Basically, in addition to knowing a name for the prototype, we now keep track of whether it was an operator, and if it was, what precedence @@ -128,44 +132,49 @@ operators (as you'll see below, it just doesn't apply for unary operators). Now that we have a way to represent the prototype for a user-defined operator, we need to parse it: -{% highlight python %} # prototype # ::= id '(' id\* ')' # ::= binary -LETTER number? (id, id) # ::= unary LETTER (id) def -ParsePrototype(self): precedence = None if isinstance(self.current, -IdentifierToken): kind = 'normal' function\_name = self.current.name -self.Next() # eat function name. elif isinstance(self.current, -BinaryToken): kind = 'binary' self.Next() # eat 'binary'. if not -isinstance(self.current, CharacterToken): raise RuntimeError('Expected -an operator after "binary".') function\_name = 'binary' + -self.current.char self.Next() # eat the operator. if -isinstance(self.current, NumberToken): if not 1 <= self.current.value <= -100: raise RuntimeError('Invalid precedence: must be in range [1, -100].') precedence = self.current.value self.Next() # eat the -precedence. else: raise RuntimeError('Expected function name, "unary" or -"binary" in ' 'prototype.') -:: +.. code-block:: python - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. 
+ # prototype # ::= id '(' id\* ')' # ::= binary + LETTER number? (id, id) # ::= unary LETTER (id) def + ParsePrototype(self): precedence = None if isinstance(self.current, + IdentifierToken): kind = 'normal' function_name = self.current.name + self.Next() # eat function name. elif isinstance(self.current, + BinaryToken): kind = 'binary' self.Next() # eat 'binary'. if not + isinstance(self.current, CharacterToken): raise RuntimeError('Expected + an operator after "binary".') function_name = 'binary' + + self.current.char self.Next() # eat the operator. if + isinstance(self.current, NumberToken): if not 1 <= self.current.value <= + 100: raise RuntimeError('Invalid precedence: must be in range [1, + 100].') precedence = self.current.value self.Next() # eat the + precedence. else: raise RuntimeError('Expected function name, "unary" or + "binary" in ' 'prototype.') + + :: + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + if kind == 'binary' and len(arg_names) != 2: + raise RuntimeError('Invalid number of arguments for a binary operator.') + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. 
- - if kind == 'binary' and len(arg_names) != 2: - raise RuntimeError('Invalid number of arguments for a binary operator.') - - return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) - -{% endhighlight %} This is all fairly straightforward parsing code, and we have already seen a lot of similar code in the past. One interesting part about the @@ -178,26 +187,31 @@ The next interesting thing to add, is codegen support for these binary operators. Given our current structure, this is a simple addition of a default case for our existing binary operator node: -{% highlight python %} def CodeGen(self): left = self.left.CodeGen() -right = self.right.CodeGen() -:: +.. code-block:: python + + def CodeGen(self): left = self.left.CodeGen() + right = self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + function = g_llvm_module.get_function_named('binary' + self.operator) + return g_llvm_builder.call(function, [left, right], 'binop') + + - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. 
- return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - function = g_llvm_module.get_function_named('binary' + self.operator) - return g_llvm_builder.call(function, [left, right], 'binop') -{% endhighlight %} As you can see above, the new code is actually really simple. It just does a lookup for the appropriate operator in the symbol table and @@ -209,32 +223,37 @@ The final piece of code we are missing, is a bit of top-level magic. We will need to make the dinary precedence map global and modify it whenever we define a new binary operator: -{% highlight python %} # The binary operator precedence chart. -g\_binop\_precedence = {} ... class FunctionNode(object): ... def -CodeGen(self): ... # Create a function object. function = -self.prototype.CodeGen() -:: +.. code-block:: python - # If this is a binary operator, install its precedence. - if self.prototype.IsBinaryOp(): - operator = self.prototype.GetOperatorName() - g_binop_precedence[operator] = self.prototype.precedence - ... - # Finish off the function. - try: - ... - except: - function.delete() - if self.prototype.IsBinaryOp(): - del g_binop_precedence[self.prototype.GetOperatorName()] - raise + # The binary operator precedence chart. + g_binop_precedence = {} ... class FunctionNode(object): ... def + CodeGen(self): ... # Create a function object. function = + self.prototype.CodeGen() + + :: + + # If this is a binary operator, install its precedence. + if self.prototype.IsBinaryOp(): + operator = self.prototype.GetOperatorName() + g_binop_precedence[operator] = self.prototype.precedence + ... + # Finish off the function. + try: + ... + except: + function.delete() + if self.prototype.IsBinaryOp(): + del g_binop_precedence[self.prototype.GetOperatorName()] + raise + + return function + + ... def main(): ... g_binop_precedence['<'] = 10 + g_binop_precedence['+'] = 20 g_binop_precedence['-'] = 20 + g_binop_precedence['\*'] = 40 ... - return function -... def main(): ... 
g\_binop\_precedence['<'] = 10 -g\_binop\_precedence['+'] = 20 g\_binop\_precedence['-'] = 20 -g\_binop\_precedence['\*'] = 40 ... {% endhighlight %} Basically, before CodeGening a function, if it is a user-defined operator, we register it in the precedence table. This allows the binary @@ -255,33 +274,43 @@ language, we'll need to add everything to support them. Above, we added simple support for the 'unary' keyword to the lexer. In addition to that, we need an AST node: -{% highlight python %} # Expression class for a unary operator. class -UnaryExpressionNode(ExpressionNode): -def **init**\ (self, operator, operand): self.operator = operator -self.operand = operand +.. code-block:: python + + # Expression class for a unary operator. class + UnaryExpressionNode(ExpressionNode): + + def **init**\ (self, operator, operand): self.operator = operator + self.operand = operand + + def CodeGen(self): ... + -def CodeGen(self): ... {% endhighlight %} This AST node is very simple and obvious by now. It directly mirrors the binary operator AST node, except that it only has one child. With this, we need to add the parsing logic. Parsing a unary operator is pretty simple: we'll add a new function to do it: -{% highlight python %} # unary ::= primary \| unary\_operator unary def -ParseUnary(self): # If the current token is not an operator, it must be -a primary expression. if (not isinstance(self.current, CharacterToken) -or self.current in [CharacterToken('('), CharacterToken(',')]): return -self.ParsePrimary() -:: +.. code-block:: python + + # unary ::= primary \| unary_operator unary def + ParseUnary(self): # If the current token is not an operator, it must be + a primary expression. if (not isinstance(self.current, CharacterToken) + or self.current in [CharacterToken('('), CharacterToken(',')]): return + self.ParsePrimary() + + :: + + # If this is a unary operator, read it. + operator = self.current.char + self.Next() # eat the operator. 
+ return UnaryExpressionNode(operator, self.ParseUnary()) + + - # If this is a unary operator, read it. - operator = self.current.char - self.Next() # eat the operator. - return UnaryExpressionNode(operator, self.ParseUnary()) -{% endhighlight %} The grammar we add is pretty straightforward here. If we see a unary operator when parsing a primary operator, we eat the operator as a @@ -294,47 +323,62 @@ The problem with this function, is that we need to call ParseUnary from somewhere. To do this, we change previous callers of ParsePrimary to call ParseUnary instead: -{% highlight python %} # binoprhs ::= (binary\_operator unary)\* def -ParseBinOpRHS(self, left, left\_precedence): ... # Parse the unary -expression after the binary operator. right = self.ParseUnary() ... -# expression ::= unary binoprhs def ParseExpression(self): left = -self.ParseUnary() return self.ParseBinOpRHS(left, 0) {% endhighlight %} +.. code-block:: python + + # binoprhs ::= (binary_operator unary)\* def + ParseBinOpRHS(self, left, left_precedence): ... # Parse the unary + expression after the binary operator. right = self.ParseUnary() ... + + # expression ::= unary binoprhs def ParseExpression(self): left = + self.ParseUnary() return self.ParseBinOpRHS(left, 0) + + With these two simple changes, we are now able to parse unary operators and build the AST for them. Next up, we need to add parser support for prototypes, to parse the unary operator prototype. We extend the binary operator code above with: -{% highlight python %} # prototype # ::= id '(' id\* ')' # ::= binary -LETTER number? (id, id) # ::= unary LETTER (id) def -ParsePrototype(self): precedence = None if isinstance(self.current, -IdentifierToken): ... elif isinstance(self.current, UnaryToken): kind = -'unary' self.Next() # eat 'unary'. if not isinstance(self.current, -CharacterToken): raise RuntimeError('Expected an operator after -"unary".') function\_name = 'unary' + self.current.char self.Next() # -eat the operator. 
elif isinstance(self.current, BinaryToken): ... else: -raise RuntimeError('Expected function name, "unary" or "binary" in ' -'prototype.') ... if kind == 'unary' and len(arg\_names) != 1: raise -RuntimeError('Invalid number of arguments for a unary operator.') elif -kind == 'binary' and len(arg\_names) != 2: raise RuntimeError('Invalid -number of arguments for a binary operator.') -:: +.. code-block:: python + + # prototype # ::= id '(' id\* ')' # ::= binary + LETTER number? (id, id) # ::= unary LETTER (id) def + ParsePrototype(self): precedence = None if isinstance(self.current, + IdentifierToken): ... elif isinstance(self.current, UnaryToken): kind = + 'unary' self.Next() # eat 'unary'. if not isinstance(self.current, + CharacterToken): raise RuntimeError('Expected an operator after + "unary".') function_name = 'unary' + self.current.char self.Next() # + eat the operator. elif isinstance(self.current, BinaryToken): ... else: + raise RuntimeError('Expected function name, "unary" or "binary" in ' + 'prototype.') ... if kind == 'unary' and len(arg_names) != 1: raise + RuntimeError('Invalid number of arguments for a unary operator.') elif + kind == 'binary' and len(arg_names) != 2: raise RuntimeError('Invalid + number of arguments for a binary operator.') + + :: + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + - return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) -{% endhighlight %} As with binary operators, we name unary operators with a name that includes the operator character. This assists us at code generation time. Speaking of, the final piece we need to add is codegen support for unary operators. It looks like this: -{% highlight python %} class UnaryExpressionNode(ExpressionNode): ... -def CodeGen(self): operand = self.operand.CodeGen() function = -g\_llvm\_module.get\_function\_named('unary' + self.operator) return -g\_llvm\_builder.call(function, [operand], 'unop') {% endhighlight %} + +.. 
code-block:: python + + class UnaryExpressionNode(ExpressionNode): ... + def CodeGen(self): operand = self.operand.CodeGen() function = + g_llvm_module.get_function_named('unary' + self.operator) return + g_llvm_builder.call(function, [operand], 'unop') + + This code is similar to, but simpler than, the code for binary operators. It is simpler primarily because it doesn't need to handle any @@ -351,74 +395,77 @@ this, we can do a lot of interesting things, including I/O, math, and a bunch of other things. For example, we can now add a nice sequencing operator (assuming we import ``putchard`` as described in Chapter 4): -{% highlight python %} ready> def binary : 1 (x y) 0 # Low-precedence -operator that ignores operands. ... ready> extern putchard(x) ... ready> -def printd(x) putchard(x) : putchard(10) .. ready> printd(65) : -printd(66) : printd(67) A B C Evaluated to: 0.0 {% endhighlight %} + +.. code-block:: python + + ready> def binary : 1 (x y) 0 # Low-precedence + operator that ignores operands. ... ready> extern putchard(x) ... ready> + def printd(x) putchard(x) : putchard(10) .. ready> printd(65) : + printd(66) : printd(67) A B C Evaluated to: 0.0 + + We can also define a bunch of other "primitive" operations, such as: -{% highlight python %} # Logical unary not. def unary!(v) if v then 0 -else 1 -Unary negate. -============= +.. code-block:: python -def unary-(v) 0-v + # Logical unary not. def unary!(v) if v then 0 + else 1 + + # Unary negate. + def unary-(v) 0-v + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) RHS < LHS + + # Binary logical or, which does not short circuit. + def binary\| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0 + + # Binary logical and, which does not short circuit. + def binary& 6 (LHS RHS) if !LHS then 0 else !!RHS + + # Define = with slightly lower precedence than relationals. + def binary = 9 (LHS RHS) !(LHS < RHS \| LHS > RHS) + + -Define > with the same precedence as <. 
-======================================= -def binary> 10 (LHS RHS) RHS < LHS - -Binary logical or, which does not short circuit. -================================================ - -def binary\| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0 - -Binary logical and, which does not short circuit. -================================================= - -def binary& 6 (LHS RHS) if !LHS then 0 else !!RHS - -Define = with slightly lower precedence than relationals. -========================================================= - -def binary = 9 (LHS RHS) !(LHS < RHS \| LHS > RHS) - -{% endhighlight %} Given the previous if/then/else support, we can also define interesting functions for I/O. For example, the following prints out a character whose "density" reflects the value passed in: the lower the value, the denser the character: -{% highlight python %} ready> -extern putchard(char) def printdensity(d) if d > 8 then putchard(32) # ' -' else if d > 4 then putchard(46) # '.' else if d > 2 then putchard(43) -# '+' else putchard(42); # '*' ... ready> printdensity(1): -printdensity(2): printdensity(3) : printdensity(4): printdensity(5): -printdensity(9): putchard(10)*\ ++.. Evaluated to 0.000000 {% -endhighlight %} +.. code-block:: python -Based on these simple primitive operations, we can start to define more -interesting things. For example, here's a little function that solves -for the number of iterations it takes a function in the complex plane to -converge: + ready> + + extern putchard(char) def printdensity(d) if d > 8 then putchard(32) # ' + ' else if d > 4 then putchard(46) # '.' else if d > 2 then putchard(43) + # '+' else putchard(42); # '*' ... ready> printdensity(1): + printdensity(2): printdensity(3) : printdensity(4): printdensity(5): + printdensity(9): putchard(10)*\ ++.. Evaluated to 0.000000 {% + endhighlight %} + + Based on these simple primitive operations, we can start to define more + interesting things. 
For example, here's a little function that solves + for the number of iterations it takes a function in the complex plane to + converge: + + {% highlight python %} # determine whether the specific location + diverges. # Solve for z = z^2 + c in the complex plane. def + mandelconverger(real imag iters creal cimag) if iters > 255 \| + (real\ *real + imag*\ imag > 4) then iters else + mandelconverger(real\ *real - imag*\ imag + creal, 2\ *real*\ imag + + cimag, iters+1, creal, cimag) + + # return the number of iterations required for the iteration to escape + def mandelconverge(real imag) mandelconverger(real, imag, 0, real, imag) + -{% highlight python %} # determine whether the specific location -diverges. # Solve for z = z^2 + c in the complex plane. def -mandelconverger(real imag iters creal cimag) if iters > 255 \| -(real\ *real + imag*\ imag > 4) then iters else -mandelconverger(real\ *real - imag*\ imag + creal, 2\ *real*\ imag + -cimag, iters+1, creal, cimag) -return the number of iterations required for the iteration to escape -==================================================================== - -def mandelconverge(real imag) mandelconverger(real, imag, 0, real, imag) -{% endhighlight %} This "z = z2 + c" function is a beautiful little creature that is the basis for computation of the `Mandelbrot @@ -430,175 +477,181 @@ two-dimensional plane, you can see the Mandelbrot set. Given that we are limited to using putchard here, our amazing graphical output is limited, but we can whip together something using the density plotter above: -{% highlight python %} # compute and plot the mandlebrot set with the -specified 2 dimensional range # info. 
def mandelhelp(xmin xmax xstep -ymin ymax ystep) for y = ymin, y < ymax, ystep in ( (for x = xmin, x < -xmax, xstep in printdensity(mandleconverge(x,y))) : putchard(10) ) -mandel - This is a convenient helper function for ploting the mandelbrot set -============================================================================ +.. code-block:: python + + # compute and plot the mandlebrot set with the + specified 2 dimensional range # info. def mandelhelp(xmin xmax xstep + ymin ymax ystep) for y = ymin, y < ymax, ystep in ( (for x = xmin, x < + xmax, xstep in printdensity(mandleconverge(x,y))) : putchard(10) ) + + # mandel - This is a convenient helper function for ploting the mandelbrot set + # from the specified position with the specified Magnification. + def mandel(realstart imagstart realmag imagmag) mandelhelp(realstart, + realstart+realmag\ *78, realmag, imagstart, imagstart+imagmag*\ 40, + imagmag); -from the specified position with the specified Magnification. -============================================================= -def mandel(realstart imagstart realmag imagmag) mandelhelp(realstart, -realstart+realmag\ *78, realmag, imagstart, imagstart+imagmag*\ 40, -imagmag); {% endhighlight %} Given this, we can try plotting out the mandlebrot set! 
Lets try it out: -{% highlight bash %} ready> mandel(-2.3, -1.3, 0.05, 0.07) -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++...++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++.. -...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. -..+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++. -..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... -..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++....... -.....++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++. . ... -.++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++... -++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... -.+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++..+++++.... -..+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++. .......... -+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*++++++++.. .. -.++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*++++++++++... 
-.++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*\*++++++++++.. -.++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*++++++..... -..++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+........ -...++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+... .... -...++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+++++...... -..++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*++++++++++... -.++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*++++++++++... -++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*+++++++++.. .. -..++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*++++++.. .......... -+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++...+++..... -..+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... -..++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++... -+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++.. . ... -.++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++....... -......+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... -..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. -..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. -...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++.. 
-...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++....+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -Evaluated to 0.0 ready> mandel(-2, -1, 0.02, 0.04) -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++ -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++ -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++ -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++ -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++++ 
-\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++....... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++.......... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++... -... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++...... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++....... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++.......... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++........... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++++......... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++...........+++++.............. -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++.... -......................... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++.... ......... -............ \*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++..... ...... -\*\*\*\*\*\*\*\*+++++++++++++++++++++++....... -\*\*\*\*\*\*+++++++++++++++++++++++++........ -\*\*\*\*+++++++++++++++++++++++++....... -***+++++++++++++++++++++++.........**\ ++++++++++++++++...........*\ ++++++++++++................ -\*++++.................... -*++++....................*\ ++++++++++++................ -**++++++++++++++++...........**\ *+++++++++++++++++++++++......... -\*\*\*\*+++++++++++++++++++++++++....... -\*\*\*\*\*\*+++++++++++++++++++++++++........ -\*\*\*\*\*\*\*\*+++++++++++++++++++++++....... -\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++..... ...... 
-\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++.... ......... -............ \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++.... -......................... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++...........+++++.............. -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++++......... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++........... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++.......... -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++....... -Evaluated to: 0.0 ready> mandel(-0.9, -1.4, 0.02, 0.03) -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* 
-\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++...++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++.. . -.++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++... -......++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++... -.......+++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++.... .... -..++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++...... -...++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*\*\*+++++++++++++++++++++++....... -.....++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*\*\*++++++++++++++++++++++++....... -.....+++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -\*\*\*\*+++++++++++++++++++++++++.... . -.....+++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* -**+++++++++++++++++++++++++.... -...++++++++++++++++**\ \*\*\*\*\*\*\*\*\*\*\**\ +++++++++++++++++++++++....... -....++++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\* -+++++++++++++++++++++.......... -.....++++++++++++++++\*\*\*\*\*\*\*\*\*\*\* -++++++++++++++++++............. -.......+++++++++++++++\*\*\*\*\*\*\*\*\*\* -+++++++++++++++................ -............++++++++++\*\*\*\*\*\*\*\*\*\* -+++++++++++++................. .................+++++\*\*\*\*\*\*\*\*\* -+++++++++++... .... .......... .+++++\*\*\*\*\*\*\*\* ++++++++++..... -........ ...+++++\*\*\*\*\*\*\* ++++++++...... ..++++++\*\*\*\*\*\* -+++++++........ ..+++++\*\*\*\*\*\* +++++.......... 
..++++++\*\*\*\*\* -++++.......... ....++++++\*\*\*\*\* ++.......... ....+++++++\*\*\*\* -.......... ......+++++++\ **\* .......... .....+++++++**\ \* .......... -.....++++++\ **\* ......... .+++++++** ........ .+++++++\ *\* ...... -...+++++++* . ....++++++++\* ...++++++++\* ..+++++++++ ..+++++++++ -Evaluated to: 0.0 ready> ^C {% endhighlight %} +.. code-block:: bash + + ready> mandel(-2.3, -1.3, 0.05, 0.07) + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++...++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++.. + ...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. + ..+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++. + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++....... + .....++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++. . ... + .++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++... + ++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... 
+ .+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++..+++++.... + ..+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++. .......... + +++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*++++++++.. .. + .++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*++++++++++... + .++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*\*++++++++++.. + .++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*++++++..... + ..++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+........ + ...++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+... .... + ...++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+++++...... + ..++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*++++++++++... + .++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*++++++++++... + ++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*+++++++++.. .. + ..++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*++++++.. .......... + +++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++...+++..... + ..+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + ..++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++... + +++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++.. . ... + .++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++....... + ......+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. 
+ ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. + ...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++.. + ...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++....+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + Evaluated to 0.0 ready> mandel(-2, -1, 0.02, 0.04) + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++ + 
\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++....... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++.......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++... + ... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++...... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++....... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++.......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++........... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++++......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++...........+++++.............. + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++.... + ......................... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++.... ......... + ............ \*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++..... ...... + \*\*\*\*\*\*\*\*+++++++++++++++++++++++....... + \*\*\*\*\*\*+++++++++++++++++++++++++........ + \*\*\*\*+++++++++++++++++++++++++....... + ***+++++++++++++++++++++++.........**\ ++++++++++++++++...........*\ ++++++++++++................ + \*++++.................... 
+ + *++++....................*\ ++++++++++++................ + **++++++++++++++++...........**\ *+++++++++++++++++++++++......... + \*\*\*\*+++++++++++++++++++++++++....... + \*\*\*\*\*\*+++++++++++++++++++++++++........ + \*\*\*\*\*\*\*\*+++++++++++++++++++++++....... + \*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++..... ...... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++.... ......... + ............ \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++.... + ......................... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++...........+++++.............. + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++++......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++........... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++.......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++....... 
+ Evaluated to: 0.0 ready> mandel(-0.9, -1.4, 0.02, 0.03) + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++...++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++.. . + .++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++... + ......++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++... 
+ .......+++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++.... .... + ..++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++...... + ...++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*+++++++++++++++++++++++....... + .....++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*++++++++++++++++++++++++....... + .....+++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*+++++++++++++++++++++++++.... . + .....+++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + **+++++++++++++++++++++++++.... + ...++++++++++++++++**\ \*\*\*\*\*\*\*\*\*\*\**\ +++++++++++++++++++++++....... + ....++++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\* + +++++++++++++++++++++.......... + .....++++++++++++++++\*\*\*\*\*\*\*\*\*\*\* + ++++++++++++++++++............. + .......+++++++++++++++\*\*\*\*\*\*\*\*\*\* + +++++++++++++++................ + ............++++++++++\*\*\*\*\*\*\*\*\*\* + +++++++++++++................. .................+++++\*\*\*\*\*\*\*\*\* + +++++++++++... .... .......... .+++++\*\*\*\*\*\*\*\* ++++++++++..... + ........ ...+++++\*\*\*\*\*\*\* ++++++++...... ..++++++\*\*\*\*\*\* + +++++++........ ..+++++\*\*\*\*\*\* +++++.......... ..++++++\*\*\*\*\* + ++++.......... ....++++++\*\*\*\*\* ++.......... ....+++++++\*\*\*\* + .......... ......+++++++\ **\* .......... .....+++++++**\ \* .......... + .....++++++\ **\* ......... .+++++++** ........ .+++++++\ *\* ...... + ...+++++++* . ....++++++++\* ...++++++++\* ..+++++++++ ..+++++++++ + Evaluated to: 0.0 ready> ^C + + At this point, you may be starting to realize that Kaleidoscope is a real and powerful language. 
It may not be self-similar :), but it can be @@ -627,722 +680,678 @@ Full Code Listing # {#code} Here is the complete code listing for our running example, enhanced with the if/then/else and for expressions: -{% highlight python %} #!/usr/bin/env python -import re from llvm.core import Module, Constant, Type, Function, -Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes -import FunctionPassManager - -from llvm.core import FCMP\_ULT, FCMP\_ONE from llvm.passes import -(PASS\_INSTRUCTION\_COMBINING, PASS\_REASSOCIATE, PASS\_GVN, -PASS\_CFG\_SIMPLIFICATION) - -Globals -------- - -The LLVM module, which holds all the IR code. -============================================= - -g\_llvm\_module = Module.new('my cool jit') - -The LLVM instruction builder. Created whenever a new function is entered. -========================================================================= - -g\_llvm\_builder = None - -A dictionary that keeps track of which values are defined in the current scope -============================================================================== - -and what their LLVM representation is. -====================================== - -g\_named\_values = {} - -The function optimization passes manager. -========================================= - -g\_llvm\_pass\_manager = FunctionPassManager.new(g\_llvm\_module) - -The LLVM execution engine. -========================== - -g\_llvm\_executor = ExecutionEngine.new(g\_llvm\_module) - -The binary operator precedence chart. -===================================== - -g\_binop\_precedence = {} - -Lexer ------ - -The lexer yields one of these types for each token. 
-=================================================== - -class EOFToken(object): pass class DefToken(object): pass class -ExternToken(object): pass class IfToken(object): pass class -ThenToken(object): pass class ElseToken(object): pass class -ForToken(object): pass class InToken(object): pass class -BinaryToken(object): pass class UnaryToken(object): pass - -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other - -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -def Tokenize(string): while string: # Skip whitespace. if -string[0].isspace(): string = string[1:] continue - -:: - - # Run regexes. - comment_match = REGEX_COMMENT.match(string) - number_match = REGEX_NUMBER.match(string) - identifier_match = REGEX_IDENTIFIER.match(string) - - # Check if any of the regexes matched and yield the appropriate result. - if comment_match: - comment = comment_match.group(0) - string = string[len(comment):] - elif number_match: - number = number_match.group(0) - yield NumberToken(float(number)) - string = string[len(number):] - elif identifier_match: - identifier = identifier_match.group(0) - # Check if we matched a keyword. 
- if identifier == 'def': - yield DefToken() - elif identifier == 'extern': - yield ExternToken() - elif identifier == 'if': - yield IfToken() - elif identifier == 'then': - yield ThenToken() - elif identifier == 'else': - yield ElseToken() - elif identifier == 'for': - yield ForToken() - elif identifier == 'in': - yield InToken() - elif identifier == 'binary': - yield BinaryToken() - elif identifier == 'unary': - yield UnaryToken() - else: - yield IdentifierToken(identifier) - string = string[len(identifier):] - else: - # Yield the ASCII value of the unknown character. - yield CharacterToken(string[0]) - string = string[1:] - -yield EOFToken() - -Abstract Syntax Tree (aka Parse Tree) -------------------------------------- - -Base class for all expression nodes. -==================================== - -class ExpressionNode(object): pass - -Expression class for numeric literals like "1.0". -================================================= - -class NumberExpressionNode(ExpressionNode): - -def **init**\ (self, value): self.value = value - -def CodeGen(self): return Constant.real(Type.double(), self.value) - -Expression class for referencing a variable, like "a". -====================================================== - -class VariableExpressionNode(ExpressionNode): - -def **init**\ (self, name): self.name = name - -def CodeGen(self): if self.name in g\_named\_values: return -g\_named\_values[self.name] else: raise RuntimeError('Unknown variable -name: ' + self.name) - -Expression class for a binary operator. 
-======================================= - -class BinaryOperatorExpressionNode(ExpressionNode): - -def **init**\ (self, operator, left, right): self.operator = operator -self.left = left self.right = right - -def CodeGen(self): left = self.left.CodeGen() right = -self.right.CodeGen() - -:: - - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. - return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - function = g_llvm_module.get_function_named('binary' + self.operator) - return g_llvm_builder.call(function, [left, right], 'binop') - -Expression class for function calls. -==================================== - -class CallExpressionNode(ExpressionNode): - -def **init**\ (self, callee, args): self.callee = callee self.args = -args - -def CodeGen(self): # Look up the name in the global module table. callee -= g\_llvm\_module.get\_function\_named(self.callee) - -:: - - # Check for argument mismatch error. - if len(callee.args) != len(self.args): - raise RuntimeError('Incorrect number of arguments passed.') - - arg_values = [i.CodeGen() for i in self.args] - - return g_llvm_builder.call(callee, arg_values, 'calltmp') - -Expression class for if/then/else. -================================== - -class IfExpressionNode(ExpressionNode): - -def **init**\ (self, condition, then\_branch, else\_branch): -self.condition = condition self.then\_branch = then\_branch -self.else\_branch = else\_branch - -def CodeGen(self): condition = self.condition.CodeGen() - -:: - - # Convert condition to a bool by comparing equal to 0.0. 
- condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') - - function = g_llvm_builder.basic_block.function - - # Create blocks for the then and else cases. Insert the 'then' block at the - # end of the function. - then_block = function.append_basic_block('then') - else_block = function.append_basic_block('else') - merge_block = function.append_basic_block('ifcond') - - g_llvm_builder.cbranch(condition_bool, then_block, else_block) - - # Emit then value. - g_llvm_builder.position_at_end(then_block) - then_value = self.then_branch.CodeGen() - g_llvm_builder.branch(merge_block) - - # Codegen of 'Then' can change the current block; update then_block for the - # PHI node. - then_block = g_llvm_builder.basic_block - - # Emit else block. - g_llvm_builder.position_at_end(else_block) - else_value = self.else_branch.CodeGen() - g_llvm_builder.branch(merge_block) - - # Codegen of 'Else' can change the current block, update else_block for the - # PHI node. - else_block = g_llvm_builder.basic_block - - # Emit merge block. - g_llvm_builder.position_at_end(merge_block) - phi = g_llvm_builder.phi(Type.double(), 'iftmp') - phi.add_incoming(then_value, then_block) - phi.add_incoming(else_value, else_block) - - return phi - -Expression class for for/in. -============================ - -class ForExpressionNode(ExpressionNode): - -def **init**\ (self, loop\_variable, start, end, step, body): -self.loop\_variable = loop\_variable self.start = start self.end = end -self.step = step self.body = body - -def CodeGen(self): # Output this as: # ... # start = startexpr # goto -loop # loop: # variable = phi [start, loopheader], [nextvariable, -loopend] # ... # bodyexpr # ... # loopend: # step = stepexpr # -nextvariable = variable + step # endcond = endexpr # br endcond, loop, -endloop # outloop: - -:: - - # Emit the start code first, without 'variable' in scope. 
- start_value = self.start.CodeGen() - - # Make the new basic block for the loop header, inserting after current - # block. - function = g_llvm_builder.basic_block.function - pre_header_block = g_llvm_builder.basic_block - loop_block = function.append_basic_block('loop') - - # Insert an explicit fallthrough from the current block to the loop_block. - g_llvm_builder.branch(loop_block) - - # Start insertion in loop_block. - g_llvm_builder.position_at_end(loop_block) - - # Start the PHI node with an entry for start. - variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) - variable_phi.add_incoming(start_value, pre_header_block) - - # Within the loop, the variable is defined equal to the PHI node. If it - # shadows an existing variable, we have to restore it, so save it now. - old_value = g_named_values.get(self.loop_variable, None) - g_named_values[self.loop_variable] = variable_phi - - # Emit the body of the loop. This, like any other expr, can change the - # current BB. Note that we ignore the value computed by the body. - self.body.CodeGen() - - # Emit the step value. - if self.step: - step_value = self.step.CodeGen() - else: - # If not specified, use 1.0. - step_value = Constant.real(Type.double(), 1) - - next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') - - # Compute the end condition and convert it to a bool by comparing to 0.0. - end_condition = self.end.CodeGen() - end_condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') - - # Create the "after loop" block and insert it. - loop_end_block = g_llvm_builder.basic_block - after_block = function.append_basic_block('afterloop') - - # Insert the conditional branch into the end of loop_end_block. - g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) - - # Any new code will be inserted in after_block. - g_llvm_builder.position_at_end(after_block) - - # Add a new entry to the PHI node for the backedge. 
- variable_phi.add_incoming(next_value, loop_end_block) - - # Restore the unshadowed variable. - if old_value: - g_named_values[self.loop_variable] = old_value - else: - del g_named_values[self.loop_variable] - - # for expr always returns 0.0. - return Constant.real(Type.double(), 0) - -Expression class for a unary operator. -====================================== - -class UnaryExpressionNode(ExpressionNode): - -def **init**\ (self, operator, operand): self.operator = operator -self.operand = operand - -def CodeGen(self): operand = self.operand.CodeGen() function = -g\_llvm\_module.get\_function\_named('unary' + self.operator) return -g\_llvm\_builder.call(function, [operand], 'unop') - -This class represents the "prototype" for a function, which captures its name, -============================================================================== - -and its argument names (thus implicitly the number of arguments the function -============================================================================ - -takes), as well as if it is an operator. -======================================== - -class PrototypeNode(object): - -def **init**\ (self, name, args, is\_operator=False, precedence=0): -self.name = name self.args = args self.is\_operator = is\_operator -self.precedence = precedence - -def IsBinaryOp(self): return self.is\_operator and len(self.args) == 2 - -def GetOperatorName(self): assert self.is\_operator return self.name[-1] - -def CodeGen(self): # Make the function type, eg. double(double,double). -funct\_type = Type.function( Type.double(), [Type.double()] \* -len(self.args), False) - -:: - - function = Function.new(g_llvm_module, funct_type, self.name) - - # If the name conflicted, there was already something with the same name. - # If it has a body, don't allow redefinition or reextern. - if function.name != self.name: - function.delete() - function = g_llvm_module.get_function_named(self.name) - - # If the function already has a body, reject this. 
- if not function.is_declaration: - raise RuntimeError('Redefinition of function.') - - # If the function took a different number of args, reject. - if len(function.args) != len(self.args): - raise RuntimeError('Redeclaration of a function with different number ' - 'of args.') - - # Set names for all arguments and add them to the variables symbol table. - for arg, arg_name in zip(function.args, self.args): - arg.name = arg_name - # Add arguments to variable symbol table. - g_named_values[arg_name] = arg - - return function - -This class represents a function definition itself. -=================================================== - -class FunctionNode(object): - -def **init**\ (self, prototype, body): self.prototype = prototype -self.body = body - -def CodeGen(self): # Clear scope. g\_named\_values.clear() - -:: - - # Create a function object. - function = self.prototype.CodeGen() - - # If this is a binary operator, install its precedence. - if self.prototype.IsBinaryOp(): - operator = self.prototype.GetOperatorName() - g_binop_precedence[operator] = self.prototype.precedence - - # Create a new basic block to start insertion into. - block = function.append_basic_block('entry') - global g_llvm_builder - g_llvm_builder = Builder.new(block) - - # Finish off the function. - try: - return_value = self.body.CodeGen() - g_llvm_builder.ret(return_value) - - # Validate the generated code, checking for consistency. - function.verify() - - # Optimize the function. - g_llvm_pass_manager.run(function) - except: - function.delete() - if self.prototype.IsBinaryOp(): - del g_binop_precedence[self.prototype.GetOperatorName()] - raise - - return function - -Parser ------- - -class Parser(object): - -def **init**\ (self, tokens): self.tokens = tokens self.Next() - -# Provide a simple token buffer. Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. 
def Next(self): -self.current = self.tokens.next() - -# Gets the precedence of the current token, or -1 if the token is not a -binary # operator. def GetCurrentTokenPrecedence(self): if -isinstance(self.current, CharacterToken): return -g\_binop\_precedence.get(self.current.char, -1) else: return -1 - -# identifierexpr ::= identifier \| identifier '(' expression\* ')' def -ParseIdentifierExpr(self): identifier\_name = self.current.name -self.Next() # eat identifier. - -:: - - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name) - - # Call. - self.Next() # eat '('. - args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - - self.Next() # eat ')'. - return CallExpressionNode(identifier_name, args) - -# numberexpr ::= number def ParseNumberExpr(self): result = -NumberExpressionNode(self.current.value) self.Next() # consume the -number. return result - -# parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() -# eat '('. - -:: - - contents = self.ParseExpression() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. - - return contents - -# ifexpr ::= 'if' expression 'then' expression 'else' expression def -ParseIfExpr(self): self.Next() # eat the if. - -:: - - # condition. - condition = self.ParseExpression() - - if not isinstance(self.current, ThenToken): - raise RuntimeError('Expected "then".') - self.Next() # eat the then. - - then_branch = self.ParseExpression() - - if not isinstance(self.current, ElseToken): - raise RuntimeError('Expected "else".') - self.Next() # eat the else. 
- - else_branch = self.ParseExpression() - - return IfExpressionNode(condition, then_branch, else_branch) - -# forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' -expression def ParseForExpr(self): self.Next() # eat the for. - -:: - - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after for.') - - loop_variable = self.current.name - self.Next() # eat the identifier. - - if self.current != CharacterToken('='): - raise RuntimeError('Expected "=" after for variable.') - self.Next() # eat the '='. - - start = self.ParseExpression() - - if self.current != CharacterToken(','): - raise RuntimeError('Expected "," after for start value.') - self.Next() # eat the ','. - - end = self.ParseExpression() - - # The step value is optional. - if self.current == CharacterToken(','): - self.Next() # eat the ','. - step = self.ParseExpression() - else: - step = None - - if not isinstance(self.current, InToken): - raise RuntimeError('Expected "in" after for variable specification.') - self.Next() # eat 'in'. - - body = self.ParseExpression() - - return ForExpressionNode(loop_variable, start, end, step, body) - -# primary ::= identifierexpr \| numberexpr \| parenexpr \| ifexpr \| -forexpr def ParsePrimary(self): if isinstance(self.current, -IdentifierToken): return self.ParseIdentifierExpr() elif -isinstance(self.current, NumberToken): return self.ParseNumberExpr() -elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif -isinstance(self.current, ForToken): return self.ParseForExpr() elif -self.current == CharacterToken('('): return self.ParseParenExpr() else: -raise RuntimeError('Unknown token when expecting an expression.') - -# unary ::= primary \| unary\_operator unary def ParseUnary(self): # If -the current token is not an operator, it must be a primary expression. 
-if (not isinstance(self.current, CharacterToken) or self.current in -[CharacterToken('('), CharacterToken(',')]): return self.ParsePrimary() - -:: - - # If this is a unary operator, read it. - operator = self.current.char - self.Next() # eat the operator. - return UnaryExpressionNode(operator, self.ParseUnary()) - -# binoprhs ::= (binary\_operator unary)\* def ParseBinOpRHS(self, left, -left\_precedence): # If this is a binary operator, find its precedence. -while True: precedence = self.GetCurrentTokenPrecedence() - -:: - - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left - - binary_operator = self.current.char - self.Next() # eat the operator. - - # Parse the unary expression after the binary operator. - right = self.ParseUnary() - - # If binary_operator binds less tightly with right than the operator after - # right, let the pending operator take right as its left. - next_precedence = self.GetCurrentTokenPrecedence() - if precedence < next_precedence: - right = self.ParseBinOpRHS(right, precedence + 1) - - # Merge left/right. - left = BinaryOperatorExpressionNode(binary_operator, left, right) - -# expression ::= unary binoprhs def ParseExpression(self): left = -self.ParseUnary() return self.ParseBinOpRHS(left, 0) - -# prototype # ::= id '(' id\* ')' # ::= binary LETTER number? (id, id) # -::= unary LETTER (id) def ParsePrototype(self): precedence = None if -isinstance(self.current, IdentifierToken): kind = 'normal' -function\_name = self.current.name self.Next() # eat function name. elif -isinstance(self.current, UnaryToken): kind = 'unary' self.Next() # eat -'unary'. if not isinstance(self.current, CharacterToken): raise -RuntimeError('Expected an operator after "unary".') function\_name = -'unary' + self.current.char self.Next() # eat the operator. elif -isinstance(self.current, BinaryToken): kind = 'binary' self.Next() # eat -'binary'. 
if not isinstance(self.current, CharacterToken): raise -RuntimeError('Expected an operator after "binary".') function\_name = -'binary' + self.current.char self.Next() # eat the operator. if -isinstance(self.current, NumberToken): if not 1 <= self.current.value <= -100: raise RuntimeError('Invalid precedence: must be in range [1, -100].') precedence = self.current.value self.Next() # eat the -precedence. else: raise RuntimeError('Expected function name, "unary" or -"binary" in ' 'prototype.') - -:: - - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. - - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. - - if kind == 'unary' and len(arg_names) != 1: - raise RuntimeError('Invalid number of arguments for a unary operator.') - elif kind == 'binary' and len(arg_names) != 2: - raise RuntimeError('Invalid number of arguments for a binary operator.') - - return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) - -# definition ::= 'def' prototype expression def ParseDefinition(self): -self.Next() # eat def. proto = self.ParsePrototype() body = -self.ParseExpression() return FunctionNode(proto, body) - -# toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = -PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) - -# external ::= 'extern' prototype def ParseExtern(self): self.Next() # -eat extern. 
return self.ParsePrototype() - -# Top-Level parsing def HandleDefinition(self): -self.Handle(self.ParseDefinition, 'Read a function definition:') - -def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') - -def HandleTopLevelExpression(self): try: function = -self.ParseTopLevelExpr().CodeGen() result = -g\_llvm\_executor.run\_function(function, []) print 'Evaluated to:', -result.as\_real(Type.double()) except Exception, e: print 'Error:', e -try: self.Next() # Skip for error recovery. except: pass - -def Handle(self, function, message): try: print message, -function().CodeGen() except Exception, e: print 'Error:', e try: -self.Next() # Skip for error recovery. except: pass - -Main driver code. ------------------ - -def main(): # Set up the optimizer pipeline. Start with registering info -about how the # target lays out data structures. -g\_llvm\_pass\_manager.add(g\_llvm\_executor.target\_data) # Do simple -"peephole" optimizations and bit-twiddling optzns. -g\_llvm\_pass\_manager.add(PASS\_INSTRUCTION\_COMBINING) # Reassociate -expressions. g\_llvm\_pass\_manager.add(PASS\_REASSOCIATE) # Eliminate -Common SubExpressions. g\_llvm\_pass\_manager.add(PASS\_GVN) # Simplify -the control flow graph (deleting unreachable blocks, etc). -g\_llvm\_pass\_manager.add(PASS\_CFG\_SIMPLIFICATION) - -g\_llvm\_pass\_manager.initialize() - -# Install standard binary operators. # 1 is lowest possible precedence. -40 is the highest. g\_binop\_precedence['<'] = 10 -g\_binop\_precedence['+'] = 20 g\_binop\_precedence['-'] = 20 -g\_binop\_precedence['\*'] = 40 - -# Run the main "interpreter loop". 
while True: print 'ready>', try: raw -= raw\_input() except KeyboardInterrupt: break - -:: - - parser = Parser(Tokenize(raw)) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() - -# Print out all of the generated code. print '', g\_llvm\_module - -if **name** == '**main**\ ': main() {% endhighlight %} - --------------- - -**`Next: Extending the language: mutable variables / SSA -construction `_** +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes + import FunctionPassManager + + from llvm.core import FCMP_ULT, FCMP_ONE from llvm.passes import + (PASS_INSTRUCTION_COMBINING, PASS_REASSOCIATE, PASS_GVN, + PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + # The binary operator precedence chart. + g_binop_precedence = {} + + Lexer + ----- + + # The lexer yields one of these types for each token. 
+ class EOFToken(object): pass class DefToken(object): pass class + ExternToken(object): pass class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass class + BinaryToken(object): pass class UnaryToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + elif identifier == 'if': + yield IfToken() + elif identifier == 'then': + yield ThenToken() + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + elif identifier == 'binary': + yield BinaryToken() + elif identifier == 'unary': + yield UnaryToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. 
+ class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + function = g_llvm_module.get_function_named('binary' + self.operator) + return g_llvm_builder.call(function, [left, right], 'binop') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # Expression class for if/then/else. + class IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): condition = self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + function = g_llvm_builder.basic_block.function + + # Create blocks for the then and else cases. 
Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + # Emit then value. + g_llvm_builder.position_at_end(then_block) + then_value = self.then_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + # Emit else block. + g_llvm_builder.position_at_end(else_block) + else_value = self.else_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) + phi = g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + return phi + + # Expression class for for/in. + class ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): # Output this as: # ... # start = startexpr # goto + loop # loop: # variable = phi [start, loopheader], [nextvariable, + loopend] # ... # bodyexpr # ... # loopend: # step = stepexpr # + nextvariable = variable + step # endcond = endexpr # br endcond, loop, + endloop # outloop: + + :: + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Make the new basic block for the loop header, inserting after current + # block. 
+ function = g_llvm_builder.basic_block.function + pre_header_block = g_llvm_builder.basic_block + loop_block = function.append_basic_block('loop') + + # Insert an explicit fallthrough from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block) + + # Start the PHI node with an entry for start. + variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) + variable_phi.add_incoming(start_value, pre_header_block) + + # Within the loop, the variable is defined equal to the PHI node. If it + # shadows an existing variable, we have to restore it, so save it now. + old_value = g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = variable_phi + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + # Emit the step value. + if self.step: + step_value = self.step.CodeGen() + else: + # If not specified, use 1.0. + step_value = Constant.real(Type.double(), 1) + + next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') + + # Compute the end condition and convert it to a bool by comparing to 0.0. + end_condition = self.end.CodeGen() + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + + # Create the "after loop" block and insert it. + loop_end_block = g_llvm_builder.basic_block + after_block = function.append_basic_block('afterloop') + + # Insert the conditional branch into the end of loop_end_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + # Add a new entry to the PHI node for the backedge. + variable_phi.add_incoming(next_value, loop_end_block) + + # Restore the unshadowed variable. 
+ if old_value: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + # Expression class for a unary operator. + class UnaryExpressionNode(ExpressionNode): + + def **init**\ (self, operator, operand): self.operator = operator + self.operand = operand + + def CodeGen(self): operand = self.operand.CodeGen() function = + g_llvm_module.get_function_named('unary' + self.operator) return + g_llvm_builder.call(function, [operand], 'unop') + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes), as well as if it is an operator. + class PrototypeNode(object): + + def **init**\ (self, name, args, is_operator=False, precedence=0): + self.name = name self.args = args self.is_operator = is_operator + self.precedence = precedence + + def IsBinaryOp(self): return self.is_operator and len(self.args) == 2 + + def GetOperatorName(self): assert self.is_operator return self.name[-1] + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If the function took a different number of args, reject. 
+ if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. + class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # If this is a binary operator, install its precedence. + if self.prototype.IsBinaryOp(): + operator = self.prototype.GetOperatorName() + g_binop_precedence[operator] = self.prototype.precedence + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + if self.prototype.IsBinaryOp(): + del g_binop_precedence[self.prototype.GetOperatorName()] + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens): self.tokens = tokens self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. 
def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + g_binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # ifexpr ::= 'if' expression 'then' expression 'else' expression def + ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + # forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' + expression def ParseForExpr(self): self.Next() # eat the for. 
+ + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + # primary ::= identifierexpr \| numberexpr \| parenexpr \| ifexpr \| + forexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr() + elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif + isinstance(self.current, ForToken): return self.ParseForExpr() elif + self.current == CharacterToken('('): return self.ParseParenExpr() else: + raise RuntimeError('Unknown token when expecting an expression.') + + # unary ::= primary \| unary_operator unary def ParseUnary(self): # If + the current token is not an operator, it must be a primary expression. + if (not isinstance(self.current, CharacterToken) or self.current in + [CharacterToken('('), CharacterToken(',')]): return self.ParsePrimary() + + :: + + # If this is a unary operator, read it. + operator = self.current.char + self.Next() # eat the operator. 
+ return UnaryExpressionNode(operator, self.ParseUnary()) + + # binoprhs ::= (binary_operator unary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the unary expression after the binary operator. + right = self.ParseUnary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= unary binoprhs def ParseExpression(self): left = + self.ParseUnary() return self.ParseBinOpRHS(left, 0) + + # prototype # ::= id '(' id\* ')' # ::= binary LETTER number? (id, id) # + ::= unary LETTER (id) def ParsePrototype(self): precedence = None if + isinstance(self.current, IdentifierToken): kind = 'normal' + function_name = self.current.name self.Next() # eat function name. elif + isinstance(self.current, UnaryToken): kind = 'unary' self.Next() # eat + 'unary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "unary".') function_name = + 'unary' + self.current.char self.Next() # eat the operator. elif + isinstance(self.current, BinaryToken): kind = 'binary' self.Next() # eat + 'binary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "binary".') function_name = + 'binary' + self.current.char self.Next() # eat the operator. 
if + isinstance(self.current, NumberToken): if not 1 <= self.current.value <= + 100: raise RuntimeError('Invalid precedence: must be in range [1, + 100].') precedence = self.current.value self.Next() # eat the + precedence. else: raise RuntimeError('Expected function name, "unary" or + "binary" in ' 'prototype.') + + :: + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + if kind == 'unary' and len(arg_names) != 1: + raise RuntimeError('Invalid number of arguments for a unary operator.') + elif kind == 'binary' and len(arg_names) != 2: + raise RuntimeError('Invalid number of arguments for a binary operator.') + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. 
except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. g_binop_precedence['<'] = 10 + g_binop_precedence['+'] = 20 g_binop_precedence['-'] = 20 + g_binop_precedence['\*'] = 40 + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw)) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl7.rst b/docs/source/doc/kaleidoscope/PythonLangImpl7.rst index cc44969..576b5b4 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl7.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl7.rst @@ -37,20 +37,30 @@ Why is this a hard problem? 
# {#why} To understand why mutable variables cause complexities in SSA construction, consider this extremely simple C example: -{% highlight python %} int G, H; int test(\_Bool Condition) { int X; if -(Condition) X = G; else X = H; return X; } {% endhighlight %} + +.. code-block:: python + + int G, H; int test(_Bool Condition) { int X; if + (Condition) X = G; else X = H; return X; } + + In this case, we have the variable "X", whose value depends on the path executed in the program. Because there are two different possible values for X before the return instruction, a PHI node is inserted to merge the two values. The LLVM IR that we want for this example looks like this: -{% highlight llvm %} @G = weak global i32 0 ; type of @G is i32\* @H = -weak global i32 0 ; type of @H is i32\* define i32 @test(i1 %Condition) -{ entry: br i1 %Condition, label %cond\_true, label %cond\_false -cond\_true: %X.0 = load i32\* @G br label %cond\_next cond\_false: %X.1 -= load i32\* @H br label %cond\_next cond\_next: %X.2 = phi i32 [ %X.1, -%cond\_false ], [ %X.0, %cond\_true ] ret i32 %X.2 } {% endhighlight %} + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32\* @H = + weak global i32 0 ; type of @H is i32\* define i32 @test(i1 %Condition) + { entry: br i1 %Condition, label %cond_true, label %cond_false + cond_true: %X.0 = load i32\* @G br label %cond_next cond_false: %X.1 + = load i32\* @H br label %cond_next cond_next: %X.2 = phi i32 [ %X.1, + %cond_false ], [ %X.0, %cond_true ] ret i32 %X.2 } + + In this example, the loads from the G and H global variables are explicit in the LLVM IR, and they live in the then/else branches of the @@ -98,10 +108,15 @@ work the same way, except that instead of being declared with global variable definitions, they are declared with the `LLVM alloca instruction `_: -{% highlight python %} define i32 @example() { entry: %X = alloca i32 ; -type of %X is i32\ *. ... %tmp = load i32* %X ; load the stack value %X -from the stack. 
%tmp2 = add i32 %tmp, 1 ; increment it store i32 %tmp2, -i32\* %X ; store it back ... {% endhighlight %} + +.. code-block:: python + + define i32 @example() { entry: %X = alloca i32 ; + type of %X is i32\ *. ... %tmp = load i32* %X ; load the stack value %X + from the stack. %tmp2 = add i32 %tmp, 1 ; increment it store i32 %tmp2, + i32\* %X ; store it back ... + + This code shows an example of how you can declare and manipulate a stack variable in the LLVM IR. Stack memory allocated with the alloca @@ -110,68 +125,78 @@ to functions, you can store it in other variables, etc. In our example above, we could rewrite the example to use the alloca technique to avoid using a PHI node: -{% highlight llvm %} @G = weak global i32 0 ; type of @G is i32\* @H = -weak global i32 0 ; type of @H is i32\* define i32 @test(i1 %Condition) -{ entry: %X = alloca i32 ; type of %X is i32\ *. br i1 %Condition, label -%cond\_true, label %cond\_false cond\_true: %X.0 = load i32* @G store -i32 %X.0, i32\* %X ; Update X br label %cond\_next cond\_false: %X.1 = -load i32\* @H store i32 %X.1, i32\* %X ; Update X br label %cond\_next -cond\_next: %X.2 = load i32\* %X ; Read X ret i32 %X.2 } {% endhighlight -%} -With this, we have discovered a way to handle arbitrary mutable -variables without the need to create Phi nodes at all: - -.. raw:: html +.. code-block:: llvm + @G = weak global i32 0 ; type of @G is i32\* @H = + weak global i32 0 ; type of @H is i32\* define i32 @test(i1 %Condition) + { entry: %X = alloca i32 ; type of %X is i32\ *. br i1 %Condition, label + %cond_true, label %cond_false cond_true: %X.0 = load i32* @G store + i32 %X.0, i32\* %X ; Update X br label %cond_next cond_false: %X.1 = + load i32\* @H store i32 %X.1, i32\* %X ; Update X br label %cond_next + cond_next: %X.2 = load i32\* %X ; Read X ret i32 %X.2 } {% endhighlight + %} + + With this, we have discovered a way to handle arbitrary mutable + variables without the need to create Phi nodes at all: + + .. raw:: html +
  1. - -Each mutable variable becomes a stack allocation. - -.. raw:: html - + + Each mutable variable becomes a stack allocation. + + .. raw:: html +
  2. - -Each read of the variable becomes a load from the stack. - -.. raw:: html - + + Each read of the variable becomes a load from the stack. + + .. raw:: html +
  3. - -Each update of the variable becomes a store to the stack. - -.. raw:: html - + + Each update of the variable becomes a store to the stack. + + .. raw:: html +
  4. - -Taking the address of a variable just uses the stack address directly. - -.. raw:: html - + + Taking the address of a variable just uses the stack address directly. + + .. raw:: html +
+ + While this solution has solved our immediate problem, it introduced + another one: we have now apparently introduced a lot of stack traffic + for very simple and common operations, a major performance problem. + Fortunately for us, the LLVM optimizer has a highly-tuned optimization + pass named "mem2reg" that handles this case, promoting allocas like this + into SSA registers, inserting Phi nodes as appropriate. If you run this + example through the pass, for example, you'll get: + + {% highlight bash %} $ llvm-as < example.ll \| opt -mem2reg \| llvm-dis + -While this solution has solved our immediate problem, it introduced -another one: we have now apparently introduced a lot of stack traffic -for very simple and common operations, a major performance problem. -Fortunately for us, the LLVM optimizer has a highly-tuned optimization -pass named "mem2reg" that handles this case, promoting allocas like this -into SSA registers, inserting Phi nodes as appropriate. If you run this -example through the pass, for example, you'll get: -{% highlight bash %} $ llvm-as < example.ll \| opt -mem2reg \| llvm-dis -{% endhighlight %} -{% highlight llvm %} @G = weak global i32 0 @H = weak global i32 0 -define i32 @test(i1 %Condition) { entry: br i1 %Condition, label -%cond\_true, label %cond\_false cond\_true: %X.0 = load i32\* @G br -label %cond\_next cond\_false: %X.1 = load i32\* @H br label %cond\_next -cond\_next: %X.01 = phi i32 [ %X.1, %cond\_false ], [ %X.0, %cond\_true -] ret i32 %X.01 } {% endhighlight %} + +.. 
code-block:: llvm + + @G = weak global i32 0 @H = weak global i32 0 + define i32 @test(i1 %Condition) { entry: br i1 %Condition, label + %cond_true, label %cond_false cond_true: %X.0 = load i32\* @G br + label %cond_next cond_false: %X.1 = load i32\* @H br label %cond_next + cond_next: %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true + ] ret i32 %X.01 } + + The mem2reg pass implements the standard "iterated dominance frontier" algorithm for constructing SSA form and has a number of optimizations @@ -249,25 +274,24 @@ redefining those only goes so far :). Also, the ability to define new variables is a useful thing regardless of whether you will be mutating them. Here's a motivating example that shows how we could use these: -{% highlight python %} # Define ':' for sequencing: as a low-precedence -operator that ignores operands # and just returns the RHS. def binary : -1 (x y) y; -Recursive fib, we could do this before. -======================================= +.. code-block:: python -def fib(x) if (x < 3) then 1 else fib(x-1) + fib(x-2) + # Define ':' for sequencing: as a low-precedence + operator that ignores operands # and just returns the RHS. def binary : + 1 (x y) y; + + # Recursive fib, we could do this before. + def fib(x) if (x < 3) then 1 else fib(x-1) + fib(x-2) + + # Iterative fib. + def fibi(x) var a = 1, b = 1, c in (for i = 3, i < x in c = a + b : a = + b : b = c) : b + + # Call it. + fibi(10) -Iterative fib. -============== -def fibi(x) var a = 1, b = 1, c in (for i = 3, i < x in c = a + b : a = -b : b = c) : b - -Call it. -======== - -fibi(10) {% endhighlight %} In order to mutate variables, we have to change our existing variables to use the "alloca trick". Once we have that, we'll add our new @@ -298,12 +322,17 @@ allocas that we will store in ``g_named_values``. 
We'll use a helper function that ensures that the allocas are created in the entry block of the function: -{% highlight python %} # Creates an alloca instruction in the entry -block of the function. This is used # for mutable variables. def -CreateEntryBlockAlloca(function, var\_name): entry = -function.get\_entry\_basic\_block() builder = Builder.new(entry) -builder.position\_at\_beginning(entry) return -builder.alloca(Type.double(), var\_name) {% endhighlight %} + +.. code-block:: python + + # Creates an alloca instruction in the entry + block of the function. This is used # for mutable variables. def + CreateEntryBlockAlloca(function, var_name): entry = + function.get_entry_basic_block() builder = Builder.new(entry) + builder.position_at_beginning(entry) return + builder.alloca(Type.double(), var_name) + + This code creates a temporary ``llvm.core.Builder`` that is pointing at the first instruction of the entry block. It then creates an alloca with @@ -315,46 +344,51 @@ variable references. In our new scheme, variables live on the stack, so code generating a reference to them actually needs to produce a load from the stack slot: -{% highlight python %} def CodeGen(self): if self.name in -g\_named\_values: return -g\_llvm\_builder.load(g\_named\_values[self.name], self.name) else: -raise RuntimeError('Unknown variable name: ' + self.name) {% -endhighlight %} -As you can see, this is pretty straightforward. Now we need to update -the things that define the variables to set up the alloca. We'll start -with ``ForExpressionNode.CodeGen`` (see the `full code listing <#code>`_ -for the unabridged code): +.. 
code-block:: python -{% highlight python %} def CodeGen(self): function = -g\_llvm\_builder.basic\_block.function + def CodeGen(self): if self.name in + g_named_values: return + g_llvm_builder.load(g_named_values[self.name], self.name) else: + raise RuntimeError('Unknown variable name: ' + self.name) {% + endhighlight %} + + As you can see, this is pretty straightforward. Now we need to update + the things that define the variables to set up the alloca. We'll start + with ``ForExpressionNode.CodeGen`` (see the `full code listing <#code>`_ + for the unabridged code): + + {% highlight python %} def CodeGen(self): function = + g_llvm_builder.basic_block.function + + :: + + # Create an alloca for the variable in the entry block. + alloca = CreateEntryBlockAlloca(function, self.loop_variable) + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Store the value into the alloca. + g_llvm_builder.store(start_value, alloca) + ... + # Compute the end condition. + end_condition = self.end.CodeGen() + + # Reload, increment, and restore the alloca. This handles the case where + # the body of the loop mutates the variable. + cur_value = g_llvm_builder.load(alloca, self.loop_variable) + next_value = g_llvm_builder.fadd(cur_value, step_value, 'nextvar') + g_llvm_builder.store(next_value, alloca) + + # Convert condition to a bool by comparing equal to 0.0. + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + ... + + -:: - # Create an alloca for the variable in the entry block. - alloca = CreateEntryBlockAlloca(function, self.loop_variable) - - # Emit the start code first, without 'variable' in scope. - start_value = self.start.CodeGen() - - # Store the value into the alloca. - g_llvm_builder.store(start_value, alloca) - ... - # Compute the end condition. - end_condition = self.end.CodeGen() - - # Reload, increment, and restore the alloca. 
This handles the case where - # the body of the loop mutates the variable. - cur_value = g_llvm_builder.load(alloca, self.loop_variable) - next_value = g_llvm_builder.fadd(cur_value, step_value, 'nextvar') - g_llvm_builder.store(next_value, alloca) - - # Convert condition to a bool by comparing equal to 0.0. - end_condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') - ... - -{% endhighlight %} This code is virtually identical to the code `before we allowed mutable variables `_. The big difference is @@ -364,12 +398,17 @@ access the variable as needed. To support mutable argument variables, we need to also make allocas for them. The code for this is also pretty simple: -{% highlight python %} class PrototypeNode(object): ... # Create an -alloca for each argument and register the argument in the symbol # table -so that references to it will succeed. def CreateArgumentAllocas(self, -function): for arg\_name, arg in zip(self.args, function.args): alloca = -CreateEntryBlockAlloca(function, arg\_name) g\_llvm\_builder.store(arg, -alloca) g\_named\_values[arg\_name] = alloca {% endhighlight %} + +.. code-block:: python + + class PrototypeNode(object): ... # Create an + alloca for each argument and register the argument in the symbol # table + so that references to it will succeed. def CreateArgumentAllocas(self, + function): for arg_name, arg in zip(self.args, function.args): alloca = + CreateEntryBlockAlloca(function, arg_name) g_llvm_builder.store(arg, + alloca) g_named_values[arg_name] = alloca + + For each argument, we make an alloca, store the input value to the function into the alloca, and register the alloca as the memory location @@ -379,56 +418,61 @@ right after it sets up the entry block for the function. 
The final missing piece is adding the mem2reg pass, which allows us to get good codegen once again: -{% highlight python %} from llvm.passes import -(PASS\_PROMOTE\_MEMORY\_TO\_REGISTER, PASS\_INSTRUCTION\_COMBINING, -PASS\_REASSOCIATE, PASS\_GVN, PASS\_CFG\_SIMPLIFICATION) ... def main(): -# Set up the optimizer pipeline. Start with registering info about how -the # target lays out data structures. -g\_llvm\_pass\_manager.add(g\_llvm\_executor.target\_data) # Promote -allocas to registers. -g\_llvm\_pass\_manager.add(PASS\_PROMOTE\_MEMORY\_TO\_REGISTER) # Do -simple "peephole" optimizations and bit-twiddling optzns. -g\_llvm\_pass\_manager.add(PASS\_INSTRUCTION\_COMBINING) # Reassociate -expressions. g\_llvm\_pass\_manager.add(PASS\_REASSOCIATE) {% -endhighlight %} -It is interesting to see what the code looks like before and after the -mem2reg optimization runs. For example, this is the before/after code -for our recursive fib function. Before the optimization: +.. code-block:: python -{% highlight llvm %} define double @fib(double %x) { entry: %x1 = alloca -double store double %x, double\* %x1 %x2 = load double\* %x1 %cmptmp = -fcmp ult double %x2, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double -%ifcond = fcmp one double %booltmp, 0.000000e+00 br i1 %ifcond, label -%then, label %else then: ; preds = %entry br label %ifcont else: ; preds -= %entry %x3 = load double\* %x1 %subtmp = fsub double %x3, 1.000000e+00 -%calltmp = call double @fib(double %subtmp) %x4 = load double\* %x1 -%subtmp5 = fsub double %x4, 2.000000e+00 %calltmp6 = call double -@fib(double %subtmp5) %addtmp = fadd double %calltmp, %calltmp6 br label -%ifcont ifcont: ; preds = %else, %then %iftmp = phi double [ -1.000000e+00, %then ], [ %addtmp, %else ] ret double %iftmp } {% -endhighlight %} + from llvm.passes import + (PASS_PROMOTE_MEMORY_TO_REGISTER, PASS_INSTRUCTION_COMBINING, + PASS_REASSOCIATE, PASS_GVN, PASS_CFG_SIMPLIFICATION) ... def main(): + # Set up the optimizer pipeline. 
Start with registering info about how + the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Promote + allocas to registers. + g_llvm_pass_manager.add(PASS_PROMOTE_MEMORY_TO_REGISTER) # Do + simple "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) {% + endhighlight %} + + It is interesting to see what the code looks like before and after the + mem2reg optimization runs. For example, this is the before/after code + for our recursive fib function. Before the optimization: + + {% highlight llvm %} define double @fib(double %x) { entry: %x1 = alloca + double store double %x, double\* %x1 %x2 = load double\* %x1 %cmptmp = + fcmp ult double %x2, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 br i1 %ifcond, label + %then, label %else then: ; preds = %entry br label %ifcont else: ; preds + = %entry %x3 = load double\* %x1 %subtmp = fsub double %x3, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) %x4 = load double\* %x1 + %subtmp5 = fsub double %x4, 2.000000e+00 %calltmp6 = call double + @fib(double %subtmp5) %addtmp = fadd double %calltmp, %calltmp6 br label + %ifcont ifcont: ; preds = %else, %then %iftmp = phi double [ + 1.000000e+00, %then ], [ %addtmp, %else ] ret double %iftmp } {% + endhighlight %} + + Here there is only one variable (x, the input argument) but you can + still see the extremely simple-minded code generation strategy we are + using. In the entry block, an alloca is created, and the initial input + value is stored into it. Each reference to the variable does a reload + from the stack. Also, note that we didn't modify the if/then/else + expression, so it still inserts a PHI node. While we could make an + alloca for it, it is actually easier to create a PHI node for it, so we + still just make the PHI. 
+ + Here is the code after the mem2reg pass runs: + + {% highlight llvm %} define double @fib(double %x) { entry: %cmptmp = + fcmp ult double %x, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 br i1 %ifcond, label + %then, label %else then: br label %ifcont else: %subtmp = fsub double + %x, 1.000000e+00 %calltmp = call double @fib(double %subtmp) %subtmp5 = + fsub double %x, 2.000000e+00 %calltmp6 = call double @fib(double + %subtmp5) %addtmp = fadd double %calltmp, %calltmp6 br label %ifcont + ifcont: ; preds = %else, %then %iftmp = phi double [ 1.000000e+00, %then + ], [ %addtmp, %else ] ret double %iftmp } -Here there is only one variable (x, the input argument) but you can -still see the extremely simple-minded code generation strategy we are -using. In the entry block, an alloca is created, and the initial input -value is stored into it. Each reference to the variable does a reload -from the stack. Also, note that we didn't modify the if/then/else -expression, so it still inserts a PHI node. While we could make an -alloca for it, it is actually easier to create a PHI node for it, so we -still just make the PHI. -Here is the code after the mem2reg pass runs: - -{% highlight llvm %} define double @fib(double %x) { entry: %cmptmp = -fcmp ult double %x, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double -%ifcond = fcmp one double %booltmp, 0.000000e+00 br i1 %ifcond, label -%then, label %else then: br label %ifcont else: %subtmp = fsub double -%x, 1.000000e+00 %calltmp = call double @fib(double %subtmp) %subtmp5 = -fsub double %x, 2.000000e+00 %calltmp6 = call double @fib(double -%subtmp5) %addtmp = fadd double %calltmp, %calltmp6 br label %ifcont -ifcont: ; preds = %else, %then %iftmp = phi double [ 1.000000e+00, %then -], [ %addtmp, %else ] ret double %iftmp } {% endhighlight %} This is a trivial case for mem2reg, since there are no redefinitions of the variable. 
The point of showing this is to calm your tension about @@ -436,14 +480,19 @@ inserting such blatent inefficiencies :). After the rest of the optimizers run, we get: -{% highlight llvm %} define double @fib(double %x) { entry: %cmptmp = -fcmp ult double %x, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double -%ifcond = fcmp ueq double %booltmp, 0.000000e+00 br i1 %ifcond, label -%else, label %ifcont else: %subtmp = fsub double %x, 1.000000e+00 -%calltmp = call double @fib(double %subtmp) %subtmp5 = fsub double %x, -2.000000e+00 %calltmp6 = call double @fib(double %subtmp5) %addtmp = -fadd double %calltmp, %calltmp6 ret double %addtmp ifcont: ret double -1.000000e+00 } {% endhighlight %} + +.. code-block:: llvm + + define double @fib(double %x) { entry: %cmptmp = + fcmp ult double %x, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp ueq double %booltmp, 0.000000e+00 br i1 %ifcond, label + %else, label %ifcont else: %subtmp = fsub double %x, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) %subtmp5 = fsub double %x, + 2.000000e+00 %calltmp6 = call double @fib(double %subtmp5) %addtmp = + fadd double %calltmp, %calltmp6 ret double %addtmp ifcont: ret double + 1.000000e+00 } + + Here we see that the simplifycfg pass decided to clone the return instruction into the end of the 'else' block. This allowed it to @@ -462,45 +511,50 @@ simple. We will parse it just like any other binary operator, but handle it internally (instead of allowing the user to define it). The first step is to set a precedence: -{% highlight python %} def main(): ... # Install standard binary -operators. # 1 is lowest possible precedence. 40 is the highest. -g\_binop\_precedence['='] = 2 g\_binop\_precedence['<'] = 10 -g\_binop\_precedence['+'] = 20 g\_binop\_precedence['-'] = 20 {% -endhighlight %} -Now that the parser knows the precedence of the binary operator, it -takes care of all the parsing and AST generation. 
We just need to -implement codegen for the assignment operator. This looks like: +.. code-block:: python -{% highlight python %} class -BinaryOperatorExpressionNode(ExpressionNode): ... def CodeGen(self): # A -special case for '=' because we don't want to emit the LHS as an # -expression. if self.operator == '=': # Assignment requires the LHS to be -an identifier. if not isinstance(self.left, VariableExpressionNode): -raise RuntimeError('Destination of "=" must be a variable.') {% -endhighlight %} + def main(): ... # Install standard binary + operators. # 1 is lowest possible precedence. 40 is the highest. + g_binop_precedence['='] = 2 g_binop_precedence['<'] = 10 + g_binop_precedence['+'] = 20 g_binop_precedence['-'] = 20 {% + endhighlight %} + + Now that the parser knows the precedence of the binary operator, it + takes care of all the parsing and AST generation. We just need to + implement codegen for the assignment operator. This looks like: + + {% highlight python %} class + BinaryOperatorExpressionNode(ExpressionNode): ... def CodeGen(self): # A + special case for '=' because we don't want to emit the LHS as an # + expression. if self.operator == '=': # Assignment requires the LHS to be + an identifier. if not isinstance(self.left, VariableExpressionNode): + raise RuntimeError('Destination of "=" must be a variable.') {% + endhighlight %} + + Unlike the rest of the binary operators, our assignment operator doesn't + follow the "emit LHS, emit RHS, do computation" model. As such, it is + handled as a special case before the other binary operators are handled. + The other strange thing is that it requires the LHS to be a variable. It + is invalid to have ``(x+1) = expr`` -- only things like ``x = expr`` are + allowed. + + {% highlight python %} # Codegen the RHS. value = self.right.CodeGen() + + :: + + # Look up the name. + variable = g_named_values[self.left.name] + + # Store the value and return it. 
+ g_llvm_builder.store(value, variable) + + return value + ... + + -Unlike the rest of the binary operators, our assignment operator doesn't -follow the "emit LHS, emit RHS, do computation" model. As such, it is -handled as a special case before the other binary operators are handled. -The other strange thing is that it requires the LHS to be a variable. It -is invalid to have ``(x+1) = expr`` -- only things like ``x = expr`` are -allowed. -{% highlight python %} # Codegen the RHS. value = self.right.CodeGen() - -:: - - # Look up the name. - variable = g_named_values[self.left.name] - - # Store the value and return it. - g_llvm_builder.store(value, variable) - - return value - ... - -{% endhighlight %} Once we have the variable, CodeGening the assignment is straightforward: we emit the RHS of the assignment, create a store, and return the @@ -510,19 +564,20 @@ computed value. Returning a value allows for chained assignments like Now that we have an assignment operator, we can mutate loop variables and arguments. For example, we can now run code like this: -{% highlight python %} # Function to print a double. extern printd(x) -Define ':' for sequencing: as a low-precedence operator that ignores operands -============================================================================= +.. code-block:: python -and just returns the RHS. -========================= + # Function to print a double. extern printd(x) + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y + + def test(x) printd(x) : x = 4 : printd(x) + + test(123) -def binary : 1 (x y) y -def test(x) printd(x) : x = 4 : printd(x) - -test(123) {% endhighlight %} When run, this example prints "123" and then "4", showing that we did actually mutate the value! Okay, we have now officially implemented our @@ -541,21 +596,31 @@ generator. The first step for adding our new 'var/in' construct is to extend the lexer. 
As before, this is pretty trivial, the code looks like this: -{% highlight python %} ... class UnaryToken(object): pass class -VarToken(object): pass ... def Tokenize(string): ... elif identifier == -'unary': yield UnaryToken() elif identifier == 'var': yield VarToken() -else: yield IdentifierToken(identifier) {% endhighlight %} + +.. code-block:: python + + ... class UnaryToken(object): pass class + VarToken(object): pass ... def Tokenize(string): ... elif identifier == + 'unary': yield UnaryToken() elif identifier == 'var': yield VarToken() + else: yield IdentifierToken(identifier) + + The next step is to define the AST node that we will construct. For var/in, it looks like this: -{% highlight python %} # Expression class for var/in. class -VarExpressionNode(ExpressionNode): -def **init**\ (self, variables, body): self.variables = variables -self.body = body +.. code-block:: python + + # Expression class for var/in. class + VarExpressionNode(ExpressionNode): + + def **init**\ (self, variables, body): self.variables = variables + self.body = body + + def CodeGen(self): ... + -def CodeGen(self): ... {% endhighlight %} var/in allows a list of names to be defined all at once, and each name can optionally have an initializer value. As such, we capture this @@ -565,105 +630,130 @@ allowed to access the variables defined by the var/in. With this in place, we can define the parser pieces. 
The first thing we do is add it as a primary expression: -{% highlight python %} # primary ::= # dentifierexpr \| numberexpr \| -parenexpr \| ifexpr \| forexpr \| varexpr def ParsePrimary(self): if -isinstance(self.current, IdentifierToken): return -self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): -return self.ParseNumberExpr() elif isinstance(self.current, IfToken): -return self.ParseIfExpr() elif isinstance(self.current, ForToken): -return self.ParseForExpr() elif isinstance(self.current, VarToken): -return self.ParseVarExpr() elif self.current == CharacterToken('('): -return self.ParseParenExpr() else: raise RuntimeError('Unknown token -when expecting an expression.') {% endhighlight %} + +.. code-block:: python + + # primary ::= # dentifierexpr \| numberexpr \| + parenexpr \| ifexpr \| forexpr \| varexpr def ParsePrimary(self): if + isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif isinstance(self.current, IfToken): + return self.ParseIfExpr() elif isinstance(self.current, ForToken): + return self.ParseForExpr() elif isinstance(self.current, VarToken): + return self.ParseVarExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + Next we define ParseVarExpr: -{% highlight python %} # varexpr ::= 'var' (identifier ('=' -expression)?)+ 'in' expression def ParseVarExpr(self): self.Next() # eat -'var'. -:: +.. code-block:: python - variables = {} + # varexpr ::= 'var' (identifier ('=' + expression)?)+ 'in' expression def ParseVarExpr(self): self.Next() # eat + 'var'. + + :: + + variables = {} + + # At least one variable name is required. + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "var".') + + - # At least one variable name is required. 
- if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after "var".') -{% endhighlight %} The first part of this code parses the list of identifier/expr pairs into the local ``variables`` list. -{% highlight python %} while True: var\_name = self.current.name -self.Next() # eat the identifier. -:: +.. code-block:: python - # Read the optional initializer. - if self.current == CharacterToken('='): - self.Next() # eat '='. - variables[var_name] = self.ParseExpression() - else: - variables[var_name] = None + while True: var_name = self.current.name + self.Next() # eat the identifier. + + :: + + # Read the optional initializer. + if self.current == CharacterToken('='): + self.Next() # eat '='. + variables[var_name] = self.ParseExpression() + else: + variables[var_name] = None + + # End of var list, exit loop. + if self.current != CharacterToken(','): + break + self.Next() # eat ','. + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "," in a var expression.') + + - # End of var list, exit loop. - if self.current != CharacterToken(','): - break - self.Next() # eat ','. - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after "," in a var expression.') - -{% endhighlight %} Once all the variables are parsed, we then parse the body and create the AST node: -{% highlight python %} # At this point, we have to have 'in'. if not -isinstance(self.current, InToken): raise RuntimeError('Expected "in" -keyword after "var".') self.Next() # eat 'in'. -:: +.. code-block:: python - body = self.ParseExpression() + # At this point, we have to have 'in'. if not + isinstance(self.current, InToken): raise RuntimeError('Expected "in" + keyword after "var".') self.Next() # eat 'in'. 
+ + :: + + body = self.ParseExpression() + + return VarExpressionNode(variables, body) + + - return VarExpressionNode(variables, body) -{% endhighlight %} Now that we can parse and represent the code, we need to support emission of LLVM IR for it. This code starts out with: -{% highlight python %} class VarExpressionNode(ExpressionNode): ... def -CodeGen(self): old\_bindings = {} function = -g\_llvm\_builder.basic\_block.function -:: +.. code-block:: python - # Register all variables and emit their initializer. - for var_name, var_expression in self.variables.iteritems(): - # Emit the initializer before adding the variable to scope, this prevents - # the initializer from referencing the variable itself, and permits stuff - # like this: - # var a = 1 in - # var a = a in ... # refers to outer 'a'. - if var_expression is not None: - var_value = var_expression.CodeGen() - else: - var_value = Constant.real(Type.double(), 0) + class VarExpressionNode(ExpressionNode): ... def + CodeGen(self): old_bindings = {} function = + g_llvm_builder.basic_block.function + + :: + + # Register all variables and emit their initializer. + for var_name, var_expression in self.variables.iteritems(): + # Emit the initializer before adding the variable to scope, this prevents + # the initializer from referencing the variable itself, and permits stuff + # like this: + # var a = 1 in + # var a = a in ... # refers to outer 'a'. + if var_expression is not None: + var_value = var_expression.CodeGen() + else: + var_value = Constant.real(Type.double(), 0) + + alloca = CreateEntryBlockAlloca(function, var_name) + g_llvm_builder.store(var_value, alloca) + + # Remember the old variable binding so that we can restore the binding + # when we unrecurse. + old_bindings[var_name] = g_named_values.get(var_name, None) + + # Remember this binding. 
+ g_named_values[var_name] = alloca + + - alloca = CreateEntryBlockAlloca(function, var_name) - g_llvm_builder.store(var_value, alloca) - # Remember the old variable binding so that we can restore the binding - # when we unrecurse. - old_bindings[var_name] = g_named_values.get(var_name, None) - - # Remember this binding. - g_named_values[var_name] = alloca - -{% endhighlight %} Basically it loops over all the variables, installing them one at a time. For each variable we put into the symbol table, we remember the @@ -674,22 +764,32 @@ the initializer, create the alloca, then update the symbol table to point to it. Once all the variables are installed in the symbol table, we evaluate the body of the var/in expression: -{% highlight python %} # Codegen the body, now that all vars are in -scope. body = self.body.CodeGen() {% endhighlight %} + +.. code-block:: python + + # Codegen the body, now that all vars are in + scope. body = self.body.CodeGen() + + Finally, before returning, we restore the previous variable bindings: -{% highlight python %} # Pop all our variables from scope. for var\_name -in self.variables: if old\_bindings[var\_name] is not None: -g\_named\_values[var\_name] = old\_bindings[var\_name] else: del -g\_named\_values[var\_name] -:: +.. code-block:: python + + # Pop all our variables from scope. for var_name + in self.variables: if old_bindings[var_name] is not None: + g_named_values[var_name] = old_bindings[var_name] else: del + g_named_values[var_name] + + :: + + # Return the body computation. + return body + + - # Return the body computation. - return body -{% endhighlight %} The end result of all of this is that we get properly scoped variable definitions, and we even (trivially) allow mutation of them :). 
@@ -708,854 +808,804 @@ Full Code Listing # {#code} Here is the complete code listing for our running example, enhanced with mutable variables and var/in support: -{% highlight python %} #!/usr/bin/env python -import re from llvm.core import Module, Constant, Type, Function, -Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes -import FunctionPassManager - -from llvm.core import FCMP\_ULT, FCMP\_ONE from llvm.passes import -(PASS\_PROMOTE\_MEMORY\_TO\_REGISTER, PASS\_INSTRUCTION\_COMBINING, -PASS\_REASSOCIATE, PASS\_GVN, PASS\_CFG\_SIMPLIFICATION) - -Globals -------- - -The LLVM module, which holds all the IR code. -============================================= - -g\_llvm\_module = Module.new('my cool jit') - -The LLVM instruction builder. Created whenever a new function is entered. -========================================================================= - -g\_llvm\_builder = None - -A dictionary that keeps track of which values are defined in the current scope -============================================================================== - -and what their LLVM representation is. -====================================== - -g\_named\_values = {} - -The function optimization passes manager. -========================================= - -g\_llvm\_pass\_manager = FunctionPassManager.new(g\_llvm\_module) - -The LLVM execution engine. -========================== - -g\_llvm\_executor = ExecutionEngine.new(g\_llvm\_module) - -The binary operator precedence chart. -===================================== - -g\_binop\_precedence = {} - -Creates an alloca instruction in the entry block of the function. This is used -============================================================================== - -for mutable variables. 
-====================== - -def CreateEntryBlockAlloca(function, var\_name): entry = -function.get\_entry\_basic\_block() builder = Builder.new(entry) -builder.position\_at\_beginning(entry) return -builder.alloca(Type.double(), var\_name) - -Lexer ------ - -The lexer yields one of these types for each token. -=================================================== - -class EOFToken(object): pass class DefToken(object): pass class -ExternToken(object): pass class IfToken(object): pass class -ThenToken(object): pass class ElseToken(object): pass class -ForToken(object): pass class InToken(object): pass class -BinaryToken(object): pass class UnaryToken(object): pass class -VarToken(object): pass - -class IdentifierToken(object): def **init**\ (self, name): self.name = -name - -class NumberToken(object): def **init**\ (self, value): self.value = -value - -class CharacterToken(object): def **init**\ (self, char): self.char = -char def **eq**\ (self, other): return isinstance(other, CharacterToken) -and self.char == other.char def **ne**\ (self, other): return not self -== other - -Regular expressions that tokens and comments of our language. -============================================================= - -REGEX\_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX\_IDENTIFIER = -re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX\_COMMENT = re.compile('#.*') - -def Tokenize(string): while string: # Skip whitespace. if -string[0].isspace(): string = string[1:] continue - -:: - - # Run regexes. - comment_match = REGEX_COMMENT.match(string) - number_match = REGEX_NUMBER.match(string) - identifier_match = REGEX_IDENTIFIER.match(string) - - # Check if any of the regexes matched and yield the appropriate result. 
- if comment_match: - comment = comment_match.group(0) - string = string[len(comment):] - elif number_match: - number = number_match.group(0) - yield NumberToken(float(number)) - string = string[len(number):] - elif identifier_match: - identifier = identifier_match.group(0) - # Check if we matched a keyword. - if identifier == 'def': - yield DefToken() - elif identifier == 'extern': - yield ExternToken() - elif identifier == 'if': - yield IfToken() - elif identifier == 'then': - yield ThenToken() - elif identifier == 'else': - yield ElseToken() - elif identifier == 'for': - yield ForToken() - elif identifier == 'in': - yield InToken() - elif identifier == 'binary': - yield BinaryToken() - elif identifier == 'unary': - yield UnaryToken() - elif identifier == 'var': - yield VarToken() - else: - yield IdentifierToken(identifier) - string = string[len(identifier):] - else: - # Yield the ASCII value of the unknown character. - yield CharacterToken(string[0]) - string = string[1:] - -yield EOFToken() - -Abstract Syntax Tree (aka Parse Tree) -------------------------------------- - -Base class for all expression nodes. -==================================== - -class ExpressionNode(object): pass - -Expression class for numeric literals like "1.0". -================================================= - -class NumberExpressionNode(ExpressionNode): - -def **init**\ (self, value): self.value = value - -def CodeGen(self): return Constant.real(Type.double(), self.value) - -Expression class for referencing a variable, like "a". -====================================================== - -class VariableExpressionNode(ExpressionNode): - -def **init**\ (self, name): self.name = name - -def CodeGen(self): if self.name in g\_named\_values: return -g\_llvm\_builder.load(g\_named\_values[self.name], self.name) else: -raise RuntimeError('Unknown variable name: ' + self.name) - -Expression class for a binary operator. 
-======================================= - -class BinaryOperatorExpressionNode(ExpressionNode): - -def **init**\ (self, operator, left, right): self.operator = operator -self.left = left self.right = right - -def CodeGen(self): # A special case for '=' because we don't want to -emit the LHS as an # expression. if self.operator == '=': # Assignment -requires the LHS to be an identifier. if not isinstance(self.left, -VariableExpressionNode): raise RuntimeError('Destination of "=" must be -a variable.') - -:: - - # Codegen the RHS. - value = self.right.CodeGen() - - # Look up the name. - variable = g_named_values[self.left.name] - - # Store the value and return it. - g_llvm_builder.store(value, variable) - - return value - - left = self.left.CodeGen() - right = self.right.CodeGen() - - if self.operator == '+': - return g_llvm_builder.fadd(left, right, 'addtmp') - elif self.operator == '-': - return g_llvm_builder.fsub(left, right, 'subtmp') - elif self.operator == '*': - return g_llvm_builder.fmul(left, right, 'multmp') - elif self.operator == '<': - result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') - # Convert bool 0 or 1 to double 0.0 or 1.0. - return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') - else: - function = g_llvm_module.get_function_named('binary' + self.operator) - return g_llvm_builder.call(function, [left, right], 'binop') - -Expression class for function calls. -==================================== - -class CallExpressionNode(ExpressionNode): - -def **init**\ (self, callee, args): self.callee = callee self.args = -args - -def CodeGen(self): # Look up the name in the global module table. callee -= g\_llvm\_module.get\_function\_named(self.callee) - -:: - - # Check for argument mismatch error. 
- if len(callee.args) != len(self.args): - raise RuntimeError('Incorrect number of arguments passed.') - - arg_values = [i.CodeGen() for i in self.args] - - return g_llvm_builder.call(callee, arg_values, 'calltmp') - -Expression class for if/then/else. -================================== - -class IfExpressionNode(ExpressionNode): - -def **init**\ (self, condition, then\_branch, else\_branch): -self.condition = condition self.then\_branch = then\_branch -self.else\_branch = else\_branch - -def CodeGen(self): condition = self.condition.CodeGen() - -:: - - # Convert condition to a bool by comparing equal to 0.0. - condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') - - function = g_llvm_builder.basic_block.function - - # Create blocks for the then and else cases. Insert the 'then' block at the - # end of the function. - then_block = function.append_basic_block('then') - else_block = function.append_basic_block('else') - merge_block = function.append_basic_block('ifcond') - - g_llvm_builder.cbranch(condition_bool, then_block, else_block) - - # Emit then value. - g_llvm_builder.position_at_end(then_block) - then_value = self.then_branch.CodeGen() - g_llvm_builder.branch(merge_block) - - # Codegen of 'Then' can change the current block; update then_block for the - # PHI node. - then_block = g_llvm_builder.basic_block - - # Emit else block. - g_llvm_builder.position_at_end(else_block) - else_value = self.else_branch.CodeGen() - g_llvm_builder.branch(merge_block) - - # Codegen of 'Else' can change the current block, update else_block for the - # PHI node. - else_block = g_llvm_builder.basic_block - - # Emit merge block. - g_llvm_builder.position_at_end(merge_block) - phi = g_llvm_builder.phi(Type.double(), 'iftmp') - phi.add_incoming(then_value, then_block) - phi.add_incoming(else_value, else_block) - - return phi - -Expression class for for/in. 
-============================ - -class ForExpressionNode(ExpressionNode): - -def **init**\ (self, loop\_variable, start, end, step, body): -self.loop\_variable = loop\_variable self.start = start self.end = end -self.step = step self.body = body - -def CodeGen(self): # Output this as: # var = alloca double # ... # start -= startexpr # store start -> var # goto loop # loop: # ... # bodyexpr # -... # loopend: # step = stepexpr # endcond = endexpr # # curvar = load -var # nextvar = curvar + step # store nextvar -> var # br endcond, loop, -endloop # outloop: - -:: - - function = g_llvm_builder.basic_block.function - - # Create an alloca for the variable in the entry block. - alloca = CreateEntryBlockAlloca(function, self.loop_variable) - - # Emit the start code first, without 'variable' in scope. - start_value = self.start.CodeGen() - - # Store the value into the alloca. - g_llvm_builder.store(start_value, alloca) - - # Make the new basic block for the loop, inserting after current block. - loop_block = function.append_basic_block('loop') - - # Insert an explicit fall through from the current block to the loop_block. - g_llvm_builder.branch(loop_block) - - # Start insertion in loop_block. - g_llvm_builder.position_at_end(loop_block) - - # Within the loop, the variable is defined equal to the alloca. If it - # shadows an existing variable, we have to restore it, so save it now. - old_value = g_named_values.get(self.loop_variable, None) - g_named_values[self.loop_variable] = alloca - - # Emit the body of the loop. This, like any other expr, can change the - # current BB. Note that we ignore the value computed by the body. - self.body.CodeGen() - - # Emit the step value. - if self.step: - step_value = self.step.CodeGen() - else: - # If not specified, use 1.0. - step_value = Constant.real(Type.double(), 1) - - # Compute the end condition. - end_condition = self.end.CodeGen() - - # Reload, increment, and restore the alloca. 
This handles the case where - # the body of the loop mutates the variable. - cur_value = g_llvm_builder.load(alloca, self.loop_variable) - next_value = g_llvm_builder.fadd(cur_value, step_value, 'nextvar') - g_llvm_builder.store(next_value, alloca) - - # Convert condition to a bool by comparing equal to 0.0. - end_condition_bool = g_llvm_builder.fcmp( - FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') - - # Create the "after loop" block and insert it. - after_block = function.append_basic_block('afterloop') - - # Insert the conditional branch into the end of loop_block. - g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) - - # Any new code will be inserted in after_block. - g_llvm_builder.position_at_end(after_block) - - # Restore the unshadowed variable. - if old_value is not None: - g_named_values[self.loop_variable] = old_value - else: - del g_named_values[self.loop_variable] - - # for expr always returns 0.0. - return Constant.real(Type.double(), 0) - -Expression class for a unary operator. -====================================== - -class UnaryExpressionNode(ExpressionNode): - -def **init**\ (self, operator, operand): self.operator = operator -self.operand = operand - -def CodeGen(self): operand = self.operand.CodeGen() function = -g\_llvm\_module.get\_function\_named('unary' + self.operator) return -g\_llvm\_builder.call(function, [operand], 'unop') - -Expression class for var/in. -============================ - -class VarExpressionNode(ExpressionNode): - -def **init**\ (self, variables, body): self.variables = variables -self.body = body - -def CodeGen(self): old\_bindings = {} function = -g\_llvm\_builder.basic\_block.function - -:: - - # Register all variables and emit their initializer. 
- for var_name, var_expression in self.variables.iteritems(): - # Emit the initializer before adding the variable to scope, this prevents - # the initializer from referencing the variable itself, and permits stuff - # like this: - # var a = 1 in - # var a = a in ... # refers to outer 'a'. - if var_expression is not None: - var_value = var_expression.CodeGen() - else: - var_value = Constant.real(Type.double(), 0) - - alloca = CreateEntryBlockAlloca(function, var_name) - g_llvm_builder.store(var_value, alloca) - - # Remember the old variable binding so that we can restore the binding - # when we unrecurse. - old_bindings[var_name] = g_named_values.get(var_name, None) - - # Remember this binding. - g_named_values[var_name] = alloca - - # Codegen the body, now that all vars are in scope. - body = self.body.CodeGen() - - # Pop all our variables from scope. - for var_name in self.variables: - if old_bindings[var_name] is not None: - g_named_values[var_name] = old_bindings[var_name] - else: - del g_named_values[var_name] - - # Return the body computation. - return body - -This class represents the "prototype" for a function, which captures its name, -============================================================================== - -and its argument names (thus implicitly the number of arguments the function -============================================================================ - -takes), as well as if it is an operator. -======================================== - -class PrototypeNode(object): - -def **init**\ (self, name, args, is\_operator=False, precedence=0): -self.name = name self.args = args self.is\_operator = is\_operator -self.precedence = precedence - -def IsBinaryOp(self): return self.is\_operator and len(self.args) == 2 - -def GetOperatorName(self): assert self.is\_operator return self.name[-1] - -def CodeGen(self): # Make the function type, eg. double(double,double). 
-funct\_type = Type.function( Type.double(), [Type.double()] \* -len(self.args), False) - -:: - - function = Function.new(g_llvm_module, funct_type, self.name) - - # If the name conflicted, there was already something with the same name. - # If it has a body, don't allow redefinition or reextern. - if function.name != self.name: - function.delete() - function = g_llvm_module.get_function_named(self.name) - - # If the function already has a body, reject this. - if not function.is_declaration: - raise RuntimeError('Redefinition of function.') - - # If the function took a different number of args, reject. - if len(function.args) != len(self.args): - raise RuntimeError('Redeclaration of a function with different number ' - 'of args.') - - # Set names for all arguments and add them to the variables symbol table. - for arg, arg_name in zip(function.args, self.args): - arg.name = arg_name - - return function - -# Create an alloca for each argument and register the argument in the -symbol # table so that references to it will succeed. def -CreateArgumentAllocas(self, function): for arg\_name, arg in -zip(self.args, function.args): alloca = CreateEntryBlockAlloca(function, -arg\_name) g\_llvm\_builder.store(arg, alloca) -g\_named\_values[arg\_name] = alloca - -This class represents a function definition itself. -=================================================== - -class FunctionNode(object): - -def **init**\ (self, prototype, body): self.prototype = prototype -self.body = body - -def CodeGen(self): # Clear scope. g\_named\_values.clear() - -:: - - # Create a function object. - function = self.prototype.CodeGen() - - # If this is a binary operator, install its precedence. - if self.prototype.IsBinaryOp(): - operator = self.prototype.GetOperatorName() - g_binop_precedence[operator] = self.prototype.precedence - - # Create a new basic block to start insertion into. 
- block = function.append_basic_block('entry') - global g_llvm_builder - g_llvm_builder = Builder.new(block) - - # Add all arguments to the symbol table and create their allocas. - self.prototype.CreateArgumentAllocas(function) - - # Finish off the function. - try: - return_value = self.body.CodeGen() - g_llvm_builder.ret(return_value) - - # Validate the generated code, checking for consistency. - function.verify() - - # Optimize the function. - g_llvm_pass_manager.run(function) - except: - function.delete() - if self.prototype.IsBinaryOp(): - del g_binop_precedence[self.prototype.GetOperatorName()] - raise - - return function - -Parser ------- - -class Parser(object): - -def **init**\ (self, tokens): self.tokens = tokens self.Next() - -# Provide a simple token buffer. Parser.current is the current token the -# parser is looking at. Parser.Next() reads another token from the lexer -and # updates Parser.current with its results. def Next(self): -self.current = self.tokens.next() - -# Gets the precedence of the current token, or -1 if the token is not a -binary # operator. def GetCurrentTokenPrecedence(self): if -isinstance(self.current, CharacterToken): return -g\_binop\_precedence.get(self.current.char, -1) else: return -1 - -# identifierexpr ::= identifier \| identifier '(' expression\* ')' def -ParseIdentifierExpr(self): identifier\_name = self.current.name -self.Next() # eat identifier. - -:: - - if self.current != CharacterToken('('): # Simple variable reference. - return VariableExpressionNode(identifier_name) - - # Call. - self.Next() # eat '('. - args = [] - if self.current != CharacterToken(')'): - while True: - args.append(self.ParseExpression()) - if self.current == CharacterToken(')'): - break - elif self.current != CharacterToken(','): - raise RuntimeError('Expected ")" or "," in argument list.') - self.Next() - - self.Next() # eat ')'. 
- return CallExpressionNode(identifier_name, args) - -# numberexpr ::= number def ParseNumberExpr(self): result = -NumberExpressionNode(self.current.value) self.Next() # consume the -number. return result - -# parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() -# eat '('. - -:: - - contents = self.ParseExpression() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")".') - self.Next() # eat ')'. - - return contents - -# ifexpr ::= 'if' expression 'then' expression 'else' expression def -ParseIfExpr(self): self.Next() # eat the if. - -:: - - # condition. - condition = self.ParseExpression() - - if not isinstance(self.current, ThenToken): - raise RuntimeError('Expected "then".') - self.Next() # eat the then. - - then_branch = self.ParseExpression() - - if not isinstance(self.current, ElseToken): - raise RuntimeError('Expected "else".') - self.Next() # eat the else. - - else_branch = self.ParseExpression() - - return IfExpressionNode(condition, then_branch, else_branch) - -# forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' -expression def ParseForExpr(self): self.Next() # eat the for. - -:: - - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after for.') - - loop_variable = self.current.name - self.Next() # eat the identifier. - - if self.current != CharacterToken('='): - raise RuntimeError('Expected "=" after for variable.') - self.Next() # eat the '='. - - start = self.ParseExpression() - - if self.current != CharacterToken(','): - raise RuntimeError('Expected "," after for start value.') - self.Next() # eat the ','. - - end = self.ParseExpression() - - # The step value is optional. - if self.current == CharacterToken(','): - self.Next() # eat the ','. - step = self.ParseExpression() - else: - step = None - - if not isinstance(self.current, InToken): - raise RuntimeError('Expected "in" after for variable specification.') - self.Next() # eat 'in'. 
- - body = self.ParseExpression() - - return ForExpressionNode(loop_variable, start, end, step, body) - -# varexpr ::= 'var' (identifier ('=' expression)?)+ 'in' expression def -ParseVarExpr(self): self.Next() # eat 'var'. - -:: - - variables = {} - - # At least one variable name is required. - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after "var".') - - while True: - var_name = self.current.name - self.Next() # eat the identifier. - - # Read the optional initializer. - if self.current == CharacterToken('='): - self.Next() # eat '='. - variables[var_name] = self.ParseExpression() - else: - variables[var_name] = None - - # End of var list, exit loop. - if self.current != CharacterToken(','): - break - self.Next() # eat ','. - - if not isinstance(self.current, IdentifierToken): - raise RuntimeError('Expected identifier after "," in a var expression.') - - # At this point, we have to have 'in'. - if not isinstance(self.current, InToken): - raise RuntimeError('Expected "in" keyword after "var".') - self.Next() # eat 'in'. - - body = self.ParseExpression() - - return VarExpressionNode(variables, body) - -# primary ::= # dentifierexpr \| numberexpr \| parenexpr \| ifexpr \| -forexpr \| varexpr def ParsePrimary(self): if isinstance(self.current, -IdentifierToken): return self.ParseIdentifierExpr() elif -isinstance(self.current, NumberToken): return self.ParseNumberExpr() -elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif -isinstance(self.current, ForToken): return self.ParseForExpr() elif -isinstance(self.current, VarToken): return self.ParseVarExpr() elif -self.current == CharacterToken('('): return self.ParseParenExpr() else: -raise RuntimeError('Unknown token when expecting an expression.') - -# unary ::= primary \| unary\_operator unary def ParseUnary(self): # If -the current token is not an operator, it must be a primary expression. 
-if (not isinstance(self.current, CharacterToken) or self.current in -[CharacterToken('('), CharacterToken(',')]): return self.ParsePrimary() - -:: - - # If this is a unary operator, read it. - operator = self.current.char - self.Next() # eat the operator. - return UnaryExpressionNode(operator, self.ParseUnary()) - -# binoprhs ::= (binary\_operator unary)\* def ParseBinOpRHS(self, left, -left\_precedence): # If this is a binary operator, find its precedence. -while True: precedence = self.GetCurrentTokenPrecedence() - -:: - - # If this is a binary operator that binds at least as tightly as the - # current one, consume it; otherwise we are done. - if precedence < left_precedence: - return left - - binary_operator = self.current.char - self.Next() # eat the operator. - - # Parse the unary expression after the binary operator. - right = self.ParseUnary() - - # If binary_operator binds less tightly with right than the operator after - # right, let the pending operator take right as its left. - next_precedence = self.GetCurrentTokenPrecedence() - if precedence < next_precedence: - right = self.ParseBinOpRHS(right, precedence + 1) - - # Merge left/right. - left = BinaryOperatorExpressionNode(binary_operator, left, right) - -# expression ::= unary binoprhs def ParseExpression(self): left = -self.ParseUnary() return self.ParseBinOpRHS(left, 0) - -# prototype # ::= id '(' id\* ')' # ::= binary LETTER number? (id, id) # -::= unary LETTER (id) def ParsePrototype(self): precedence = None if -isinstance(self.current, IdentifierToken): kind = 'normal' -function\_name = self.current.name self.Next() # eat function name. elif -isinstance(self.current, UnaryToken): kind = 'unary' self.Next() # eat -'unary'. if not isinstance(self.current, CharacterToken): raise -RuntimeError('Expected an operator after "unary".') function\_name = -'unary' + self.current.char self.Next() # eat the operator. elif -isinstance(self.current, BinaryToken): kind = 'binary' self.Next() # eat -'binary'. 
if not isinstance(self.current, CharacterToken): raise -RuntimeError('Expected an operator after "binary".') function\_name = -'binary' + self.current.char self.Next() # eat the operator. if -isinstance(self.current, NumberToken): if not 1 <= self.current.value <= -100: raise RuntimeError('Invalid precedence: must be in range [1, -100].') precedence = self.current.value self.Next() # eat the -precedence. else: raise RuntimeError('Expected function name, "unary" or -"binary" in ' 'prototype.') - -:: - - if self.current != CharacterToken('('): - raise RuntimeError('Expected "(" in prototype.') - self.Next() # eat '('. - - arg_names = [] - while isinstance(self.current, IdentifierToken): - arg_names.append(self.current.name) - self.Next() - - if self.current != CharacterToken(')'): - raise RuntimeError('Expected ")" in prototype.') - - # Success. - self.Next() # eat ')'. - - if kind == 'unary' and len(arg_names) != 1: - raise RuntimeError('Invalid number of arguments for a unary operator.') - elif kind == 'binary' and len(arg_names) != 2: - raise RuntimeError('Invalid number of arguments for a binary operator.') - - return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) - -# definition ::= 'def' prototype expression def ParseDefinition(self): -self.Next() # eat def. proto = self.ParsePrototype() body = -self.ParseExpression() return FunctionNode(proto, body) - -# toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = -PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) - -# external ::= 'extern' prototype def ParseExtern(self): self.Next() # -eat extern. 
return self.ParsePrototype() - -# Top-Level parsing def HandleDefinition(self): -self.Handle(self.ParseDefinition, 'Read a function definition:') - -def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') - -def HandleTopLevelExpression(self): try: function = -self.ParseTopLevelExpr().CodeGen() result = -g\_llvm\_executor.run\_function(function, []) print 'Evaluated to:', -result.as\_real(Type.double()) except Exception, e: raise#print -'Error:', e try: self.Next() # Skip for error recovery. except: pass - -def Handle(self, function, message): try: print message, -function().CodeGen() except Exception, e: raise#print 'Error:', e try: -self.Next() # Skip for error recovery. except: pass - -Main driver code. ------------------ - -def main(): # Set up the optimizer pipeline. Start with registering info -about how the # target lays out data structures. -g\_llvm\_pass\_manager.add(g\_llvm\_executor.target\_data) # Promote -allocas to registers. -g\_llvm\_pass\_manager.add(PASS\_PROMOTE\_MEMORY\_TO\_REGISTER) # Do -simple "peephole" optimizations and bit-twiddling optzns. -g\_llvm\_pass\_manager.add(PASS\_INSTRUCTION\_COMBINING) # Reassociate -expressions. g\_llvm\_pass\_manager.add(PASS\_REASSOCIATE) # Eliminate -Common SubExpressions. g\_llvm\_pass\_manager.add(PASS\_GVN) # Simplify -the control flow graph (deleting unreachable blocks, etc). -g\_llvm\_pass\_manager.add(PASS\_CFG\_SIMPLIFICATION) - -g\_llvm\_pass\_manager.initialize() - -# Install standard binary operators. # 1 is lowest possible precedence. -40 is the highest. g\_binop\_precedence['='] = 2 -g\_binop\_precedence['<'] = 10 g\_binop\_precedence['+'] = 20 -g\_binop\_precedence['-'] = 20 g\_binop\_precedence['\*'] = 40 - -# Run the main "interpreter loop". 
while True: print 'ready<', try: raw -= raw\_input() except KeyboardInterrupt: break - -:: - - parser = Parser(Tokenize(raw)) - while True: - # top ::= definition | external | expression | EOF - if isinstance(parser.current, EOFToken): - break - if isinstance(parser.current, DefToken): - parser.HandleDefinition() - elif isinstance(parser.current, ExternToken): - parser.HandleExtern() - else: - parser.HandleTopLevelExpression() - -# Print out all of the generated code. print '', g\_llvm\_module - -if **name** == '**main**\ ': main() {% endhighlight %} - --------------- - -**`Next: Conclusion and other useful LLVM -tidbits `_** +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes + import FunctionPassManager + + from llvm.core import FCMP_ULT, FCMP_ONE from llvm.passes import + (PASS_PROMOTE_MEMORY_TO_REGISTER, PASS_INSTRUCTION_COMBINING, + PASS_REASSOCIATE, PASS_GVN, PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + # The binary operator precedence chart. + g_binop_precedence = {} + + # Creates an alloca instruction in the entry block of the function. This is used + # for mutable variables. 
+ def CreateEntryBlockAlloca(function, var_name): entry = + function.get_entry_basic_block() builder = Builder.new(entry) + builder.position_at_beginning(entry) return + builder.alloca(Type.double(), var_name) + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass class DefToken(object): pass class + ExternToken(object): pass class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass class + BinaryToken(object): pass class UnaryToken(object): pass class + VarToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + elif identifier == 'if': + yield IfToken() + elif identifier == 'then': + yield ThenToken() + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + elif identifier == 'binary': + yield BinaryToken() + elif identifier == 'unary': + yield UnaryToken() + elif identifier == 'var': + yield VarToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_llvm_builder.load(g_named_values[self.name], self.name) else: + raise RuntimeError('Unknown variable name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): # A special case for '=' because we don't want to + emit the LHS as an # expression. if self.operator == '=': # Assignment + requires the LHS to be an identifier. if not isinstance(self.left, + VariableExpressionNode): raise RuntimeError('Destination of "=" must be + a variable.') + + :: + + # Codegen the RHS. 
+ value = self.right.CodeGen() + + # Look up the name. + variable = g_named_values[self.left.name] + + # Store the value and return it. + g_llvm_builder.store(value, variable) + + return value + + left = self.left.CodeGen() + right = self.right.CodeGen() + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + function = g_llvm_module.get_function_named('binary' + self.operator) + return g_llvm_builder.call(function, [left, right], 'binop') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # Expression class for if/then/else. + class IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): condition = self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + function = g_llvm_builder.basic_block.function + + # Create blocks for the then and else cases. 
Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + # Emit then value. + g_llvm_builder.position_at_end(then_block) + then_value = self.then_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + # Emit else block. + g_llvm_builder.position_at_end(else_block) + else_value = self.else_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) + phi = g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + return phi + + # Expression class for for/in. + class ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): # Output this as: # var = alloca double # ... # start + = startexpr # store start -> var # goto loop # loop: # ... # bodyexpr # + ... # loopend: # step = stepexpr # endcond = endexpr # # curvar = load + var # nextvar = curvar + step # store nextvar -> var # br endcond, loop, + endloop # outloop: + + :: + + function = g_llvm_builder.basic_block.function + + # Create an alloca for the variable in the entry block. + alloca = CreateEntryBlockAlloca(function, self.loop_variable) + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Store the value into the alloca. 
+ g_llvm_builder.store(start_value, alloca) + + # Make the new basic block for the loop, inserting after current block. + loop_block = function.append_basic_block('loop') + + # Insert an explicit fall through from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block) + + # Within the loop, the variable is defined equal to the alloca. If it + # shadows an existing variable, we have to restore it, so save it now. + old_value = g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = alloca + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + # Emit the step value. + if self.step: + step_value = self.step.CodeGen() + else: + # If not specified, use 1.0. + step_value = Constant.real(Type.double(), 1) + + # Compute the end condition. + end_condition = self.end.CodeGen() + + # Reload, increment, and restore the alloca. This handles the case where + # the body of the loop mutates the variable. + cur_value = g_llvm_builder.load(alloca, self.loop_variable) + next_value = g_llvm_builder.fadd(cur_value, step_value, 'nextvar') + g_llvm_builder.store(next_value, alloca) + + # Convert condition to a bool by comparing equal to 0.0. + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + + # Create the "after loop" block and insert it. + after_block = function.append_basic_block('afterloop') + + # Insert the conditional branch into the end of loop_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + # Restore the unshadowed variable. 
+ if old_value is not None: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + # Expression class for a unary operator. + class UnaryExpressionNode(ExpressionNode): + + def **init**\ (self, operator, operand): self.operator = operator + self.operand = operand + + def CodeGen(self): operand = self.operand.CodeGen() function = + g_llvm_module.get_function_named('unary' + self.operator) return + g_llvm_builder.call(function, [operand], 'unop') + + # Expression class for var/in. + class VarExpressionNode(ExpressionNode): + + def **init**\ (self, variables, body): self.variables = variables + self.body = body + + def CodeGen(self): old_bindings = {} function = + g_llvm_builder.basic_block.function + + :: + + # Register all variables and emit their initializer. + for var_name, var_expression in self.variables.iteritems(): + # Emit the initializer before adding the variable to scope, this prevents + # the initializer from referencing the variable itself, and permits stuff + # like this: + # var a = 1 in + # var a = a in ... # refers to outer 'a'. + if var_expression is not None: + var_value = var_expression.CodeGen() + else: + var_value = Constant.real(Type.double(), 0) + + alloca = CreateEntryBlockAlloca(function, var_name) + g_llvm_builder.store(var_value, alloca) + + # Remember the old variable binding so that we can restore the binding + # when we unrecurse. + old_bindings[var_name] = g_named_values.get(var_name, None) + + # Remember this binding. + g_named_values[var_name] = alloca + + # Codegen the body, now that all vars are in scope. + body = self.body.CodeGen() + + # Pop all our variables from scope. + for var_name in self.variables: + if old_bindings[var_name] is not None: + g_named_values[var_name] = old_bindings[var_name] + else: + del g_named_values[var_name] + + # Return the body computation. 
+ return body + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes), as well as if it is an operator. + class PrototypeNode(object): + + def **init**\ (self, name, args, is_operator=False, precedence=0): + self.name = name self.args = args self.is_operator = is_operator + self.precedence = precedence + + def IsBinaryOp(self): return self.is_operator and len(self.args) == 2 + + def GetOperatorName(self): assert self.is_operator return self.name[-1] + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If the function took a different number of args, reject. + if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + + return function + + # Create an alloca for each argument and register the argument in the + symbol # table so that references to it will succeed. def + CreateArgumentAllocas(self, function): for arg_name, arg in + zip(self.args, function.args): alloca = CreateEntryBlockAlloca(function, + arg_name) g_llvm_builder.store(arg, alloca) + g_named_values[arg_name] = alloca + + # This class represents a function definition itself. 
+ class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # If this is a binary operator, install its precedence. + if self.prototype.IsBinaryOp(): + operator = self.prototype.GetOperatorName() + g_binop_precedence[operator] = self.prototype.precedence + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Add all arguments to the symbol table and create their allocas. + self.prototype.CreateArgumentAllocas(function) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + if self.prototype.IsBinaryOp(): + del g_binop_precedence[self.prototype.GetOperatorName()] + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens): self.tokens = tokens self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + g_binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. 
+ + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # ifexpr ::= 'if' expression 'then' expression 'else' expression def + ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + # forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' + expression def ParseForExpr(self): self.Next() # eat the for. + + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. 
+ + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + # varexpr ::= 'var' (identifier ('=' expression)?)+ 'in' expression def + ParseVarExpr(self): self.Next() # eat 'var'. + + :: + + variables = {} + + # At least one variable name is required. + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "var".') + + while True: + var_name = self.current.name + self.Next() # eat the identifier. + + # Read the optional initializer. + if self.current == CharacterToken('='): + self.Next() # eat '='. + variables[var_name] = self.ParseExpression() + else: + variables[var_name] = None + + # End of var list, exit loop. + if self.current != CharacterToken(','): + break + self.Next() # eat ','. + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "," in a var expression.') + + # At this point, we have to have 'in'. + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" keyword after "var".') + self.Next() # eat 'in'. 
+ + body = self.ParseExpression() + + return VarExpressionNode(variables, body) + + # primary ::= # identifierexpr \| numberexpr \| parenexpr \| ifexpr \| + forexpr \| varexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr() + elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif + isinstance(self.current, ForToken): return self.ParseForExpr() elif + isinstance(self.current, VarToken): return self.ParseVarExpr() elif + self.current == CharacterToken('('): return self.ParseParenExpr() else: + raise RuntimeError('Unknown token when expecting an expression.') + + # unary ::= primary \| unary_operator unary def ParseUnary(self): # If + the current token is not an operator, it must be a primary expression. + if (not isinstance(self.current, CharacterToken) or self.current in + [CharacterToken('('), CharacterToken(',')]): return self.ParsePrimary() + + :: + + # If this is a unary operator, read it. + operator = self.current.char + self.Next() # eat the operator. + return UnaryExpressionNode(operator, self.ParseUnary()) + + # binoprhs ::= (binary_operator unary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the unary expression after the binary operator. + right = self.ParseUnary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left.
+ next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= unary binoprhs def ParseExpression(self): left = + self.ParseUnary() return self.ParseBinOpRHS(left, 0) + + # prototype # ::= id '(' id\* ')' # ::= binary LETTER number? (id, id) # + ::= unary LETTER (id) def ParsePrototype(self): precedence = None if + isinstance(self.current, IdentifierToken): kind = 'normal' + function_name = self.current.name self.Next() # eat function name. elif + isinstance(self.current, UnaryToken): kind = 'unary' self.Next() # eat + 'unary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "unary".') function_name = + 'unary' + self.current.char self.Next() # eat the operator. elif + isinstance(self.current, BinaryToken): kind = 'binary' self.Next() # eat + 'binary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "binary".') function_name = + 'binary' + self.current.char self.Next() # eat the operator. if + isinstance(self.current, NumberToken): if not 1 <= self.current.value <= + 100: raise RuntimeError('Invalid precedence: must be in range [1, + 100].') precedence = self.current.value self.Next() # eat the + precedence. else: raise RuntimeError('Expected function name, "unary" or + "binary" in ' 'prototype.') + + :: + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. 
+ + if kind == 'unary' and len(arg_names) != 1: + raise RuntimeError('Invalid number of arguments for a unary operator.') + elif kind == 'binary' and len(arg_names) != 2: + raise RuntimeError('Invalid number of arguments for a binary operator.') + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: raise#print + 'Error:', e try: self.Next() # Skip for error recovery. except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: raise#print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Promote + allocas to registers. + g_llvm_pass_manager.add(PASS_PROMOTE_MEMORY_TO_REGISTER) # Do + simple "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. 
g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. g_binop_precedence['='] = 2 + g_binop_precedence['<'] = 10 g_binop_precedence['+'] = 20 + g_binop_precedence['-'] = 20 g_binop_precedence['\*'] = 40 + + # Run the main "interpreter loop". while True: print 'ready<', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw)) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/source/doc/kaleidoscope/PythonLangImpl8.rst b/docs/source/doc/kaleidoscope/PythonLangImpl8.rst index d9b81ac..e69de29 100644 --- a/docs/source/doc/kaleidoscope/PythonLangImpl8.rst +++ b/docs/source/doc/kaleidoscope/PythonLangImpl8.rst @@ -1,277 +0,0 @@ -***************************************************************** -Chapter 8: Conclusion and other useful LLVM tidbits -***************************************************************** - -Written by `Chris Lattner `_ - - -Tutorial Conclusion # {#conclusion} -=================================== - -Welcome to the the final chapter of the `Implementing a language with -LLVM `_ tutorial. In the -course of this tutorial, we have grown our little Kaleidoscope language -from being a useless toy, to being a semi-interesting (but probably -still useless) toy. 
:) - -It is interesting to see how far we've come, and how little code it has -taken. We built the entire lexer, parser, AST, code generator, and an -interactive run-loop (with a JIT!) by-hand in under 540 lines of -(non-comment/non-blank) code. - -Our little language supports a couple of interesting features: it -supports user defined binary and unary operators, it uses JIT -compilation for immediate evaluation, and it supports a few control flow -constructs with SSA construction. - -Part of the idea of this tutorial was to show you how easy and fun it -can be to define, build, and play with languages. Building a compiler -need not be a scary or mystical process! Now that you've seen some of -the basics, I strongly encourage you to take the code and hack on it. -For example, try adding: - -- **global variables** -- While global variables have questional value - in modern software engineering, they are often useful when putting - together quick little hacks like the Kaleidoscope compiler itself. - Fortunately, our current setup makes it very easy to add global - variables: just have value lookup check to see if an unresolved - variable is in the global variable symbol table before rejecting it. - To create a new global variable, make an instance of the LLVM - ``GlobalVariable`` class. - -- **typed variables** -- Kaleidoscope currently only supports variables - of type double. This gives the language a very nice elegance, because - only supporting one type means that you never have to specify types. - Different languages have different ways of handling this. The easiest - way is to require the user to specify types for every variable - definition, and record the type of the variable in the symbol table - along with its Value\*. - -- **arrays, structs, vectors, etc** -- Once you add types, you can - start extending the type system in all sorts of interesting ways. - Simple arrays are very easy and are quite useful for many different - applications. 
Adding them is mostly an exercise in learning how the - LLVM - `getelementptr `_ - instruction works: it is so nifty/unconventional, it `has its own - FAQ `_! If you add - support for recursive types (e.g. linked lists), make sure to read - the `section in the LLVM Programmer's - Manual `_ - that describes how to construct them. - -- **standard runtime** -- Our current language allows the user to - access arbitrary external functions, and we use it for things like - "putchard". As you extend the language to add higher-level - constructs, often these constructs make the most sense if they are - lowered to calls into a language-supplied runtime. For example, if - you add hash tables to the language, it would probably make sense to - add the routines to a runtime, instead of inlining them all the way. - -- **memory management** -- Currently we can only access the stack in - Kaleidoscope. It would also be useful to be able to allocate heap - memory, either with calls to the standard libc malloc/free interface - or with a garbage collector. If you would like to use garbage - collection, note that LLVM fully supports `Accurate Garbage - Collection `_ - including algorithms that move objects and need to scan/update the - stack. - -- **debugger support** -- LLVM supports generation of `DWARF Debug - info `_ which is - understood by common debuggers like GDB. Adding support for debug - info is fairly straightforward. The best way to understand it is to - compile some C/C++ code with "``llvm-gcc -g -O0``\ " and taking a - look at what it produces. - -- **exception handling support** - LLVM supports generation of `zero - cost exceptions `_ - which interoperate with code compiled in other languages. You could - also generate code by implicitly making every function return an - error value and checking it. You could also make explicit use of - setjmp/longjmp. There are many different ways to go here. 
- -- **object orientation, generics, database access, complex numbers, - geometric programming, ...** -- Really, there is no end of crazy - features that you can add to the language. - -- **unusual domains** -- We've been talking about applying LLVM to a - domain that many people are interested in: building a compiler for a - specific language. However, there are many other domains that can use - compiler technology that are not typically considered. For example, - LLVM has been used to implement OpenGL graphics acceleration, - translate C++ code to ActionScript, and many other cute and clever - things. Maybe you will be the first to JIT compile a regular - expression interpreter into native code with LLVM? - -Have fun - try doing something crazy and unusual. Building a language -like everyone else always has, is much less fun than trying something a -little crazy or off the wall and seeing how it turns out. If you get -stuck or want to talk about it, feel free to email the `llvmdev mailing -list `_: it has lots -of people who are interested in languages and are often willing to help -out. - -Before we end this tutorial, I want to talk about some "tips and tricks" -for generating LLVM IR. These are some of the more subtle things that -may not be obvious, but are very useful if you want to take advantage of -LLVM's capabilities. - --------------- - -Properties of the LLVM IR # {#llvmirproperties} -=============================================== - -We have a couple common questions about code in the LLVM IR form - let's -just get these out of the way right now, shall we? - -Target Independence ## {#targetindep} -------------------------------------- - -Kaleidoscope is an example of a "portable language": any program written -in Kaleidoscope will work the same way on any target that it runs on. -Many other languages have this property, e.g. LISP, Java, Haskell, -Javascript, Python, etc. (note that while these languages are portable, -not all their libraries are). 
- -One nice aspect of LLVM is that it is often capable of preserving target -independence in the IR: you can take the LLVM IR for a -Kaleidoscope-compiled program and run it on any target that LLVM -supports, even emitting C code and compiling that on targets that LLVM -doesn't support natively. You can trivially tell that the Kaleidoscope -compiler generates target-independent code because it never queries for -any target-specific information when generating code. - -The fact that LLVM provides a compact, target-independent, -representation for code gets a lot of people excited. Unfortunately, -these people are usually thinking about C or a language from the C -family when they are asking questions about language portability. I say -"unfortunately", because there is really no way to make (fully general) -C code portable, other than shipping the source code around (and of -course, C source code is not actually portable in general either - ever -port a really old application from 32- to 64-bits?). - -The problem with C (again, in its full generality) is that it is heavily -laden with target specific assumptions. As one simple example, the -preprocessor often destructively removes target-independence from the -code when it processes the input text: - -{% highlight c %} #ifdef **i386** int X = 1; #else int X = 42; #endif {% -endhighlight %} - -While it is possible to engineer more and more complex solutions to -problems like this, it cannot be solved in full generality in a way that -is better than shipping the actual source code. - -That said, there are interesting subsets of C that can be made portable. -If you are willing to fix primitive types to a fixed size (say int = -32-bits, and long = 64-bits), don't care about ABI compatibility with -existing binaries, and are willing to give up some other minor features, -you can have portable code. This can make sense for specialized domains -such as an in-kernel language. 
- -Safety Guarantees ## {#safety} ------------------------------- - -Many of the languages above are also "safe" languages: it is impossible -for a program written in Java to corrupt its address space and crash the -process (assuming the JVM has no bugs). Safety is an interesting -property that requires a combination of language design, runtime -support, and often operating system support. - -It is certainly possible to implement a safe language in LLVM, but LLVM -IR does not itself guarantee safety. The LLVM IR allows unsafe pointer -casts, use after free bugs, buffer over-runs, and a variety of other -problems. Safety needs to be implemented as a layer on top of LLVM and, -conveniently, several groups have investigated this. Ask on the `llvmdev -mailing list `_ if -you are interested in more details. - -Language-Specific Optimizations ## {#langspecific} --------------------------------------------------- - -One thing about LLVM that turns off many people is that it does not -solve all the world's problems in one system (sorry 'world hunger', -someone else will have to solve you some other day). One specific -complaint is that people perceive LLVM as being incapable of performing -high-level language-specific optimization: LLVM "loses too much -information". - -Unfortunately, this is really not the place to give you a full and -unified version of "Chris Lattner's theory of compiler design". Instead, -I'll make a few observations: - -First, you're right that LLVM does lose information. For example, as of -this writing, there is no way to distinguish in the LLVM IR whether an -SSA-value came from a C "int" or a C "long" on an ILP32 machine (other -than debug info). Both get compiled down to an 'i32' value and the -information about what it came from is lost. The more general issue -here, is that the LLVM type system uses "structural equivalence" instead -of "name equivalence". 
Another place this surprises people is if you -have two types in a high-level language that have the same structure -(e.g. two different structs that have a single int field): these types -will compile down into a single LLVM type and it will be impossible to -tell what it came from. - -Second, while LLVM does lose information, LLVM is not a fixed target: we -continue to enhance and improve it in many different ways. In addition -to adding new features (LLVM did not always support exceptions or debug -info), we also extend the IR to capture important information for -optimization (e.g. whether an argument is sign or zero extended, -information about pointers aliasing, etc). Many of the enhancements are -user-driven: people want LLVM to include some specific feature, so they -go ahead and extend it. - -Third, it is *possible and easy* to add language-specific optimizations, -and you have a number of choices in how to do it. As one trivial -example, it is easy to add language-specific optimization passes that -"know" things about code compiled for a language. In the case of the C -family, there is an optimization pass that "knows" about the standard C -library functions. If you call "exit(0)" in main(), it knows that it is -safe to optimize that into "return 0;" because C specifies what the -'exit' function does. - -In addition to simple library knowledge, it is possible to embed a -variety of other language-specific information into the LLVM IR. If you -have a specific need and run into a wall, please bring the topic up on -the llvmdev list. At the very worst, you can always treat LLVM as if it -were a "dumb code generator" and implement the high-level optimizations -you desire in your front-end, on the language-specific AST. - --------------- - -Tips and Tricks # {#tipsandtricks} -================================== - -There is a variety of useful tips and tricks that you come to know after -working on/with LLVM that aren't obvious at first glance. 
Instead of -letting everyone rediscover them, this section talks about some of these -issues. - -Implementing portable offsetof/sizeof ## {#offsetofsizeof} ----------------------------------------------------------- - -One interesting thing that comes up, if you are trying to keep the code -generated by your compiler "target independent", is that you often need -to know the size of some LLVM type or the offset of some field in an -llvm structure. For example, you might need to pass the size of a type -into a function that allocates memory. - -Unfortunately, this can vary widely across targets: for example the -width of a pointer is trivially target-specific. However, there is a -`clever way to use the getelementptr -instruction `_ -that allows you to compute this in a portable way. - -Garbage Collected Stack Frames ## {#gcstack} --------------------------------------------- - -Some languages want to explicitly manage their stack frames, often so -that they are garbage collected or to allow easy implementation of -closures. There are often better ways to implement these features than -explicit stack frames, but `LLVM does support -them `_, -if you want. It requires your front-end to convert the code into -`Continuation Passing -Style `_ and -the use of tail calls (which LLVM also supports). diff --git a/docs/source/doc/llvm.core.Constant.rst b/docs/source/doc/llvm.core.Constant.rst index 1656381..4f2ed08 100644 --- a/docs/source/doc/llvm.core.Constant.rst +++ b/docs/source/doc/llvm.core.Constant.rst @@ -11,344 +11,343 @@ created from Python constants. A constant expression is also a constant etc) can be specified, to yield a new ``Constant`` object. Let's see some examples: -{% highlight python %} #!/usr/bin/env python -ti = Type.int() # a 32-bit int type +.. 
code-block:: python -k1 = Constant.int(ti, 42) # "int k1 = 42;" k2 = k1.add( Constant.int( -ti, 10 ) ) # "int k2 = k1 + 10;" - -tr = Type.float() - -r1 = Constant.real(tr, "3.141592") # create from a string r2 = -Constant.real(tr, 1.61803399) # create from a Python float {% -endhighlight %} - -llvm.core.Constant -================== - -- This will become a table of contents (this text will be scraped). + #!/usr/bin/env python + + ti = Type.int() # a 32-bit int type + + k1 = Constant.int(ti, 42) # "int k1 = 42;" k2 = k1.add( Constant.int( + ti, 10 ) ) # "int k2 = k1 + 10;" + + tr = Type.float() + + r1 = Constant.real(tr, "3.141592") # create from a string r2 = + Constant.real(tr, 1.61803399) # create from a Python float {% + endhighlight %} + + # llvm.core.Constant + - This will become a table of contents (this text will be scraped). {:toc} - -Static factory methods ----------------------- - -``null(ty)`` -~~~~~~~~~~~~ - -A null value (all zeros) of type ``ty`` - -``all_ones(ty)`` -~~~~~~~~~~~~~~~~ - -All 1's value of type ``ty`` - -``undef(ty)`` -~~~~~~~~~~~~~ - -An undefined value of type ``ty`` - -``int(ty, value)`` -~~~~~~~~~~~~~~~~~~ - -Integer of type ``ty``, with value ``value`` (a Python int or long) - -``int_signextend(ty, value)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Integer of signed type ``ty`` (use for signed types) - -``real(ty, value)`` -~~~~~~~~~~~~~~~~~~~ - -Floating point value of type ``ty``, with value ``value`` (a Python -float) - -``stringz(value)`` -~~~~~~~~~~~~~~~~~~ - -A null-terminated string. 
``value`` is a Python string - -``string(value)`` -~~~~~~~~~~~~~~~~~ - -As ``string(ty)``, but not null terminated - -``array(ty, consts)`` -~~~~~~~~~~~~~~~~~~~~~ - -Array of type ``ty``, initialized with ``consts`` (an iterable yielding -``Constant`` objects of the appropriate type) - -``struct(ty, consts)`` -~~~~~~~~~~~~~~~~~~~~~~ - -Struct (unpacked) of type ``ty``, initialized with ``consts`` (an -iterable yielding ``Constant`` objects of the appropriate type) - -``packed_struct(ty, consts)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -As ``struct(ty, consts)`` but packed - -``vector(consts)`` -~~~~~~~~~~~~~~~~~~ - -Vector, initialized with ``consts`` (an iterable yielding ``Constant`` -objects of the appropriate type) - -``sizeof(ty)`` -~~~~~~~~~~~~~~ - -Constant value representing the sizeof the type ``ty`` - -Methods -------- - -The following operations on constants are supported. For more details on -any operation, consult the `Constant -Expressions `_ -section of the LLVM Language Reference. - -``k.neg()`` -~~~~~~~~~~~ - -negation, same as ``0 - k`` - -``k.not_()`` -~~~~~~~~~~~~ - -1's complement of ``k``. Note trailing underscore. - -``k.add(k2)`` -~~~~~~~~~~~~~ - -``k + k2``, where ``k`` and ``k2`` are integers. - -``k.fadd(k2)`` -~~~~~~~~~~~~~~ - -``k + k2``, where ``k`` and ``k2`` are floating-point. - -``k.sub(k2)`` -~~~~~~~~~~~~~ - -``k - k2``, where ``k`` and ``k2`` are integers. - -``k.fsub(k2)`` -~~~~~~~~~~~~~~ - -``k - k2``, where ``k`` and ``k2`` are floating-point. - -``k.mul(k2)`` -~~~~~~~~~~~~~ - -``k * k2``, where ``k`` and ``k2`` are integers. - -``k.fmul(k2)`` -~~~~~~~~~~~~~~ - -``k * k2``, where ``k`` and ``k2`` are floating-point. 
- -``k.udiv(k2)`` -~~~~~~~~~~~~~~ - -Quotient of unsigned division of ``k`` with ``k2`` - -``k.sdiv(k2)`` -~~~~~~~~~~~~~~ - -Quotient of signed division of ``k`` with ``k2`` - -``k.fdiv(k2)`` -~~~~~~~~~~~~~~ - -Quotient of floating point division of ``k`` with ``k2`` - -``k.urem(k2)`` -~~~~~~~~~~~~~~ - -Reminder of unsigned division of ``k`` with ``k2`` - -``k.srem(k2)`` -~~~~~~~~~~~~~~ - -Reminder of signed division of ``k`` with ``k2`` - -``k.frem(k2)`` -~~~~~~~~~~~~~~ - -Reminder of floating point division of ``k`` with ``k2`` - -``k.and_(k2)`` -~~~~~~~~~~~~~~ - -Bitwise and of ``k`` and ``k2``. Note trailing underscore. - -``k.or_(k2)`` -~~~~~~~~~~~~~ - -Bitwise or of ``k`` and ``k2``. Note trailing underscore. - -``k.xor(k2)`` -~~~~~~~~~~~~~ - -Bitwise exclusive-or of ``k`` and ``k2``. - -``k.icmp(icmp, k2)`` -~~~~~~~~~~~~~~~~~~~~ - -Compare ``k`` with ``k2`` using the predicate ``icmp``. See -`here `_ for list of predicates for integer -operands. - -``k.fcmp(fcmp, k2)`` -~~~~~~~~~~~~~~~~~~~~ - -Compare ``k`` with ``k2`` using the predicate ``fcmp``. See -`here `_ for list of predicates for real -operands. - -``k.shl(k2)`` -~~~~~~~~~~~~~ - -Shift ``k`` left by ``k2`` bits. - -``k.lshr(k2)`` -~~~~~~~~~~~~~~ - -Shift ``k`` logically right by ``k2`` bits (new bits are 0s). - -``k.ashr(k2)`` -~~~~~~~~~~~~~~ - -Shift ``k`` arithmetically right by ``k2`` bits (new bits are same as -previous sign bit). - -``k.gep(indices)`` -~~~~~~~~~~~~~~~~~~ - -GEP, see `LLVM docs `_. - -``k.trunc(ty)`` -~~~~~~~~~~~~~~~ - -Truncate ``k`` to a type ``ty`` of lower bitwidth. - -``k.sext(ty)`` -~~~~~~~~~~~~~~ - -Sign extend ``k`` to a type ``ty`` of higher bitwidth, while extending -the sign bit. - -``k.zext(ty)`` -~~~~~~~~~~~~~~ - -Sign extend ``k`` to a type ``ty`` of higher bitwidth, all new bits are -0s. - -``k.fptrunc(ty)`` -~~~~~~~~~~~~~~~~~ - -Truncate floating point constant ``k`` to floating point type ``ty`` of -lower size than k's. 
- -``k.fpext(ty)`` -~~~~~~~~~~~~~~~ - -Extend floating point constant ``k`` to floating point type ``ty`` of -higher size than k's. - -``k.uitofp(ty)`` -~~~~~~~~~~~~~~~~ - -Convert an unsigned integer constant ``k`` to floating point constant of -type ``ty``. - -``k.sitofp(ty)`` -~~~~~~~~~~~~~~~~ - -Convert a signed integer constant ``k`` to floating point constant of -type ``ty``. - -``k.fptoui(ty)`` -~~~~~~~~~~~~~~~~ - -Convert a floating point constant ``k`` to an unsigned integer constant -of type ``ty``. - -``k.fptosi(ty)`` -~~~~~~~~~~~~~~~~ - -Convert a floating point constant ``k`` to a signed integer constant of -type ``ty``. - -``k.ptrtoint(ty)`` -~~~~~~~~~~~~~~~~~~ - -Convert a pointer constant ``k`` to an integer constant of type ``ty``. - -``k.inttoptr(ty)`` -~~~~~~~~~~~~~~~~~~ - -Convert an integer constant ``k`` to a pointer constant of type ``ty``. - -``k.bitcast(ty)`` -~~~~~~~~~~~~~~~~~ - -Convert ``k`` to a (equal-width) constant of type ``ty``. - -``k.select(cond,k2,k3)`` -~~~~~~~~~~~~~~~~~~~~~~~~ - -Replace value with ``k2`` if the 1-bit integer constant ``cond`` is 1, -else with ``k3``. - -``k.extract_element(idx)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Extract value at ``idx`` (integer constant) from a vector constant -``k``. - -``k.insert_element(k2,idx)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Insert value ``k2`` (scalar constant) at index ``idx`` (integer -constant) of vector constant ``k``. - -``k.shuffle_vector(k2,mask)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Shuffle vector constant ``k`` based on vector constants ``k2`` and -``mask``. 
- --------------- - -Other Constant Classes -====================== - -The following subclasses of ``Constant`` do not provide additional -methods, **they serve only to provide richer type information.** - -Subclass \| LLVM C++ Class \| Remarks \| ----------\|----------------\|---------\| ``ConstantExpr`` \| -``llvmConstantExpr`` \| A constant expression \| -``ConstantAggregateZero``\ \| ``llvmConstantAggregateZero``\ \| All-zero -constant \| ``ConstantInt``\ \| ``llvmConstantInt``\ \| An integer -constant \| ``ConstantFP``\ \| ``llvmConstantFP``\ \| A floating-point -constant \| ``ConstantArray``\ \| ``llvmConstantArray``\ \| An array -constant \| ``ConstantStruct``\ \| ``llvmConstantStruct``\ \| A -structure constant \| ``ConstantVector``\ \| ``llvmConstantVector``\ \| -A vector constant \| ``ConstantPointerNull``\ \| -``llvmConstantPointerNull``\ \| All-zero pointer constant \| -``UndefValue``\ \| ``llvmUndefValue``\ \| corresponds to ``undef`` of -LLVM IR \| - -These types are helpful in ``isinstance`` checks, like so: - -{% highlight python %} ti = Type.int(32) k1 = Constant.int(ti, 42) # -int32\_t k1 = 42; k2 = Constant.array(ti, [k1, k1]) # int32\_t k2[] = { -k1, k1 }; - -assert isinstance(k1, ConstantInt) assert isinstance(k2, ConstantArray) -{% endhighlight %} + + Static factory methods + ---------------------- + + ``null(ty)`` + ~~~~~~~~~~~~ + + A null value (all zeros) of type ``ty`` + + ``all_ones(ty)`` + ~~~~~~~~~~~~~~~~ + + All 1's value of type ``ty`` + + ``undef(ty)`` + ~~~~~~~~~~~~~ + + An undefined value of type ``ty`` + + ``int(ty, value)`` + ~~~~~~~~~~~~~~~~~~ + + Integer of type ``ty``, with value ``value`` (a Python int or long) + + ``int_signextend(ty, value)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Integer of signed type ``ty`` (use for signed types) + + ``real(ty, value)`` + ~~~~~~~~~~~~~~~~~~~ + + Floating point value of type ``ty``, with value ``value`` (a Python + float) + + ``stringz(value)`` + ~~~~~~~~~~~~~~~~~~ + + A null-terminated 
string. ``value`` is a Python string + + ``string(value)`` + ~~~~~~~~~~~~~~~~~ + + As ``stringz(value)``, but not null terminated + + ``array(ty, consts)`` + ~~~~~~~~~~~~~~~~~~~~~ + + Array of type ``ty``, initialized with ``consts`` (an iterable yielding + ``Constant`` objects of the appropriate type) + + ``struct(ty, consts)`` + ~~~~~~~~~~~~~~~~~~~~~~ + + Struct (unpacked) of type ``ty``, initialized with ``consts`` (an + iterable yielding ``Constant`` objects of the appropriate type) + + ``packed_struct(ty, consts)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + As ``struct(ty, consts)`` but packed + + ``vector(consts)`` + ~~~~~~~~~~~~~~~~~~ + + Vector, initialized with ``consts`` (an iterable yielding ``Constant`` + objects of the appropriate type) + + ``sizeof(ty)`` + ~~~~~~~~~~~~~~ + + Constant value representing the sizeof the type ``ty`` + + Methods + ------- + + The following operations on constants are supported. For more details on + any operation, consult the `Constant + Expressions `_ + section of the LLVM Language Reference. + + ``k.neg()`` + ~~~~~~~~~~~ + + negation, same as ``0 - k`` + + ``k.not_()`` + ~~~~~~~~~~~~ + + 1's complement of ``k``. Note trailing underscore. + + ``k.add(k2)`` + ~~~~~~~~~~~~~ + + ``k + k2``, where ``k`` and ``k2`` are integers. + + ``k.fadd(k2)`` + ~~~~~~~~~~~~~~ + + ``k + k2``, where ``k`` and ``k2`` are floating-point. + + ``k.sub(k2)`` + ~~~~~~~~~~~~~ + + ``k - k2``, where ``k`` and ``k2`` are integers. + + ``k.fsub(k2)`` + ~~~~~~~~~~~~~~ + + ``k - k2``, where ``k`` and ``k2`` are floating-point. + + ``k.mul(k2)`` + ~~~~~~~~~~~~~ + + ``k * k2``, where ``k`` and ``k2`` are integers. + + ``k.fmul(k2)`` + ~~~~~~~~~~~~~~ + + ``k * k2``, where ``k`` and ``k2`` are floating-point.
+ + ``k.udiv(k2)`` + ~~~~~~~~~~~~~~ + + Quotient of unsigned division of ``k`` with ``k2`` + + ``k.sdiv(k2)`` + ~~~~~~~~~~~~~~ + + Quotient of signed division of ``k`` with ``k2`` + + ``k.fdiv(k2)`` + ~~~~~~~~~~~~~~ + + Quotient of floating point division of ``k`` with ``k2`` + + ``k.urem(k2)`` + ~~~~~~~~~~~~~~ + + Remainder of unsigned division of ``k`` with ``k2`` + + ``k.srem(k2)`` + ~~~~~~~~~~~~~~ + + Remainder of signed division of ``k`` with ``k2`` + + ``k.frem(k2)`` + ~~~~~~~~~~~~~~ + + Remainder of floating point division of ``k`` with ``k2`` + + ``k.and_(k2)`` + ~~~~~~~~~~~~~~ + + Bitwise and of ``k`` and ``k2``. Note trailing underscore. + + ``k.or_(k2)`` + ~~~~~~~~~~~~~ + + Bitwise or of ``k`` and ``k2``. Note trailing underscore. + + ``k.xor(k2)`` + ~~~~~~~~~~~~~ + + Bitwise exclusive-or of ``k`` and ``k2``. + + ``k.icmp(icmp, k2)`` + ~~~~~~~~~~~~~~~~~~~~ + + Compare ``k`` with ``k2`` using the predicate ``icmp``. See + `here `_ for list of predicates for integer + operands. + + ``k.fcmp(fcmp, k2)`` + ~~~~~~~~~~~~~~~~~~~~ + + Compare ``k`` with ``k2`` using the predicate ``fcmp``. See + `here `_ for list of predicates for real + operands. + + ``k.shl(k2)`` + ~~~~~~~~~~~~~ + + Shift ``k`` left by ``k2`` bits. + + ``k.lshr(k2)`` + ~~~~~~~~~~~~~~ + + Shift ``k`` logically right by ``k2`` bits (new bits are 0s). + + ``k.ashr(k2)`` + ~~~~~~~~~~~~~~ + + Shift ``k`` arithmetically right by ``k2`` bits (new bits are same as + previous sign bit). + + ``k.gep(indices)`` + ~~~~~~~~~~~~~~~~~~ + + GEP, see `LLVM docs `_. + + ``k.trunc(ty)`` + ~~~~~~~~~~~~~~~ + + Truncate ``k`` to a type ``ty`` of lower bitwidth. + + ``k.sext(ty)`` + ~~~~~~~~~~~~~~ + + Sign extend ``k`` to a type ``ty`` of higher bitwidth, while extending + the sign bit. + + ``k.zext(ty)`` + ~~~~~~~~~~~~~~ + + Zero extend ``k`` to a type ``ty`` of higher bitwidth, all new bits are + 0s.
+ + ``k.fptrunc(ty)`` + ~~~~~~~~~~~~~~~~~ + + Truncate floating point constant ``k`` to floating point type ``ty`` of + lower size than k's. + + ``k.fpext(ty)`` + ~~~~~~~~~~~~~~~ + + Extend floating point constant ``k`` to floating point type ``ty`` of + higher size than k's. + + ``k.uitofp(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert an unsigned integer constant ``k`` to floating point constant of + type ``ty``. + + ``k.sitofp(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert a signed integer constant ``k`` to floating point constant of + type ``ty``. + + ``k.fptoui(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert a floating point constant ``k`` to an unsigned integer constant + of type ``ty``. + + ``k.fptosi(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert a floating point constant ``k`` to a signed integer constant of + type ``ty``. + + ``k.ptrtoint(ty)`` + ~~~~~~~~~~~~~~~~~~ + + Convert a pointer constant ``k`` to an integer constant of type ``ty``. + + ``k.inttoptr(ty)`` + ~~~~~~~~~~~~~~~~~~ + + Convert an integer constant ``k`` to a pointer constant of type ``ty``. + + ``k.bitcast(ty)`` + ~~~~~~~~~~~~~~~~~ + + Convert ``k`` to a (equal-width) constant of type ``ty``. + + ``k.select(cond,k2,k3)`` + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Replace value with ``k2`` if the 1-bit integer constant ``cond`` is 1, + else with ``k3``. + + ``k.extract_element(idx)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Extract value at ``idx`` (integer constant) from a vector constant + ``k``. + + ``k.insert_element(k2,idx)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Insert value ``k2`` (scalar constant) at index ``idx`` (integer + constant) of vector constant ``k``. + + ``k.shuffle_vector(k2,mask)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Shuffle vector constant ``k`` based on vector constants ``k2`` and + ``mask``. 
+ + -------------- + + # Other Constant Classes + The following subclasses of ``Constant`` do not provide additional + methods, **they serve only to provide richer type information.** + + Subclass \| LLVM C++ Class \| Remarks \| + ---------\|----------------\|---------\| ``ConstantExpr`` \| + ``llvmConstantExpr`` \| A constant expression \| + ``ConstantAggregateZero``\ \| ``llvmConstantAggregateZero``\ \| All-zero + constant \| ``ConstantInt``\ \| ``llvmConstantInt``\ \| An integer + constant \| ``ConstantFP``\ \| ``llvmConstantFP``\ \| A floating-point + constant \| ``ConstantArray``\ \| ``llvmConstantArray``\ \| An array + constant \| ``ConstantStruct``\ \| ``llvmConstantStruct``\ \| A + structure constant \| ``ConstantVector``\ \| ``llvmConstantVector``\ \| + A vector constant \| ``ConstantPointerNull``\ \| + ``llvmConstantPointerNull``\ \| All-zero pointer constant \| + ``UndefValue``\ \| ``llvmUndefValue``\ \| corresponds to ``undef`` of + LLVM IR \| + + These types are helpful in ``isinstance`` checks, like so: + + {% highlight python %} ti = Type.int(32) k1 = Constant.int(ti, 42) # + int32_t k1 = 42; k2 = Constant.array(ti, [k1, k1]) # int32_t k2[] = { + k1, k1 }; + + assert isinstance(k1, ConstantInt) assert isinstance(k2, ConstantArray) + diff --git a/docs/source/doc/llvm.core.FunctionType.rst b/docs/source/doc/llvm.core.FunctionType.rst index 6704ebe..8d40009 100644 --- a/docs/source/doc/llvm.core.FunctionType.rst +++ b/docs/source/doc/llvm.core.FunctionType.rst @@ -39,14 +39,10 @@ Returns an iterable object that yields `Type `_ objects that represent, in order, the types of the arguments accepted by the function. Used like this: -{% highlight python %} func\_type = Type.function( Type.int(), [ -Type.int(), Type.int() ] ) for arg in func\_type.args: assert arg.kind -== TYPE\_INTEGER assert arg == Type.int() assert func\_type.arg\_count -== len(func\_type.args) {% endhighlight %} -``arg_count`` -~~~~~~~~~~~~~ +.. 
code-block:: python -[read-only] - -The number of arguments. Same as ``len(obj.args)``, but faster. + func_type = Type.function( Type.int(), [ + Type.int(), Type.int() ] ) for arg in func_type.args: assert arg.kind + == TYPE_INTEGER assert arg == Type.int() assert func_type.arg_count + == len(func_type.args) diff --git a/docs/source/doc/llvm.core.GlobalVariable.rst b/docs/source/doc/llvm.core.GlobalVariable.rst index bee596d..392a6a0 100644 --- a/docs/source/doc/llvm.core.GlobalVariable.rst +++ b/docs/source/doc/llvm.core.GlobalVariable.rst @@ -11,93 +11,29 @@ marked as constants. Global variables can be created either by using the ``add_global_variable`` method of the `Module `_ class, or by using the static method ``GlobalVariable.new``. -{% highlight python %} # create a global variable using -add\_global\_variable method gv1 = -module\_obj.add\_global\_variable(Type.int(), "gv1") -or equivalently, using a static constructor method -================================================== +.. code-block:: python -gv2 = GlobalVariable.new(module\_obj, Type.int(), "gv2") {% endhighlight -%} - -Existing global variables of a module can be accessed by name using -``module_obj.get_global_variable_named(name)`` or -``GlobalVariable.get``. All existing global variables can be enumerated -via iterating over the property ``module_obj.global_variables``. - -{% highlight python %} # retrieve a reference to the global variable -gv1, # using the get\_global\_variable\_named method gv1 = -module\_obj.get\_global\_variable\_named("gv1") - -or equivalently, using the static ``get`` method: -================================================= - -gv2 = GlobalVariable.get(module\_obj, "gv2") - -list all global variables in a module -===================================== - -for gv in module\_obj.global\_variables: print gv.name, "of type", -gv.type {% endhighlight %} - -The initializer for a global variable can be set by assigning to the -``initializer`` property of the object. 
The ``is_global_constant`` -property can be used to indicate that the variable is a global constant. - -Global variables can be delete using the ``delete`` method. Do not use -the object after calling ``delete`` on it. - -{% highlight python %} # add an initializer 10 (32-bit integer) -gv.initializer = Constant.int( Type.int(), 10 ) - -delete the global -================= - -gv.delete() # DO NOT dereference \`gv' beyond this point! gv = None {% -endhighlight %} - -llvm.core.GlobalVariable -======================== - -Base Class ----------- - -- `llvm.core.GlobalValue `_ - -Static Constructors -------------------- - -``new(module_obj, ty, name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Create a global variable named ``name`` of type ``ty`` in the module -``module_obj`` and return a ``GlobalVariable`` object that represents -it. - -``get(module_obj, name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Return a ``GlobalVariable`` object to represent the global variable -named ``name`` in the module ``module_obj`` or raise ``LLVMException`` -if such a variable does not exist. - -Properties ----------- - -``initializer`` -~~~~~~~~~~~~~~~ - -The intializer of the variable. Set to -`llvm.core.Constant `_ (or derived). Gets the -initializer constant, or ``None`` if none exists. ``global_constant`` -``True`` if the variable is a global constant, ``False`` otherwise. - -Methods -------- - -``delete()`` -~~~~~~~~~~~~ - -Deletes the global variable from it's module. **Do not hold any -references to this object after calling ``delete`` on it.** + # create a global variable using + add_global_variable method gv1 = + module_obj.add_global_variable(Type.int(), "gv1") + + # or equivalently, using a static constructor method + gv2 = GlobalVariable.new(module_obj, Type.int(), "gv2") {% endhighlight + %} + + Existing global variables of a module can be accessed by name using + ``module_obj.get_global_variable_named(name)`` or + ``GlobalVariable.get``. 
All existing global variables can be enumerated + via iterating over the property ``module_obj.global_variables``. + + {% highlight python %} # retrieve a reference to the global variable + gv1, # using the get_global_variable_named method gv1 = + module_obj.get_global_variable_named("gv1") + + # or equivalently, using the static ``get`` method: + gv2 = GlobalVariable.get(module_obj, "gv2") + + # list all global variables in a module + for gv in module_obj.global_variables: print gv.name, "of type", + gv.type diff --git a/docs/source/doc/llvm.core.Module.rst b/docs/source/doc/llvm.core.Module.rst index 19b810a..3dbd5c8 100644 --- a/docs/source/doc/llvm.core.Module.rst +++ b/docs/source/doc/llvm.core.Module.rst @@ -8,226 +8,12 @@ Modules are top-level container objects. You need to create a module object first, before you can add global variables, aliases or functions. Modules are created using the static method ``Module.new``: -{% highlight python %} #!/usr/bin/env python -from llvm import \* from llvm.core import \* +.. code-block:: python -create a module -=============== - -my\_module = Module.new('my\_module') {% endhighlight %} - -The constructor of the Module class should *not* be used to instantiate -a Module object. This is a common feature for all llvmpy classes. - - **Convention** - - *All* llvmpy objects are instantiated using static methods of - corresponding classes. Constructors *should not* be used. - - The argument ``my_module`` is a module identifier (a plain string). - A module can also be constructed via deserialization from a bit code - file, using the static method ``from_bitcode``. This method takes a - file-like object as argument, i.e., it should have a ``read()`` - method that returns the entire data in a single call, as is the case - with the builtin file object. 
Here is an example: - -{% highlight python %} # create a module from a bit code file bcfile = -file("test.bc") my\_module = Module.from\_bitcode(bcfile) {% -endhighlight %} - -There is corresponding serialization method also, called ``to_bitcode``: - -{% highlight python %} # write out a bit code file from the module -bcfile = file("test.bc", "w") my\_module.to\_bitcode(bcfile) {% -endhighlight %} - -Modules can also be constructed from LLVM assembly files (``.ll`` -files). The static method ``from_assembly`` can be used for this. -Similar to the ``from_bitcode`` method, this one also takes a file-like -object as argument: - -{% highlight python %} # create a module from an assembly file llfile = -file("test.ll") my\_module = Module.from\_assembly(llfile) {% -endhighlight %} - -Modules can be converted into their assembly representation by -stringifying them (see below). - --------------- - -llvm.core.Module -================ - -- This will become a table of contents (this text will be scraped). - {:toc} - -Static Constructors -------------------- - -``new(module_id)`` -~~~~~~~~~~~~~~~~~~ - -Create a new ``Module`` instance with given ``module_id``. The -``module_id`` should be a string. - -``from_bitcode(fileobj)`` -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Create a new ``Module`` instance by deserializing the bitcode file -represented by the file-like object ``fileobj``. - -``from_assembly(fileobj)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Create a new ``Module`` instance by parsing the LLVM assembly file -represented by the file-like object ``fileobj``. - -Properties ----------- - -``data_layout`` -~~~~~~~~~~~~~~~ - -A string representing the ABI of the platform. - -``target`` -~~~~~~~~~~ - -A string like ``i386-pc-linux-gnu`` or ``i386-pc-solaris2.8``. - -``pointer_size`` -~~~~~~~~~~~~~~~~ - -[read-only] - -The size in bits of pointers, of the target platform. A value of zero -represents ``llvm::Module::AnyPointerSize``. 
- -``global_variables`` -~~~~~~~~~~~~~~~~~~~~ - -[read-only] - -An iterable that yields -`GlobalVariable `_ objects, that -represent the global variables of the module. - -``functions`` -~~~~~~~~~~~~~ - -[read-only] - -An iterable that yields `Function `_ objects, -that represent functions in the module. - -``id`` -~~~~~~ - -A string that represents the module identifier (name). - -Methods -------- - -``get_type_named(name)`` -~~~~~~~~~~~~~~~~~~~~~~~~ - -Return a `StructType `_ object for the given -name. - -The definition of this method was changed to work with LLVM 3.0+, in -which the type system was rewritten. See `LLVM -Blog `_. - -{% comment %} ++++++++REMOVED+++++++++++ ### ``add_type_name(name, ty)`` - -Add an alias (typedef) for the type ``ty`` with the name ``name``. - -``delete_type_name(name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Delete an alias with the name ``name``. ++++++++END-REMOVED+++++++++++ -{% endcomment %} - -``add_global_variable(ty, name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Add a global variable of the type ``ty`` with the name ``name``. Returns -a `GlobalVariable `_ object. - -``get_global_variable_named(name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Get a `GlobalVariable `_ object -corresponding to the global variable with the name ``name``. Raises -``LLVMException`` if such a variable does not exist. - -``add_library(name)`` -~~~~~~~~~~~~~~~~~~~~~ - -Add a dependent library to the Module. This only adds a name to a list -of dependent library. **No linking is performed**. - -``add_function(ty, name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Add a function named ``name`` with the function type ``ty``. ``ty`` must -of an object of type `FunctionType `_. - -``get_function_named(name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Get a `Function `_ object corresponding to the -function with the name ``name``. Raises ``LLVMException`` if such a -function does not exist. 
- -``get_or_insert_function(ty, name)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Like ``get_function_named``, but adds the function first, if not present -(like ``add_function``). - -``verify()`` -~~~~~~~~~~~~ - -Verify the correctness of the module. Raises ``LLVMException`` on -errors. - -``to_bitcode(fileobj)`` -~~~~~~~~~~~~~~~~~~~~~~~ - -Write the bitcode representation of the module to the file-like object -``fileobj``. - -``link_in(other)`` -~~~~~~~~~~~~~~~~~~ - -Link in another module ``other`` into this module. Global variables, -functions etc. are matched and resolved. The ``other`` module is no -longer valid and should not be used after this operation. This API might -be replaced with a full-fledged Linker class in the future. - -Special Methods ---------------- - -``__str__`` -~~~~~~~~~~~ - -``Module`` objects can be stringified into it's LLVM assembly language -representation. - -``__eq__`` -~~~~~~~~~~ - -``Module`` objects can be compared for equality. Internally, this -converts both arguments into their LLVM assembly representations and -compares the resultant strings. - - **Convention** - - *All* llvmpy objects (where it makes sense), when stringified, - return the LLVM assembly representation. ``print module_obj`` for - example, prints the LLVM assembly form of the entire module. - - Such objects, when compared for equality, internally compare these - string representations. 
+ #!/usr/bin/env python + + from llvm import \* from llvm.core import \* + + # create a module + my_module = Module.new('my_module') diff --git a/docs/source/doc/llvm.core.StructType.rst b/docs/source/doc/llvm.core.StructType.rst index df0df17..e69de29 100644 --- a/docs/source/doc/llvm.core.StructType.rst +++ b/docs/source/doc/llvm.core.StructType.rst @@ -1,84 +0,0 @@ -+---------------------------------+ -| layout: page | -+---------------------------------+ -| title: StructType (llvm.core) | -+---------------------------------+ - -llvm.core.StructType -==================== - -Base Class ----------- - -- `llvm.core.Type `_ - -Methods -------- - -``set_body(self, elems, packed=False)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Define the body for opaque identified structure. - -``elems`` is an iterable of `llvm.core.Type `_ If -``packed`` is ``True``, creates a packed structure. - -Properties ----------- - -``is_identified`` -~~~~~~~~~~~~~~~~~ - -[read-only] - -``True`` if this is an identified structure. - -``is_literal`` -~~~~~~~~~~~~~~ - -[read-only] - -``True`` if this is a literal structure. - -``is_opaque`` -~~~~~~~~~~~~~ - -[read-only] - -``True`` if this is an opaque structure. Only identified structure can -be opaque. - -``packed`` -~~~~~~~~~~ - -[read-only] - -``True`` if the structure is packed (no padding between elements). - -``name`` -~~~~~~~~ - -Use in identified structure. If set to empty, the identified structure -is removed from the global context. - -``elements`` -~~~~~~~~~~~~ - -[read-only] - -Returns an iterable object that yields `Type `_ -objects that represent, in order, the types of the elements of the -structure. 
Used like this: - -{% highlight python %} struct\_type = Type.struct( [ Type.int(), -Type.int() ] ) for elem in struct\_type.elements: assert elem.kind == -TYPE\_INTEGER assert elem == Type.int() assert -struct\_type.element\_count == len(struct\_type.elements) {% -endhighlight %} - -``element_count`` -~~~~~~~~~~~~~~~~~ - -[read-only] - -The number of elements. Same as ``len(obj.elements)``, but faster. diff --git a/docs/source/doc/llvm.core.Type.rst b/docs/source/doc/llvm.core.Type.rst index 7cd43ae..58883d7 100644 --- a/docs/source/doc/llvm.core.Type.rst +++ b/docs/source/doc/llvm.core.Type.rst @@ -106,40 +106,23 @@ Properties A value (enum) representing the "type" of the object. It will be one of the following constants defined in ``llvm.core``: -{% highlight python %} # Warning: do not rely on actual numerical -values! TYPE\_VOID = 0 TYPE\_FLOAT = 1 TYPE\_DOUBLE = 2 TYPE\_X86\_FP80 -= 3 TYPE\_FP128 = 4 TYPE\_PPC\_FP128 = 5 TYPE\_LABEL = 6 TYPE\_INTEGER = -7 TYPE\_FUNCTION = 8 TYPE\_STRUCT = 9 TYPE\_ARRAY = 10 TYPE\_POINTER = -11 TYPE\_OPAQUE = 12 TYPE\_VECTOR = 13 TYPE\_METADATA = 14 TYPE\_UNION = -15 {% endhighlight %} + +.. code-block:: python + + # Warning: do not rely on actual numerical + values! TYPE_VOID = 0 TYPE_FLOAT = 1 TYPE_DOUBLE = 2 TYPE_X86_FP80 + = 3 TYPE_FP128 = 4 TYPE_PPC_FP128 = 5 TYPE_LABEL = 6 TYPE_INTEGER = + 7 TYPE_FUNCTION = 8 TYPE_STRUCT = 9 TYPE_ARRAY = 10 TYPE_POINTER = + 11 TYPE_OPAQUE = 12 TYPE_VECTOR = 13 TYPE_METADATA = 14 TYPE_UNION = + 15 + + Example: ^^^^^^^^ -{% highlight python %} assert Type.int().kind == TYPE\_INTEGER assert -Type.void().kind == TYPE\_VOID {% endhighlight %} -Methods -------- +.. code-block:: python -``refine`` -~~~~~~~~~~ - -Used for constructing self-referencing types. See the documentation of -`TypeHandle `_ objects. - -Special Methods ---------------- - -``__str__`` -~~~~~~~~~~~ - -``Type`` objects can be stringified into it's LLVM assembly language -representation. 
- -``__eq__`` -~~~~~~~~~~ - -``Type`` objects can be compared for equality. Internally, this converts -both arguments into their LLVM assembly representations and compares the -resultant strings. + assert Type.int().kind == TYPE_INTEGER assert + Type.void().kind == TYPE_VOID diff --git a/docs/source/doc/llvmpy_package.rst b/docs/source/doc/llvmpy_package.rst index fc690fc..0ac603a 100644 --- a/docs/source/doc/llvmpy_package.rst +++ b/docs/source/doc/llvmpy_package.rst @@ -80,15 +80,8 @@ Pythonically, modules are imported with the statement ``import llvm.core``. However, you might find it more convenient to import llvmpy modules thus: -{% highlight python %} from llvm import \* from llvm.core import \* from -llvm.ee import \* from llvm.passes import \* {% endhighlight %} -This avoids quite some typing. Both conventions work, however. +.. code-block:: python - **Tip** - - Python-style documentation strings (``__doc__``) are present in - llvmpy. You can use the ``help()`` of the interactive Python - interpreter or the ``object?`` of - `IPython `_ to get online help. - (Note: not complete yet!) + from llvm import \* from llvm.core import \* from + llvm.ee import \* from llvm.passes import \* diff --git a/docs/source/doc/types.rst b/docs/source/doc/types.rst index 2ca095c..f3b9da2 100644 --- a/docs/source/doc/types.rst +++ b/docs/source/doc/types.rst @@ -60,50 +60,43 @@ An Example Here is an example that demonstrates the creation of types: -{% highlight python %} #!/usr/bin/env python -integers -======== +.. 
code-block:: python -int\_ty = Type.int() bool\_ty = Type.int(1) int\_64bit = Type.int(64) + #!/usr/bin/env python + + # integers + int_ty = Type.int() bool_ty = Type.int(1) int_64bit = Type.int(64) + + # floats + sprec_real = Type.float() dprec_real = Type.double() + + # arrays and vectors + intar_ty = Type.array( int_ty, 10 ) # "typedef int intar_ty[10];" + twodim = Type.array( intar_ty , 10 ) # "typedef int twodim[10][10];" + vec = Type.array( int_ty, 10 ) + + # structures + s1_ty = Type.struct( [ int_ty, sprec_real ] ) # "struct s1_ty { int + v1; float v2; };" + + # pointers + intptr_ty = Type.pointer(int_ty) # "typedef int \*intptr_ty;" + + # functions + f1 = Type.function( int_ty, [ int_ty ] ) # functions that take 1 + int_ty and return 1 int_ty + + f2 = Type.function( Type.void(), [ int_ty, int_ty ] ) # functions that + take 2 int_tys and return nothing + + f3 = Type.function( Type.void(), ( int_ty, int_ty ) ) # same as f2; + any iterable can be used + + fnargs = [ Type.pointer( Type.int(8) ) ] printf = Type.function( + Type.int(), fnargs, True ) # variadic function -floats -====== -sprec\_real = Type.float() dprec\_real = Type.double() - -arrays and vectors -================== - -intar\_ty = Type.array( int\_ty, 10 ) # "typedef int intar\_ty[10];" -twodim = Type.array( intar\_ty , 10 ) # "typedef int twodim[10][10];" -vec = Type.array( int\_ty, 10 ) - -structures -========== - -s1\_ty = Type.struct( [ int\_ty, sprec\_real ] ) # "struct s1\_ty { int -v1; float v2; };" - -pointers -======== - -intptr\_ty = Type.pointer(int\_ty) # "typedef int \*intptr\_ty;" - -functions -========= - -f1 = Type.function( int\_ty, [ int\_ty ] ) # functions that take 1 -int\_ty and return 1 int\_ty - -f2 = Type.function( Type.void(), [ int\_ty, int\_ty ] ) # functions that -take 2 int\_tys and return nothing - -f3 = Type.function( Type.void(), ( int\_ty, int\_ty ) ) # same as f2; -any iterable can be used - -fnargs = [ Type.pointer( Type.int(8) ) ] printf = Type.function( 
-Type.int(), fnargs, True ) # variadic function {% endhighlight %} -------------- @@ -123,16 +116,8 @@ The following code defines a opaque structure, named "mystruct". The body is defined after the construction using ``StructType.set_body``. The second subtype is a pointer to a "mystruct" type. -{% highlight python %} ts = Type.opaque('mystruct') -ts.set\_body([Type.int(), Type.pointer(ts)]) {% endhighlight %} --------------- +.. code-block:: python -**Related Links** `llvm.core.Type `_, -`llvm.core.IntegerType `_, -`llvm.core.FunctionType `_, -`llvm.core.StructType `_, -`llvm.core.ArrayType `_, -`llvm.core.PointerType `_, -`llvm.core.VectorType `_, -`llvm.core.TypeHandle `_ + ts = Type.opaque('mystruct') + ts.set_body([Type.int(), Type.pointer(ts)])