diff --git a/docs/_build/doctrees/doc/comparision.doctree b/docs/_build/doctrees/doc/comparision.doctree new file mode 100644 index 0000000..9c7f121 Binary files /dev/null and b/docs/_build/doctrees/doc/comparision.doctree differ diff --git a/docs/_build/doctrees/doc/examples.doctree b/docs/_build/doctrees/doc/examples.doctree new file mode 100644 index 0000000..bde8a5b Binary files /dev/null and b/docs/_build/doctrees/doc/examples.doctree differ diff --git a/docs/_build/doctrees/doc/examples/JITTutorial1.doctree b/docs/_build/doctrees/doc/examples/JITTutorial1.doctree new file mode 100644 index 0000000..2820583 Binary files /dev/null and b/docs/_build/doctrees/doc/examples/JITTutorial1.doctree differ diff --git a/docs/_build/doctrees/doc/examples/JITTutorial2.doctree b/docs/_build/doctrees/doc/examples/JITTutorial2.doctree new file mode 100644 index 0000000..ad688aa Binary files /dev/null and b/docs/_build/doctrees/doc/examples/JITTutorial2.doctree differ diff --git a/docs/_build/doctrees/doc/examples/index.doctree b/docs/_build/doctrees/doc/examples/index.doctree new file mode 100644 index 0000000..aaeddba Binary files /dev/null and b/docs/_build/doctrees/doc/examples/index.doctree differ diff --git a/docs/_build/doctrees/doc/firstexample.doctree b/docs/_build/doctrees/doc/firstexample.doctree new file mode 100644 index 0000000..b6154f2 Binary files /dev/null and b/docs/_build/doctrees/doc/firstexample.doctree differ diff --git a/docs/_build/doctrees/doc/functions.doctree b/docs/_build/doctrees/doc/functions.doctree new file mode 100644 index 0000000..5739787 Binary files /dev/null and b/docs/_build/doctrees/doc/functions.doctree differ diff --git a/docs/_build/doctrees/doc/getting_started.doctree b/docs/_build/doctrees/doc/getting_started.doctree new file mode 100644 index 0000000..e5c6510 Binary files /dev/null and b/docs/_build/doctrees/doc/getting_started.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl1.doctree 
b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl1.doctree new file mode 100644 index 0000000..1912cd7 Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl1.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl2.doctree b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl2.doctree new file mode 100644 index 0000000..11a5b39 Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl2.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl3.doctree b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl3.doctree new file mode 100644 index 0000000..a768db3 Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl3.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl4.doctree b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl4.doctree new file mode 100644 index 0000000..c48a099 Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl4.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl5.doctree b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl5.doctree new file mode 100644 index 0000000..7429f26 Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl5.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl6.doctree b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl6.doctree new file mode 100644 index 0000000..2f66cdb Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl6.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl7.doctree b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl7.doctree new file mode 100644 index 0000000..9a7253d Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl7.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl8.doctree 
b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl8.doctree new file mode 100644 index 0000000..3ff1a6a Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/PythonLangImpl8.doctree differ diff --git a/docs/_build/doctrees/doc/kaleidoscope/index.doctree b/docs/_build/doctrees/doc/kaleidoscope/index.doctree new file mode 100644 index 0000000..eba7b1d Binary files /dev/null and b/docs/_build/doctrees/doc/kaleidoscope/index.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Argument.doctree b/docs/_build/doctrees/doc/llvm.core.Argument.doctree new file mode 100644 index 0000000..59e3c57 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Argument.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.ArrayType.doctree b/docs/_build/doctrees/doc/llvm.core.ArrayType.doctree new file mode 100644 index 0000000..d639324 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.ArrayType.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.BasicBlock.doctree b/docs/_build/doctrees/doc/llvm.core.BasicBlock.doctree new file mode 100644 index 0000000..80ff468 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.BasicBlock.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Builder.doctree b/docs/_build/doctrees/doc/llvm.core.Builder.doctree new file mode 100644 index 0000000..0fe5aba Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Builder.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Constant.doctree b/docs/_build/doctrees/doc/llvm.core.Constant.doctree new file mode 100644 index 0000000..29dccee Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Constant.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Function.doctree b/docs/_build/doctrees/doc/llvm.core.Function.doctree new file mode 100644 index 0000000..ca3420a Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Function.doctree differ diff --git 
a/docs/_build/doctrees/doc/llvm.core.FunctionType.doctree b/docs/_build/doctrees/doc/llvm.core.FunctionType.doctree new file mode 100644 index 0000000..a9bba68 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.FunctionType.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.GlobalValue.doctree b/docs/_build/doctrees/doc/llvm.core.GlobalValue.doctree new file mode 100644 index 0000000..78175b7 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.GlobalValue.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.GlobalVariable.doctree b/docs/_build/doctrees/doc/llvm.core.GlobalVariable.doctree new file mode 100644 index 0000000..4633258 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.GlobalVariable.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Instruction.doctree b/docs/_build/doctrees/doc/llvm.core.Instruction.doctree new file mode 100644 index 0000000..e2925bb Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Instruction.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.IntegerType.doctree b/docs/_build/doctrees/doc/llvm.core.IntegerType.doctree new file mode 100644 index 0000000..268fb10 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.IntegerType.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Module.doctree b/docs/_build/doctrees/doc/llvm.core.Module.doctree new file mode 100644 index 0000000..0e4100b Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Module.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.PointerType.doctree b/docs/_build/doctrees/doc/llvm.core.PointerType.doctree new file mode 100644 index 0000000..4eac041 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.PointerType.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.StructType.doctree b/docs/_build/doctrees/doc/llvm.core.StructType.doctree new file mode 100644 index 0000000..4bdce11 Binary files /dev/null and 
b/docs/_build/doctrees/doc/llvm.core.StructType.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Type.doctree b/docs/_build/doctrees/doc/llvm.core.Type.doctree new file mode 100644 index 0000000..a45218c Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Type.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.User.doctree b/docs/_build/doctrees/doc/llvm.core.User.doctree new file mode 100644 index 0000000..39fa2f2 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.User.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.Value.doctree b/docs/_build/doctrees/doc/llvm.core.Value.doctree new file mode 100644 index 0000000..140fd8f Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.Value.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.core.VectorType.doctree b/docs/_build/doctrees/doc/llvm.core.VectorType.doctree new file mode 100644 index 0000000..243cc07 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.core.VectorType.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.ee.EngineBuilder.doctree b/docs/_build/doctrees/doc/llvm.ee.EngineBuilder.doctree new file mode 100644 index 0000000..99140f6 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.ee.EngineBuilder.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.ee.ExecutionEngine.doctree b/docs/_build/doctrees/doc/llvm.ee.ExecutionEngine.doctree new file mode 100644 index 0000000..945c178 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.ee.ExecutionEngine.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.ee.GenericValue.doctree b/docs/_build/doctrees/doc/llvm.ee.GenericValue.doctree new file mode 100644 index 0000000..05da898 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.ee.GenericValue.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.ee.TargetData.doctree b/docs/_build/doctrees/doc/llvm.ee.TargetData.doctree new file mode 100644 index 0000000..34860a0 Binary files 
/dev/null and b/docs/_build/doctrees/doc/llvm.ee.TargetData.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.passes.FunctionPassManager.doctree b/docs/_build/doctrees/doc/llvm.passes.FunctionPassManager.doctree new file mode 100644 index 0000000..b5a2935 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.passes.FunctionPassManager.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.passes.PassManager.doctree b/docs/_build/doctrees/doc/llvm.passes.PassManager.doctree new file mode 100644 index 0000000..8d5dcc5 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.passes.PassManager.doctree differ diff --git a/docs/_build/doctrees/doc/llvm.passes.PassManagerBuilder.doctree b/docs/_build/doctrees/doc/llvm.passes.PassManagerBuilder.doctree new file mode 100644 index 0000000..465bd17 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm.passes.PassManagerBuilder.doctree differ diff --git a/docs/_build/doctrees/doc/llvm_concepts.doctree b/docs/_build/doctrees/doc/llvm_concepts.doctree new file mode 100644 index 0000000..8cc93e0 Binary files /dev/null and b/docs/_build/doctrees/doc/llvm_concepts.doctree differ diff --git a/docs/_build/doctrees/doc/llvmpy_package.doctree b/docs/_build/doctrees/doc/llvmpy_package.doctree new file mode 100644 index 0000000..d195832 Binary files /dev/null and b/docs/_build/doctrees/doc/llvmpy_package.doctree differ diff --git a/docs/_build/doctrees/doc/types.doctree b/docs/_build/doctrees/doc/types.doctree new file mode 100644 index 0000000..b73c7d2 Binary files /dev/null and b/docs/_build/doctrees/doc/types.doctree differ diff --git a/docs/_build/doctrees/doc/userguide.doctree b/docs/_build/doctrees/doc/userguide.doctree new file mode 100644 index 0000000..9772bda Binary files /dev/null and b/docs/_build/doctrees/doc/userguide.doctree differ diff --git a/docs/_build/doctrees/doc/values.doctree b/docs/_build/doctrees/doc/values.doctree new file mode 100644 index 0000000..b3f1491 Binary files /dev/null and 
b/docs/_build/doctrees/doc/values.doctree differ diff --git a/docs/_build/doctrees/environment.pickle b/docs/_build/doctrees/environment.pickle new file mode 100644 index 0000000..bfba40d Binary files /dev/null and b/docs/_build/doctrees/environment.pickle differ diff --git a/docs/_build/doctrees/index.doctree b/docs/_build/doctrees/index.doctree new file mode 100644 index 0000000..e216ed0 Binary files /dev/null and b/docs/_build/doctrees/index.doctree differ diff --git a/docs/_build/html/.buildinfo b/docs/_build/html/.buildinfo new file mode 100644 index 0000000..f0c2afc --- /dev/null +++ b/docs/_build/html/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: 37862f52ab1a5aee0638e82db6615f29 +tags: fbb0d17656682115ca4d033fb2f83ba1 diff --git a/docs/_build/html/_sources/doc/comparision.txt b/docs/_build/html/_sources/doc/comparision.txt new file mode 100644 index 0000000..2f1b87b --- /dev/null +++ b/docs/_build/html/_sources/doc/comparision.txt @@ -0,0 +1,147 @@ ++--------------------------------+ +| layout: page | ++--------------------------------+ +| title: Comparison Operations | ++--------------------------------+ + +Integer Comparision # {#icmp} +============================= + +Predicates for use with ``icmp`` instruction are listed below. All of +these are integer constants defined in the ``llvm.core`` module. 
+ +``ICMP_EQ`` +----------- + +Equality + +``ICMP_NE`` +----------- + +Inequality + +``ICMP_UGT`` +------------ + +Unsigned greater than + +``ICMP_UGE`` +------------ + +Unsigned greater than or equal + +``ICMP_ULT`` +------------ + +Unsigned less than + +``ICMP_ULE`` +------------ + +Unsigned less than or equal + +``ICMP_SGT`` +------------ + +Signed greater than + +``ICMP_SGE`` +------------ + +Signed greater than or equal + +``ICMP_SLT`` +------------ + +Signed less than + +``ICMP_SLE`` +------------ + +Signed less than or equal + +Float Comparision # {#fcmp} +=========================== + +Predicates for use with ``fcmp`` instruction are listed below. All of +these are integer constants defined in the ``llvm.core`` module. + +``FCMP_FALSE`` +-------------- + +Always false + +``FCMP_OEQ`` +------------ + +True if ordered and equal + +``FCMP_OGT`` +------------ + +True if ordered and greater than + +``FCMP_OGE`` +------------ + +True if ordered and greater than or equal + +``FCMP_OLT`` +------------ + +True if ordered and less than + +``FCMP_OLE`` +------------ + +True if ordered and less than or equal + +``FCMP_ONE`` +------------ + +True if ordered and operands are unequal + +``FCMP_ORD`` +------------ + +True if ordered (no NaNs) + +``FCMP_UNO`` +------------ + +True if unordered: ``isnan(X) | isnan(Y)`` + +``FCMP_UEQ`` +------------ + +True if unordered or equal + +``FCMP_UGT`` +------------ + +True if unordered or greater than + +``FCMP_UGE`` +------------ + +True if unordered, greater than or equal + +``FCMP_ULT`` +------------ + +True if unordered, or less than + +``FCMP_ULE`` +------------ + +True if unordered, less than or equal + +``FCMP_UNE`` +------------ + +True if unordered or not equal + +``FCMP_TRUE`` +------------- + +Always true diff --git a/docs/_build/html/_sources/doc/examples.txt b/docs/_build/html/_sources/doc/examples.txt new file mode 100644 index 0000000..9e01700 --- /dev/null +++ b/docs/_build/html/_sources/doc/examples.txt @@ -0,0 
+1,12 @@ +******************************** +Examples and LLVM Tutorials +******************************** + +.. toctree:: + :maxdepth: 1 + + firstexample.rst + examples/index.rst + kaleidoscope/index.rst + + diff --git a/docs/_build/html/_sources/doc/examples/JITTutorial1.txt b/docs/_build/html/_sources/doc/examples/JITTutorial1.txt new file mode 100644 index 0000000..09f3c33 --- /dev/null +++ b/docs/_build/html/_sources/doc/examples/JITTutorial1.txt @@ -0,0 +1,36 @@ +A First Function +================== + +.. code-block:: python + + #!/usr/bin/env python + + from llvm.core import * + + #create a module + module = Module.new("tut1") + + #create a function type taking 3 32-bit integers, return a 32-bit integer + ty_int = Type.int(32) + func_type = Type.function(ty_int, (ty_int,)*3) + + #create a function of that type + mul_add = Function.new (module, func_type, "mul_add") + mul_add.calling_convention = CC_C + x = mul_add.args[0]; x.name = "x" + y = mul_add.args[1]; y.name = "y" + z = mul_add.args[2]; z.name = "z" + + #implement the function + + #new block + blk = mul_add.append_basic_block("entry") + + #IR builder + bldr = Builder.new(blk) + tmp_1 = bldr.mul(x, y, "tmp_1") + tmp_2 = bldr.add(tmp_1, z, "tmp_2") + + bldr.ret(tmp_2) + + print module diff --git a/docs/_build/html/_sources/doc/examples/JITTutorial2.txt b/docs/_build/html/_sources/doc/examples/JITTutorial2.txt new file mode 100644 index 0000000..a9a9979 --- /dev/null +++ b/docs/_build/html/_sources/doc/examples/JITTutorial2.txt @@ -0,0 +1,55 @@ +A More Complicated Function +=========================== + +.. 
code-block:: python + + #!/usr/bin/env python + + from llvm.core import * + + #create a module + module = Module.new("tut2") + + #create a function type taking 2 integers, return a 32-bit integer + ty_int = Type.int(32) + func_type = Type.function(ty_int, (ty_int, ty_int)) + + #create a function of that type + gcd = Function.new(module, func_type, "gcd") + + #name function args + x = gcd.args[0]; x.name = "x" + y = gcd.args[1]; y.name = "y" + + #implement the function + + #blocks... + entry = gcd.append_basic_block("entry") + ret = gcd.append_basic_block("return") + cond_false = gcd.append_basic_block("cond_false") + cond_true = gcd.append_basic_block("cond_true") + cond_false_2 = gcd.append_basic_block("cond_false_2") + + #create a llvm::IRBuilder + bldr = Builder.new(entry) + x_eq_y = bldr.icmp(IPRED_EQ, x, y, "tmp") + bldr.cbranch(x_eq_y, ret, cond_false) + + bldr.position_at_end (ret) + bldr.ret(x) + + bldr.position_at_end(cond_false) + x_lt_y = bldr.icmp(IPRED_ULT, x, y, "tmp") + bldr.cbranch(x_lt_y, cond_true, cond_false_2) + + bldr.position_at_end(cond_true) + y_sub_x = bldr.sub(y, x, "tmp") + recur_1 = bldr.call(gcd, (x, y_sub_x,), "tmp") + bldr.ret(recur_1) + + bldr.position_at_end(cond_false_2) + x_sub_y = bldr.sub(x, y, "x_sub_y") + recur_2 = bldr.call(gcd, (x_sub_y, y,), "tmp") + bldr.ret(recur_2) + + print module diff --git a/docs/_build/html/_sources/doc/examples/index.txt b/docs/_build/html/_sources/doc/examples/index.txt new file mode 100644 index 0000000..0cd3532 --- /dev/null +++ b/docs/_build/html/_sources/doc/examples/index.txt @@ -0,0 +1,10 @@ +LLVM Tutorials +============== + +The following JIT tutorials were contributed by Sebastien Binet. + +.. 
toctree:: + :titlesonly: + + JITTutorial1.rst + JITTutorial2.rst diff --git a/docs/_build/html/_sources/doc/firstexample.txt b/docs/_build/html/_sources/doc/firstexample.txt new file mode 100644 index 0000000..0f6a4d0 --- /dev/null +++ b/docs/_build/html/_sources/doc/firstexample.txt @@ -0,0 +1,125 @@ +Examples +======== + +A Simple Function +----------------- + +Let's create a (LLVM) module containing a single function, corresponding +to the ``C`` function: + +.. code-block:: c + + int sum(int a, int b) + { + return a + b; + } + +Here's how it looks in llvmpy: + +.. code-block:: python + + #!/usr/bin/env python + + # Import the llvmpy modules. + from llvm import * + from llvm.core import * + + # Create an (empty) module. + my_module = Module.new('my_module') + + # All the types involved here are "int"s. This type is represented + # by an object of the llvm.core.Type class: + ty_int = Type.int() # by default 32 bits + + # We need to represent the class of functions that accept two integers + # and return an integer. This is represented by an object of the + # function type (llvm.core.FunctionType): + ty_func = Type.function(ty_int, [ty_int, ty_int]) + + # Now we need a function named 'sum' of this type. Functions are not + # free-standing (in llvmpy); it needs to be contained in a module. + + f_sum = my_module.add_function(ty_func, "sum") + + # Let's name the function arguments as 'a' and 'b'. + f_sum.args[0].name = "a" + f_sum.args[1].name = "b" + + # Our function needs a "basic block" -- a set of instructions that + # end with a terminator (like return, branch etc.). By convention + # the first block is called "entry". + bb = f_sum.append_basic_block("entry") + + # Let's add instructions into the block. For this, we need an + # instruction builder: + builder = Builder.new(bb) + + # OK, now for the instructions themselves. We'll create an add + # instruction that returns the sum as a value, which we'll use + # a ret instruction to return. 
+ tmp = builder.add(f_sum.args[0], f_sum.args[1], "tmp") + builder.ret(tmp) + + # We've completed the definition now! Let's see the LLVM assembly + # language representation of what we've created: + + print my_module + +Here is the output: + +.. code-block:: llvm + + ; ModuleID = 'my_module' + + define i32 @sum(i32 %a, i32 %b) { + entry: + %tmp = add i32 %a, %b ; [#uses=1] + ret i32 %tmp + } + +Adding JIT Compilation +---------------------- + +Let's compile this function in-memory and run it. + +.. code-block:: python + + #!/usr/bin/env python + + # Import the llvmpy modules. + + from llvm import * + from llvm.core import * + from llvm.ee import * # new import: ee = Execution Engine + + #Create a module, as in the previous example. + my_module = Module.new('my_module') + ty_int = Type.int() # by default 32 bits + ty_func = Type.function(ty_int, [ty_int, ty_int]) + f_sum = my_module.add_function(ty_func, "sum") + f_sum.args[0].name = "a" + f_sum.args[1].name = "b" + bb = f_sum.append_basic_block("entry") + builder = Builder.new(bb) + tmp = builder.add(f_sum.args[0], f_sum.args[1], "tmp") + builder.ret(tmp) + + # Create an execution engine object. This will create a JIT compiler + # on platforms that support it, or an interpreter otherwise. + ee = ExecutionEngine.new(my_module) + + # The arguments needs to be passed as "GenericValue" objects. + arg1 = GenericValue.int(ty_int, 100) + arg2 = GenericValue.int(ty_int, 42) + + # Now let's compile and run! + retval = ee.run_function(f_sum, [arg1, arg2]) + + # The return value is also GenericValue. Let's print it. 
+ print "returned", retval.as_int() + +And here's the output: + +:: + + returned 142 diff --git a/docs/_build/html/_sources/doc/functions.txt b/docs/_build/html/_sources/doc/functions.txt new file mode 100644 index 0000000..8922dd0 --- /dev/null +++ b/docs/_build/html/_sources/doc/functions.txt @@ -0,0 +1,149 @@ ++--------------------+ +| layout: page | ++--------------------+ +| title: Functions | ++--------------------+ + +Functions are represented by +`llvm.core.Function `_ objects. They are +contained within modules, and can be created either with the method +``module_obj.add_function`` or the static constructor ``Function.new``. +References to functions already present in a module can be retrieved via +``module_obj.get_function_named`` or by the static constructor method +``Function.get``. All functions in a module can be enumerated by +iterating over ``module_obj.functions``. + + +.. code-block:: python + + # create a type, representing functions that take an integer + # and return a floating point value. + ft = Type.function(Type.float(), [ Type.int() ]) + + # create a function of this type + f1 = module_obj.add_function(ft, "func1") + + # or equivalently, like this: + f2 = Function.new(module_obj, ft, "func2") + + # get a reference to an existing function + f3 = module_obj.get_function_named("func3") + + # or like this: + f4 = Function.get(module_obj, "func4") + + # list all function names in a module + for f in module_obj.functions: print f.name + + + +Intrinsic +========= + +References to intrinsic functions can be obtained via the static constructor +``intrinsic``. This returns a ``Function`` object, calling which is +equivalent to invoking the intrinsic. The ``intrinsic`` method has to be +called with a module object, an intrinsic ID (which is a numeric +constant) and a list of the types of arguments (which LLVM uses to +resolve overloaded intrinsic functions). + + +.. 
code-block:: python + + # get a reference to the llvm.bswap intrinsic + bswap = Function.intrinsic(mod, INTR_BSWAP, [Type.int()]) + + # call it + builder.call(bswap, [value]) + + + +Here, the constant ``INTR_BSWAP``, available from ``llvm.core``, +represents the LLVM intrinsic +`llvm.bswap `_. The +``[Type.int()]`` selects the version of ``llvm.bswap`` that has a single +32-bit integer argument. The list of intrinsic IDs defined as integer +constants in ``llvm.core``. These are: + +{% include intrinsics.csv %} + +There are also target-specific intrinsics (which correspond to that +target's CPU instructions) available, but are omitted here for brevity. +Full list can be seen from +[*intrinsic\_ids.py](https://github.com/numba/llvmpy/blob/master/llvm/*\ intrinsic\_ids.py). +See the `LLVM Language +Reference `_ for more information +on the intrinsics, and the +`test `_ +directory in the source distribution for more examples. The intrinsic ID +can be retrieved from a function object with the read-only property +``intrinsic_id``. + + **Auto-generation of Intrinsic IDs** + + A script (tool/intrgen.py in source tree) generates the intrinsic + IDs automatically. This is necessary when compiling llvmpy with a + different version of LLVM. + +Calling Convention # {#callconv} +================================ + +The function's calling convention can be set using the +``calling_convention`` property. The following (integer) constants +defined in ``llvm.core`` can be used as values: + +Value \| Equivalent LLVM Assembly Keyword \| +------\|----------------------------------\| ``CC_C`` \| ``ccc`` \| +``CC_FASTCALL`` \| ``fastcc`` \| ``CC_COLDCALL`` \| ``coldcc`` \| +``CC_X86_STDCALL`` \| ``x86_stdcallcc`` \| ``CC_X86_FASTCALL`` \| +``x86_fastcallcc`` \| + +See the `LLVM docs `_ +for more information on each. Backend-specific numbered conventions can +be directly passed as integers. 
+ +An arbitrary string identifying which garbage collector to use can be +set or retrieved with the property ``collector``. + +The value objects corresponding to the arguments of a function can be +obtained using the read-only property ``args``. These can be iterated over, +and also be indexed via integers. An example: + + +.. code-block:: python + + # list all argument names and types + for arg in fn.args: print arg.name, "of type", arg.type + + # change the name of the first argument + fn.args[0].name = "objptr" + + + +Basic blocks (see later) are contained within functions. When newly +created, a function has no basic blocks. They have to be added +explicitly, using the ``append_basic_block`` method, which adds a new, +empty basic block as the last one in the function. The first basic block +of the function can be retrieved using the ``get_entry_basic_block`` +method. The existing basic blocks can be enumerated by iterating over +the read-only property ``basic_blocks``. The number of basic +blocks can be obtained via the ``basic_block_count`` method. Note that +``get_entry_basic_block`` is slightly faster than ``basic_blocks[0]`` +and so is ``basic_block_count``, over ``len(f.basic_blocks)``. + + +.. 
code-block:: python + + # add a basic block + b1 = fn.append_basic_block("entry") + + # get the first one + b2 = fn.get_entry_basic_block() + b2 = fn.basic_blocks[0] # slower than previous method + + # print names of all basic blocks + for b in fn.basic_blocks: print b.name + + # get number of basic blocks + n = fn.basic_block_count + n = len(fn.basic_blocks) # slower than previous method diff --git a/docs/_build/html/_sources/doc/getting_started.txt b/docs/_build/html/_sources/doc/getting_started.txt new file mode 100644 index 0000000..d3a470b --- /dev/null +++ b/docs/_build/html/_sources/doc/getting_started.txt @@ -0,0 +1,112 @@ + +Introduction +============ + +`LLVM `_ (Low-Level Virtual Machine) provides +enough infrastructure to use it as the backend for your compiled, or +JIT-compiled language. It provides extensive optimization support, and +static and dynamic (JIT) backends for many platforms. See the website at +http://www.llvm.org/ to discover more. + +Python bindings for LLVM provide a gentler learning curve for working +with the LLVM APIs. It should also be easier to create working +prototypes and experimental languages using this medium. + +Together with `clang `_ or +`llvm-gcc `_ it also provides a +means to quickly instrument C and C++ sources. For example, llvm-gcc can be +used to generate the LLVM assembly for a given C source file, which can +then be loaded and manipulated (adding profiling code to every function, +say) using an llvmpy-based Python script. + +License +------- + +Both LLVM and llvmpy are distributed under (different) permissive open +source licenses. llvmpy uses the `new BSD +license `_. More +information is available +`here `_. + +Platforms +--------- + +llvmpy has been built/tested/reported to work on various GNU/Linux +flavours, BSD, Mac OS X; on i386 and amd64 architectures. Windows is not +supported, for a variety of reasons. + +Versions +-------- + +llvmpy 0.8.2 requires version 3.1 of LLVM. 
It may not work with +previous versions. + +llvmpy has been built and tested with Python 2.7. It should work with +earlier versions. It has not been tried with Python 3.x (patches +welcome). + + +Installation +============ + +The Git repo of llvmpy is at https://github.com/numba/llvmpy.git. +You'll need to build and install it before it can be used. At least the +following will be required for this: + +- C and C++ compilers (gcc/g++) +- Python itself +- Python development files (headers and libraries) +- LLVM, either installed or built + +On Debian-based systems, the first three can be installed with the +command ``sudo apt-get install gcc g++ python python-dev``. Ensure that +your distro's repository has the appropriate version of LLVM! + +It does not matter which compiler LLVM itself was built with (``g++``, +``llvm-g++`` or any other); llvmpy can be built with any compiler. It +has been tried only with gcc/g++ though. + + +LLVM and ``--enable-pic`` +------------------------- + +The result of an LLVM build is a set of static libraries and object +files. The llvmpy package contains an extension package that is built into a +shared object (\_core.so) which links to these static libraries and +object files. It is therefore required that the LLVM libraries and +object files be built with the ``-fPIC`` option (generate position +independent code). Be sure to use the ``--enable-pic`` option while +configuring LLVM (default is no PIC), like this: + +.. code-block:: bash + + $ ~/llvm ./configure --enable-pic --enable-optimized + +llvm-config +----------- + +In order to build llvmpy, its build script needs to know from where it +can invoke the llvm helper program, ``llvm-config``. If you've installed +LLVM, then this will be available in your ``PATH``, and nothing further +needs to be done. If you've built LLVM yourself, or for any reason +``llvm-config`` is not in your ``PATH``, you'll need to pass the full +path of ``llvm-config`` to the build script. 
+ +You'll need to be 'root' to install llvmpy. Remember that your ``PATH`` +is different from that of 'root', so even if ``llvm-config`` is in your +``PATH``, it may not be available when you do ``sudo``. + +Steps +----- + +Get 3.1 version of LLVM, build it. Make sure '--enable-pic' is passed to +LLVM's 'configure'. + +Get llvmpy and install it: + + +.. code-block:: bash + + $ git clone git@github.com:numba/llvmpy.git + $ cd llvmpy + $ python setup.py install diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl1.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl1.txt new file mode 100644 index 0000000..9259d42 --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl1.txt @@ -0,0 +1,323 @@ +************************************************* +Chapter 1: Tutorial Introduction and the Lexer +************************************************* + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction +========= + +Welcome to the "Implementing a language with LLVM" tutorial. This +tutorial runs through the implementation of a simple language, showing +how fun and easy it can be. This tutorial will get you up and started as +well as help to build a framework you can extend to other languages. The +code in this tutorial can also be used as a playground to hack on other +LLVM specific things. + +It is useful to point out ahead of time that this tutorial is really +about teaching compiler techniques and LLVM specifically, *not* about +teaching modern and sane software engineering principles. In practice, +this means that we'll take a number of shortcuts to simplify the +exposition. If you dig in and use the code as a basis for future +projects, fixing its deficiencies shouldn't be hard. + +We've tried to put this tutorial together in a way that makes chapters +easy to skip over if you are already familiar with or are uninterested +in the various pieces. 
The structure of the tutorial is: + +- **`Chapter 1 <#language>`_: Introduction to the Kaleidoscope + language, and the definition of its Lexer** -- This shows where we + are going and the basic functionality that we want it to do. In order + to make this tutorial maximally understandable and hackable, we + choose to implement everything in Python instead of using lexer and + parser generators. LLVM obviously works just fine with such tools; + feel free to use one if you prefer. + +- **`Chapter 2 `_: Implementing a Parser and + AST** -- With the lexer in place, we can talk about parsing + techniques and basic AST construction. This tutorial describes + recursive descent parsing and operator precedence parsing. Nothing in + Chapters 1 or 2 is LLVM-specific; the code doesn't even import the + LLVM modules at this point. :) + +- **`Chapter 3 `_: Code generation to LLVM IR** + -- With the AST ready, we can show off how easy generation of LLVM IR + really is. + +- **`Chapter 4 `_: Adding JIT and Optimizer + support** -- Because a lot of people are interested in using LLVM as + a JIT, we'll dive right into it and show you the 3 lines it takes to + add JIT support. LLVM is also useful in many other ways, but this is + one simple and "sexy" way to show off its power. :) + +- **`Chapter 5 `_: Extending the Language: + Control Flow** -- With the language up and running, we show how to + extend it with control flow operations (if/then/else and a 'for' + loop). This gives us a chance to talk about simple SSA construction + and control flow. + +- **`Chapter 6 `_: Extending the Language: + User-defined Operators** -- This is a silly but fun chapter that + talks about extending the language to let the user program define + their own arbitrary unary and binary operators (with assignable + precedence!). This lets us build a significant piece of the + "language" as library routines.
+ +- **`Chapter 7 `_: Extending the Language: + Mutable Variables** -- This chapter talks about adding user-defined + local variables along with an assignment operator. The interesting + part about this is how easy and trivial it is to construct SSA form + in LLVM: no, LLVM does *not* require your front-end to construct SSA + form! + +- **`Chapter 8 `_: Conclusion and other useful + LLVM tidbits** -- This chapter wraps up the series by talking about + potential ways to extend the language, but also includes a bunch of + pointers to info about "special topics" like adding garbage + collection support, exceptions, debugging, support for "spaghetti + stacks", and a bunch of other tips and tricks. + +By the end of the tutorial, we'll have written a bit less than 540 lines +of non-comment, non-blank, lines of code. With this small amount of +code, we'll have built up a very reasonable compiler for a non-trivial +language including a hand-written lexer, parser, AST, as well as code +generation support with a JIT compiler. While other systems may have +interesting "hello world" tutorials, I think the breadth of this +tutorial is a great testament to the strengths of LLVM and why you +should consider it if you're interested in language or compiler design. + +A note about this tutorial: we expect you to extend the language and +play with it on your own. Take the code and go crazy hacking away at it, +compilers don't need to be scary creatures - it can be a lot of fun to +play with languages! + +-------------- + +The Basic Language # {#language} +================================ + +This tutorial will be illustrated with a toy language that we'll call +"`Kaleidoscope `_\ " (derived +from "meaning beautiful, form, and view"). Kaleidoscope is a procedural +language that allows you to define functions, use conditionals, math, +etc. 
Over the course of the tutorial, we'll extend Kaleidoscope to +support the if/then/else construct, a for loop, user defined operators, +JIT compilation with a simple command line interface, etc. + +Because we want to keep things simple, the only datatype in Kaleidoscope +is a 64-bit floating point type. As such, all values are implicitly +double precision and the language doesn't require type declarations. +This gives the language a very nice and simple syntax. For example, the +following simple example computes `Fibonacci +numbers `_: + + +.. code-block:: + + # Compute the x'th fibonacci number. + def fib(x): + if x < 3: + return 1 + else: + return fib(x-1)+fib(x-2) + + # This expression will compute the 40th number. + fib(40) + + + +We also allow Kaleidoscope to call into standard library functions (the +LLVM JIT makes this completely trivial). This means that you can use the +'extern' keyword to define a function before you use it (this is also +useful for mutually recursive functions). For example: + + +.. code-block:: + + extern sin(arg); + extern cos(arg); + extern atan2(arg1 arg2); + + atan2(sin(0.4), cos(42)) + + + +A more interesting example is included in Chapter 6 where we write a +little Kaleidoscope application that +`displays `_ a Mandelbrot Set at various +levels of magnification. + +Let's dive into the implementation of this language! + +-------------- + +The Lexer # {#lexer} +==================== + +When it comes to implementing a language, the first thing needed is the +ability to process a text file and recognize what it says. The +traditional way to do this is to use a +`lexer `_ (aka +'scanner') to break the input up into "tokens". Each token returned by +the lexer includes a token type and potentially some metadata (e.g. the +numeric value of a number). First, we define the possibilities: + + +.. code-block:: python + + # The lexer yields one of these types for each token.
+ class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): + def __init__(self, name): + self.name = name + + class NumberToken(object): + def __init__(self, value): + self.value = value + + class CharacterToken(object): + def __init__(self, char): + self.char = char + def __eq__(self, other): + return isinstance(other, CharacterToken) and self.char == other.char + def __ne__(self, other): + return not self == other + + +Each token yielded by our lexer will be of one of the above types. For +simple tokens that are always the same, like the "def" keyword, the +lexer will yield ``DefToken()``. Identifiers, numbers and characters, +on the other hand, have extra data, so when the lexer encounters the +number 123.45, it will emit it as ``NumberToken(123.45)``. An identifier +``foo`` will be emitted as ``IdentifierToken('foo')``. And finally, an +unknown character like '+' will be returned as ``CharacterToken('+')``. +You may notice that we overload the equality and inequality operators +for the characters; this will later simplify character comparisons in +the parser code. + +The actual implementation of the lexer is a single function called +``Tokenize``, which takes a string and +`yields `_ +tokens. For simplicity, we will use `regular +expressions `_ to parse out the +tokens. This is terribly inefficient, but perfectly sufficient for our +needs. + +First, we define the regular expressions for our tokens. Numbers are +strings of digits, optionally followed by a period and another string of +digits. Identifiers (and keywords) are alphanumeric strings starting with +a letter and comments are anything between a hash (``#``) and the end of +the line. + + +.. code-block:: python + + import re + + ... + + # Regular expressions that match tokens and comments of our language.
+ REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') + REGEX_IDENTIFIER = re.compile('[a-zA-Z][a-zA-Z0-9]\ *') + REGEX_COMMENT = re.compile('#.*') + + +Next, let's start defining the ``Tokenize`` function itself. The first +thing we need to do is set up a loop that scans the string, while +ignoring whitespace between tokens: + + +.. code-block:: python + + def Tokenize(string): + while string: # Skip whitespace. + if string[0].isspace(): + string = string[1:] + continue + + :: + + ... + + + + + +Next we want to find out what the next token is. For this we run the +regexes we defined above on the remainder of the string. To simplify the +rest of the code, we run all three regexes each time. As mentioned +above, inefficiencies are ignored for the purpose of this tutorial: + + +.. code-block:: python + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + +Now we check if any of the regexes matched. For comments, we simply +ignore the captured match: + + +.. code-block:: python + + # Check if any of the regexes matched and yield + # the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + +For numbers, we yield the captured match, converted to a float and +tagged with the appropriate token type: + +.. code-block:: python + + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + +The identifier case is a little more complex. We have to check for +keywords to decide whether we have captured an identifier or a keyword: + +.. code-block:: python + + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + + +Finally, if we haven't recognized a comment, a number or an identifier, +we yield the current character as an "unknown character" token. This is +used, for example, for operators like ``+`` or ``*``: + + +.. code-block:: python + + else: # Yield the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + +Once we're done with the loop, we return a final end-of-file token: + + +.. code-block:: python + + yield EOFToken() + diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl2.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl2.txt new file mode 100644 index 0000000..3a05ff8 --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl2.txt @@ -0,0 +1,963 @@ +*************************************************** +Chapter 2: Implementing a Parser and AST +*************************************************** + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction # {#intro} +======================= + +Welcome to Chapter 2 of the `Implementing a language with +LLVM `_ tutorial. This +chapter shows you how to use the lexer, built in `Chapter +1 `_, to build a full +`parser `_ for our Kaleidoscope +language. Once we have a parser, we'll define and build an `Abstract +Syntax Tree `_ (AST). + +The parser we will build uses a combination of `Recursive Descent +Parsing `_ and +`Operator-Precedence +Parsing `_ to +parse the Kaleidoscope language (the latter for binary expressions and +the former for everything else). Before we get to parsing though, let's +talk about the output of the parser: the Abstract Syntax Tree.
+ +-------------- + +The Abstract Syntax Tree (AST) # {#ast} +======================================= + +The AST for a program captures its behavior in such a way that it is +easy for later stages of the compiler (e.g. code generation) to +interpret. We basically want one object for each construct in the +language, and the AST should closely model the language. In +Kaleidoscope, we have expressions, a prototype, and a function object. +We'll start with expressions first: + + +.. code-block:: python + + # Base class for all expression nodes. class + ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): def **init**\ (self, value): + self.value = value + + + + + +The code above shows the definition of the base ExpressionNode class and +one subclass which we use for numeric literals. The important thing to +note about this code is that the NumberExpressionNode class captures the +numeric value of the literal as an instance variable. This allows later +phases of the compiler to know what the stored numeric value is. + +Right now we only create the AST, so there are no useful methods on +them. It would be very easy to add a virtual method to pretty print the +code, for example. Here are the other expression AST node definitions +that we'll use in the basic form of the Kaleidoscope language: + + +.. code-block:: python + + # Expression class for referencing a variable, + like "a". class VariableExpressionNode(ExpressionNode): def + **init**\ (self, name): self.name = name + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): def **init**\ (self, + operator, left, right): self.operator = operator self.left = left + self.right = right + + # Expression class for function calls. 
+ class CallExpressionNode(ExpressionNode): def **init**\ (self, callee, + args): self.callee = callee self.args = args + + + +This is all (intentionally) rather straight-forward: variables capture +the variable name, binary operators capture their opcode (e.g. '+'), and +calls capture a function name as well as a list of any argument +expressions. One thing that is nice about our AST is that it captures +the language features without talking about the syntax of the language. +Note that there is no discussion about precedence of binary operators, +lexical structure, etc. + +For our basic language, these are all of the expression nodes we'll +define. Because it doesn't have conditional control flow, it isn't +Turing-complete; we'll fix that in a later installment. The two things +we need next are a way to talk about the interface to a function, and a +way to talk about functions themselves: + + +.. code-block:: python + + # This class represents the "prototype" for a + function, which captures its name, # and its argument names (thus + implicitly the number of arguments the function # takes). class + PrototypeNode(object): def **init**\ (self, name, args): self.name = + name self.args = args + + # This class represents a function definition itself. + class FunctionNode(object): def **init**\ (self, prototype, body): + self.prototype = prototype self.body = body + + + +In Kaleidoscope, functions are typed with just a count of their +arguments. Since all values are double precision floating point, the +type of each argument doesn't need to be stored anywhere. In a more +aggressive and realistic language, the ``ExpressionNode`` class would +probably have a type field. + +With this scaffolding, we can now talk about parsing expressions and +function bodies in Kaleidoscope. + +-------------- + +Parser Basics # {#parserbasics} +=============================== + +Now that we have an AST to build, we need to define the parser code to +build it. 
The idea here is that we want to parse something like +``x + y`` (which is returned as three tokens by the lexer) into an AST +that could be generated with calls like this: + + +.. code-block:: python + + x = VariableExpressionNode('x') y = + VariableExpressionNode('y') result = BinaryOperatorExpressionNode('+', + x, y) + + + +In order to do this, we'll start by defining a lightweight ``Parser`` +class with some basic helper routines: + + +.. code-block:: python + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + + +This implements a simple token buffer around the lexer. This allows us +to look one token ahead at what the lexer is returning. Every function +in our parser will assume that ``self.current`` is the current token +that needs to be parsed. Note that the first token is read as soon as +the parser is instantiated. Let us ignore the ``binop_precedence`` +parameter for now. It will be explained when we start `parsing binary +operators <#parserbinops>`_. + +With these basic helper functions, we can implement the first piece of +our grammar: numeric literals. + +-------------- + +Basic Expression Parsing # {#parserprimexprs} +============================================= + +We start with numeric literals, because they are the simplest to +process. For each production in our grammar, we'll define a function +which parses that production. For numeric literals, we have: + + +.. code-block:: python + + # numberexpr ::= number def + ParseNumberExpr(self): result = NumberExpressionNode(self.current.value) + self.Next() # consume the number. 
return result + + + +This method is very simple: it expects to be called when the current +token is a ``NumberToken``. It takes the current number value, creates a +``NumberExpressionNode``, advances to the next token, and finally +returns. + +There are some interesting aspects to this. The most important one is +that this routine eats all of the tokens that correspond to the +production and returns the lexer buffer with the next token (which is +not part of the grammar production) ready to go. This is a fairly +standard way to go for recursive descent parsers. For a better example, +the parenthesis operator is defined like this: + + +.. code-block:: python + + # parenexpr ::= '(' expression ')' def + ParseParenExpr(self): self.Next() # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + + + + +This function illustrates an interesting aspect of the parser. The +function uses recursion by calling ``ParseExpression`` (we will soon see +that ``ParseExpression`` can call ``ParseParenExpr``). This is powerful +because it allows us to handle recursive grammars, and keeps each +production very simple. Note that parentheses do not cause construction +of AST nodes themselves. While we could do it this way, the most +important role of parentheses is to guide the parser and provide +grouping. Once the parser constructs the AST, parentheses are not +needed. + +The next simple production is for handling variable references and +function calls: + + +.. code-block:: python + + # identifierexpr ::= identifier \| identifier '(' + expression\* ')' def ParseIdentifierExpr(self): identifier_name = + self.current.name self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name); + + # Call. + self.Next() # eat '('.
+ args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + + + + +This routine follows the same style as the other routines. It expects to +be called if the current token is an ``IdentifierToken``. It also has +recursion and error handling. One interesting aspect of this is that it +uses *look-ahead* to determine if the current identifier is a stand +alone variable reference or if it is a function call expression. It +handles this by checking to see if the token after the identifier is a +'(' token, constructing either a ``VariableExpressionNode`` or +``CallExpressionNode`` as appropriate. + +Now that we have all of our simple expression-parsing logic in place, we +can define a helper function to wrap it together into one entry point. +We call this class of expressions "primary" expressions, for reasons +that will become more clear `later in the +tutorial `_. In order to parse an arbitrary +primary expression, we need to determine what sort of expression it is: + + +.. code-block:: python + + # primary ::= identifierexpr \| numberexpr \| + parenexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr(); + elif self.current == CharacterToken('('): return self.ParseParenExpr() + else: raise RuntimeError('Unknown token when expecting an expression.') + + + + +Now that you see the definition of this function, it is more obvious why +we can assume the state of ``Parser.current`` in the various functions. +This uses look-ahead to determine which sort of expression is being +inspected, and then parses it with a function call. 
+ +Now that basic expressions are handled, we need to handle binary +expressions. They are a bit more complex. + +-------------- + +Binary Expression Parsing # {#parserbinops} +=========================================== + +Binary expressions are significantly harder to parse because they are +often ambiguous. For example, when given the string ``x+y*z``, the +parser can choose to parse it as either ``(x+y)*z`` or ``x+(y*z)``. With +common definitions from mathematics, we expect the latter parse, because +``*`` (multiplication) has higher *precedence* than ``+`` (addition). + +There are many ways to handle this, but an elegant and efficient way is +to use `Operator-Precedence +Parsing `_. +This parsing technique uses the precedence of binary operators to guide +recursion. To start with, we need a table of precedences. Remember the +``binop_precedence`` parameter we passed to the ``Parser`` constructor? +Now is the time to use it: + + +.. code-block:: python + + def main(): # Install standard binary operators. + # 1 is lowest possible precedence. 40 is the highest. + operator_precedence = { '<': 10, '+': 20, '-': 20, '\*': 40 } + + # Run the main ``interpreter loop``. while True: + + :: + + ... + + parser = Parser(Tokenize(raw), operator_precedence) + + + + + +For the basic form of Kaleidoscope, we will only support 4 binary +operators (this can obviously be extended by you, our brave and intrepid +reader). Having a dictionary makes it easy to add new operators and +makes it clear that the algorithm doesn't depend on the specific +operators involved, but it would be easy enough to eliminate the map and +hardcode the comparisons. + +We also define a helper function to get the precedence of the current +token, or -1 if the token is not a binary operator: + + +.. code-block:: python + + # Gets the precedence of the current token, or -1 + if the token is not a binary # operator.
def + GetCurrentTokenPrecedence(self): if isinstance(self.current, + CharacterToken): return self.binop_precedence.get(self.current.char, + -1) else: return -1 + + + +With the helper above defined, we can now start parsing binary +expressions. The basic idea of operator precedence parsing is to break +down an expression with potentially ambiguous binary operators into +pieces. Consider, for example, the expression ``a+b+(c+d)*e*f+g``. +Operator precedence parsing considers this as a stream of primary +expressions separated by binary operators. As such, it will first parse +the leading primary expression ``a``, then it will see the pairs +``[+, b] [+, (c+d)] [*, e] [*, f] and [+, g]``. Note that because +parentheses are primary expressions, the binary expression parser +doesn't need to worry about nested subexpressions like (c+d) at all. + +To start, an expression is a primary expression potentially followed by +a sequence of ``[binop,primaryexpr]`` pairs: + + +.. code-block:: python + + # expression ::= primary binoprhs def + ParseExpression(self): left = self.ParsePrimary() return + self.ParseBinOpRHS(left, 0) + + + +``ParseBinOpRHS`` is the function that parses the sequence of pairs for +us. It takes a precedence and a pointer to an expression for the part +that has been parsed so far. Note that ``x`` is a perfectly valid +expression: As such, ``binoprhs`` is allowed to be empty, in which case +it returns the expression that is passed into it. In our example above, +the code passes the expression for ``a`` into ``ParseBinOpRHS`` and the +current token is ``+``. + +The precedence value passed into ``ParseBinOpRHS`` indicates the +*minimal operator precedence* that the function is allowed to eat. For +example, if the current pair stream is ``[+, x]`` and ``ParseBinOpRHS`` +is passed in a precedence of 40, it will not consume any tokens (because +the precedence of '+' is only 20). With this in mind, ``ParseBinOpRHS`` +starts with: + + +..
code-block:: python + + # binoprhs ::= (operator primary)\* def + ParseBinOpRHS(self, left, left_precedence): # If this is a binary + operator, find its precedence. while True: precedence = + self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + + + + +This code gets the precedence of the current token and checks to see if +it is too low. Because we defined invalid tokens to have a precedence of +-1, this check implicitly knows that the pair-stream ends when the token +stream runs out of binary operators. If this check succeeds, we know +that the token is a binary operator and that it will be included in this +expression: + + +.. code-block:: python + + binary_operator = self.current.char self.Next() + # eat the operator. + + :: + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + + + + +As such, this code eats (and remembers) the binary operator and then +parses the primary expression that follows. This builds up the whole +pair, the first of which is ``[+, b]`` for the running example. + +Now that we parsed the left-hand side of an expression and one pair of +the RHS sequence, we have to decide which way the expression associates. +In particular, we could have ``(a+b) binop unparsed`` or +``a + (b binop unparsed)``. To determine this, we look ahead at +``binop`` to determine its precedence and compare it to BinOp's +precedence (which is '+' in this case): + + +.. code-block:: python + + # If binary_operator binds less tightly with + right than the operator after # right, let the pending operator take + right as its left.
next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + + + +If the precedence of the binop to the right of ``RHS`` is lower or equal +to the precedence of our current operator, then we know that the +parentheses associate as ``(a+b) binop ...``. In our example, the +current operator is ``+`` and the next operator is ``+``, we know that +they have the same precedence. In this case we'll create the AST node +for ``a+b``, and then continue parsing: + + +.. code-block:: python + + if precedence < next_precedence: ... if body + omitted ... + + :: + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right); + + + + + +In our example above, this will turn ``a+b+`` into ``(a+b)`` and execute +the next iteration of the loop, with ``+`` as the current token. The +code above will eat, remember, and parse ``(c+d)`` as the primary +expression, which makes the current pair equal to ``[+, (c+d)]``. It +will then evaluate the 'if' conditional above with ``*`` as the binop to +the right of the primary. In this case, the precedence of ``*`` is +higher than the precedence of ``+`` so the if condition will be entered. + +The critical question left here is +``how can the if condition parse the right hand side in full``? In +particular, to build the AST correctly for our example, it needs to get +all of ``( c + d ) * e * f`` as the RHS expression variable. The code to +do this is surprisingly simple (code from the above two blocks +duplicated for context): + + +.. code-block:: python + + # If binary_operator binds less tightly with + right than the operator after # right, let the pending operator take + right as its left. next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: right = self.ParseBinOpRHS(right, + precedence + 1) + + :: + + # Merge left/right. 
+ left = BinaryOperatorExpressionNode(binary_operator, left, right) + + + + + +At this point, we know that the binary operator to the RHS of our +primary has higher precedence than the binop we are currently parsing. +As such, we know that any sequence of pairs whose operators are all +higher precedence than ``+`` should be parsed together and returned as +``RHS``. To do this, we recursively invoke the ``ParseBinOpRHS`` +function specifying ``precedence + 1`` as the minimum precedence +required for it to continue. In our example above, this will cause it to +return the AST node for ``(c+d)*e*f`` as RHS, which is then set as the +RHS of the '+' expression. + +Finally, on the next iteration of the while loop, the ``+g`` piece is +parsed and added to the AST. With this little bit of code (11 +non-trivial lines), we correctly handle fully general binary expression +parsing in a very elegant way. This was a whirlwind tour of this code, +and it is somewhat subtle. I recommend running through it with a few +tough examples to see how it works. + +This wraps up handling of expressions. At this point, we can point the +parser at an arbitrary token stream and build an expression from it, +stopping at the first token that is not part of the expression. Next up +we need to handle function definitions, etc. + +-------------- + +Parsing the Rest # {#parsertop} +=============================== + +The next thing missing is handling of function prototypes. In +Kaleidoscope, these are used both for 'extern' function declarations as +well as function body definitions. The code to do this is +straight-forward and not very interesting (once you've survived +expressions): + + +.. code-block:: python + + # prototype ::= id '(' id\* ')' def + ParsePrototype(self): if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. 
+ + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + + + + +Given this, a function definition is very simple, just a prototype plus +an expression to implement the body: + + +.. code-block:: python + + # definition ::= 'def' prototype expression def + ParseDefinition(self): self.Next() # eat def. proto = + self.ParsePrototype() body = self.ParseExpression() return + FunctionNode(proto, body) + + + +In addition, we support 'extern' to declare functions like 'sin' and +'cos' as well as to support forward declaration of user functions. These +'extern's are just prototypes with no body: + + +.. code-block:: python + + # external ::= 'extern' prototype def + ParseExtern(self): self.Next() # eat extern. return + self.ParsePrototype() + + + +Finally, we'll also let the user type in arbitrary top-level expressions +and evaluate them on the fly. We will handle this by defining anonymous +nullary (zero argument) functions for them: + + +.. code-block:: python + + # toplevelexpr ::= expression def + ParseTopLevelExpr(self): proto = PrototypeNode('', []) return + FunctionNode(proto, self.ParseExpression()) + + + +Now that we have all the pieces, let's build a little driver that will +let us actually *execute* this code we've built! + +-------------- + +The Driver # {#driver} +====================== + +The driver for this simply invokes all of the parsing pieces with a +top-level dispatch loop. There isn't much interesting here, so I'll just +include the top-level loop. See `below <#code>`_ for full code. + + +.. code-block:: python + + # Run the main "interpreter loop". 
while True: + print 'ready>', try: raw = raw_input() except KeyboardInterrupt: return + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + + + + +Here we create a new ``Parser`` for each line read, and try to parse out +all the expressions, declarations and definitions in the line. We also +allow the user to quit using Ctrl+C. + +-------------- + +Conclusions # {#conclusions} +============================ + +With just under 330 lines of commented code (200 lines of non-comment, +non-blank code), we fully defined our minimal language, including a +lexer, parser, and AST builder. With this done, the executable will +validate Kaleidoscope code and tell us if it is grammatically invalid. +For example, here is a sample interaction: + + +.. code-block:: python + + $ python kaleidoscope.py ready> def foo(x y) + x+foo(y, 4.0) Parsed a function definition. ready> def foo(x y) x+y y + Parsed a function definition. Parsed a top-level expression. ready> def + foo(x y) x+y ) Parsed a function definition. Error: Unknown token when + expecting an expression. ready> extern sin(a); Parsed an extern. ready> + ^C $ + + + +There is a lot of room for extension here. You can define new AST nodes, +extend the language in many ways, etc. In the `next +installment `_, we will describe how to generate +LLVM Intermediate Representation (IR) from the AST. + +-------------- + +Full Code Listing # {#code} +=========================== + +Here is the complete code listing for this and the previous chapter. +Note that it is fully self-contained: you don't need LLVM or any +external libraries at all for this. + + +.. 
code-block:: python + + #!/usr/bin/env python + + import re + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. 
+ class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): def **init**\ (self, value): + self.value = value + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): def **init**\ (self, + name): self.name = name + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): def **init**\ (self, + operator, left, right): self.operator = operator self.left = left + self.right = right + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): def **init**\ (self, callee, + args): self.callee = callee self.args = args + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): def **init**\ (self, name, args): self.name + = name self.args = args + + # This class represents a function definition itself. + class FunctionNode(object): def **init**\ (self, prototype, body): + self.prototype = prototype self.body = body + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. 
def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # primary ::= identifierexpr \| numberexpr \| parenexpr def + ParsePrimary(self): if isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. 
+ if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. 
return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Parsed a function definition.') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Parsed an + extern.') + + def HandleTopLevelExpression(self): self.Handle(self.ParseTopLevelExpr, + 'Parsed a top-level expression.') + + def Handle(self, function, message): try: function() print message + except Exception, e: print 'Error:', e try: self.Next() # Skip for error + recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Install standard binary operators. # 1 is lowest possible + precedence. 40 is the highest. operator_precedence = { '<': 10, '+': + 20, '-': 20, '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: return + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + if **name** == '**main**\ ': main() diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl3.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl3.txt new file mode 100644 index 0000000..549cb7b --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl3.txt @@ -0,0 +1,979 @@ +******************************************* +Chapter 3: Code generation to LLVM IR +******************************************* + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction # {#intro} +======================= + +Welcome to Chapter 3 of the `Implementing a language with +LLVM `_ tutorial. This +chapter shows you how to transform the `Abstract Syntax +Tree `_, built in Chapter 2, into LLVM IR. 
This +will teach you a little bit about how LLVM does things, as well as +demonstrate how easy it is to use. It's much more work to build a lexer +and parser than it is to generate LLVM IR code. :) + +**Please note**: the code in this chapter and later requires llvmpy 0.6 +and LLVM 2.7. Earlier versions will most likely not work with it. Also +note that you need to use a version of this tutorial that matches your +llvmpy release: If you are using an official llvmpy release, use the +version of the documentation on the `llvmpy examples +page `_ + +-------------- + +Code Generation Setup # {#basics} +================================= + +In order to generate LLVM IR, we want some simple setup to get started. +First we define code generation methods in each AST node class: + + +.. code-block:: python + + # Expression class for numeric literals like + "1.0". class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): ... + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): ... + + ... + + + +The ``CodeGen`` method says to emit IR for that AST node along with all +the things it depends on, and they all return an LLVM Value object. +"Value" is the class used to represent a "`Static Single Assignment +(SSA) `_ +register" or "SSA value" in LLVM. The most distinct aspect of SSA values +is that their value is computed as the related instruction executes, and +it does not get a new value until (and if) the instruction re-executes. +In other words, there is no way to "change" an SSA value. For more +information, please read up on `Static Single +Assignment `_ +- the concepts are really quite natural once you grok them. + +We will also need to define some global variables which will be used +during code generation: + + +..
code-block:: python + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + + +``g_llvm_module`` is the LLVM construct that contains all of the +functions and global variables in a chunk of code. In many ways, it is +the top-level structure that the LLVM IR uses to contain code. + +``g_llvm_builder`` is a helper object that makes it easy to generate +LLVM instructions. Instances of the +`llvm.core.Builder `_ class keep track of the +current place to insert instructions and have methods to create new +instructions. Note that we do not initialize this variable; instead, it +will be initialized whenever we start generating code for a function. + +Finally, ``g_named_values`` is a dictionary that keeps track of which +values are defined in the current scope and what their LLVM +representation is. In other words, it is a symbol table for the code. In +this form of Kaleidoscope, the only things that can be referenced are +function parameters. As such, function parameters will be in this map +when generating code for their function body. + +With these basics in place, we can start talking about how to generate +code for each expression. Note that this assumes that ``g_llvm_builder`` +has been set up to generate code *into* something. For now, we'll assume +that this has already been done, and we'll just use it to emit code. + +-------------- + +Expression Code Generation # {#exprs} +===================================== + +Generating LLVM code for expression nodes is very straightforward: less +than 35 lines of commented code for all four of our expression nodes. +First we'll do numeric literals: + + +.. 
code-block:: python + + def CodeGen(self): return + Constant.real(Type.double(), self.value) + + + +In llvmpy, floating point numeric constants are represented with the +``llvm.core.ConstantFP`` class. To create one, we can use the static +``real()`` method in the ``llvm.core.Constant`` class. This code +basically just creates and returns a ``ConstantFP``. Note that in the +LLVM IR constants are all uniqued together and shared. For this reason, +we create the constant through a factory method instead of instantiating +one directly. + + +.. code-block:: python + + def CodeGen(self): if self.name in + g_named_values: return g_named_values[self.name] else: raise + RuntimeError('Unknown variable name: ' + self.name) + + + +References to variables are also quite simple using LLVM. In the simple +version of Kaleidoscope, we assume that the variable has already been +emitted somewhere and its value is available. In practice, the only +values that can be in the ``g_named_values`` dictionary are function +arguments. This code simply checks to see that the specified name is in +the map (if not, an unknown variable is being referenced) and returns +the value for it. In future chapters, we'll add support for `loop +induction variables `_ in the symbol table, +and for `local variables `_. + + +.. code-block:: python + + def CodeGen(self): left = self.left.CodeGen() + right = self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + + + + +Binary operators start to get more interesting. 
The basic idea here is +that we recursively emit code for the left-hand side of the expression, +then the right-hand side, then we compute the result of the binary +expression depending on which operator is being used. + +In the example above, the LLVM builder class is starting to show its +value. ``g_llvm_builder`` knows where to insert the newly created +instruction; all you have to do is specify what instruction to create +(e.g. with ``fadd``), which operands to use (``left`` and ``right`` here) +and optionally provide a name for the generated instruction. + +One nice thing about LLVM is that the name is just a hint. For instance, +if the code above emits multiple "addtmp" variables, LLVM will +automatically provide each one with an increasing, unique numeric +suffix. Local value names for instructions are purely optional, but they +make it much easier to read the IR dumps. + +`LLVM instructions `_ are +constrained by strict rules: for example, the left and right operands +of an `add instruction `_ +must have the same type, and the result type of the add must match the +operand types. Because all values in Kaleidoscope are doubles, this +makes for very simple code for add, sub and mul. + +On the other hand, LLVM specifies that the `fcmp +instruction `_ always +returns an 'i1' value (a one bit integer). The problem with this is that +Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get +these semantics, we combine the fcmp instruction with a `uitofp +instruction `_. This +instruction converts its input integer into a floating point value by +treating the input as an unsigned value. In contrast, if we used the +`sitofp instruction `_, +the Kaleidoscope ``<`` operator would return 0.0 and -1.0, depending on +the input value. + + +.. code-block:: python + + def CodeGen(self): # Look up the name in the + global module table. callee = + g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error.
+ if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + + + + +Code generation for function calls is quite straightforward with LLVM. +The code above initially does a function name lookup in the LLVM +Module's symbol table. Recall that the LLVM Module is the container that +holds all of the functions we are JIT'ing. By giving each function the +same name as what the user specifies, we can use the LLVM symbol table +to resolve function names for us. + +Once we have the function to call, we codegen each argument that is to +be passed in, and create an LLVM `call +instruction `_. Note that +LLVM uses the native C calling conventions by default, allowing these +calls to also call into standard library functions like "sin" and "cos", +with no additional effort. + +This wraps up our handling of the four basic expressions that we have so +far in Kaleidoscope. Feel free to go in and add some more. For example, +by browsing the `LLVM language +reference `_ you'll find several +other interesting instructions that are really easy to plug into our +basic framework. + +-------------- + +Function Code Generation # {#funcs} +=================================== + +Code generation for prototypes and functions must handle a number of +details, which make their code less beautiful than expression code +generation, but allow us to illustrate some important points. First, +let's talk about code generation for prototypes: they are used both for +function bodies and external function declarations. The code starts +with: + + +.. code-block:: python + + def CodeGen(self): # Make the function type, eg. + double(double,double).
funct_type = Type.function( Type.double(), + [Type.double()] \* len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + + + + +The call to ``Type.function`` creates the ``FunctionType`` that should +be used for a given Prototype. Since all function arguments in +Kaleidoscope are of type double, the first line creates a list of "N" +LLVM double types. It then uses the ``Type.function`` method to create a +function type that takes "N" doubles as arguments, returns one double as +a result, and that is not vararg (the False parameter indicates this). +Note that Types in LLVM are uniqued just like Constants are, so you +don't instantiate them directly. + +The final line above actually creates the function that the prototype +will correspond to. This indicates the type and name to use, as well as +which module to insert into. Note that by default, the function will +have `external +linkage <https://llvm.org/docs/LangRef.html#linkage-types>`_, which means +that the function may be defined outside the current module and/or that +it is callable by functions outside the module. The name passed in is +the name the user specified: since ``g_llvm_module`` is specified, this +name is registered in ``g_llvm_module``'s symbol table, which is used by +the function call code above. + + +.. code-block:: python + + # If the name conflicted, there was already + something with the same name. # If it has a body, don't allow + redefinition or reextern. if function.name != self.name: + function.delete() function = + g_llvm_module.get_function_named(self.name) + + + +The Module symbol table works just like the Function symbol table when +it comes to name conflicts: if a new function is created with a name that was +previously added to the symbol table, it will get implicitly renamed +when added to the Module. The code above exploits this fact to determine +if there was a previous definition of this function.
+ +In Kaleidoscope, we choose to allow redefinitions of functions in two +cases: first, we want to allow 'extern'ing a function more than once, as +long as the prototypes for the externs match (since all arguments have +the same type, we just have to check that the number of arguments +matches). Second, we want to allow 'extern'ing a function and then +defining a body for it. This is useful when defining mutually recursive +functions. + +In order to implement this, the code above first checks to see if there +is a collision on the name of the function. If so, it deletes the +function we just created (by calling ``delete``) and then calls +``get_function_named`` to get the existing function with the specified +name. + + +.. code-block:: python + + # If the function already has a body, reject + this. if not function.is_declaration: raise RuntimeError('Redefinition + of function.') + + :: + + # If F took a different number of args, reject. + if len(callee.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + + + + +In order to verify the logic above, we first check to see if the +pre-existing function is a forward declaration. Since we don't allow +anything after a full definition of the function, the code rejects the +case where it already has a body. If the previous reference to a function was an 'extern', we simply +verify that the number of arguments for that definition and this one +match up. If not, we emit an error. + + +.. code-block:: python + + # Set names for all arguments and add them to the + variables symbol table. for arg, arg_name in zip(function.args, + self.args): arg.name = arg_name # Add arguments to variable symbol + table.
g_named_values[arg_name] = arg + + :: + + return function + + + + + +The last bit of code for prototypes loops over all of the arguments in +the function, setting the name of the LLVM Argument objects to match, +and registering the arguments in the ``g_named_values`` map for future +use by the ``VariableExpressionNode``. Note that we don't check for +conflicting argument names here (e.g. "extern foo(a b a)"). Doing so +would be very straight-forward with the mechanics we have already used +above. Once this is all set up, it returns the Function object to the +caller. + + +.. code-block:: python + + def CodeGen(self): # Clear scope. + g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + + + + +Code generation for function definitions starts out simply enough: we +just clear out the ``g_named_values`` dictionary to make sure that there +isn't anything in it from the last function we compiled and codegen the +prototype. Code generation of the prototype ensures that there is an +LLVM Function object that is ready to go for us. + + +.. code-block:: python + + # Create a new basic block to start insertion + into. block = function.append_basic_block('entry') global + g_llvm_builder g_llvm_builder = Builder.new(block) + + + +Now we get to the point where ``g_llvm_builder`` is set up. The first +line creates a new `basic +block `_ (named "entry"), +which is inserted into the function. The second line declares that the +global ``g_llvm_builder`` object is to be changed. The last line creates +a new builder that is set up to insert new instructions into the basic +block we just created. Basic blocks in LLVM are an important part of +functions that define the `Control Flow +Graph `_. Since we +don't have any control flow, our functions will only contain one block +at this point. We'll fix this in `Chapter 5 `_ :). + + +.. code-block:: python + + # Finish off the function.
try: return_value = + self.body.CodeGen() g_llvm_builder.ret(return_value) + + :: + + # Validate the generated code, checking for consistency. + function.verify() + + + + + +Once the insertion point is set up, we call the ``CodeGen`` method for +the root expression of the function. If no error happens, this emits +code to compute the expression into the entry block and returns the +value that was computed. Assuming no error, we then create an LLVM `ret +instruction `_, which +completes the function. Once the function is built, we call ``verify``, +which is provided by LLVM. This function does a variety of consistency +checks on the generated code, to determine if our compiler is doing +everything right. Using this is important: it can catch a lot of bugs. +Once the function is finished and validated, we return it. + + +.. code-block:: python + + except: function.delete() raise + + :: + + return function + + + + + +The only piece left here is handling of the error case. For simplicity, +we handle this by merely deleting the function we produced with the +``delete`` method. This allows the user to redefine a function that they +incorrectly typed in before: if we didn't delete it, it would live in +the symbol table, with a body, preventing future redefinition. + +This code does have a bug, though. Since the ``PrototypeNode::CodeGen`` +can return a previously defined forward declaration, our code can +actually delete a forward declaration. There are a number of ways to fix +this bug; see what you can come up with! Here is a testcase: + + +.. code-block:: python + + extern foo(a b) # ok, defines foo. def foo(a b) c + # error, 'c' is invalid. def bar() foo(1, 2) # error, unknown function + "foo" + + + +-------------- + +Driver Changes and Closing Thoughts # {#driver} +=============================================== + +For now, code generation to LLVM doesn't really get us much, except that +we can look at the pretty IR calls. 
The sample code inserts calls to +CodeGen into the ``Handle*`` functions, and then dumps out the LLVM IR. +This gives a nice way to look at the LLVM IR for simple functions. For +example: + + +.. code-block:: bash + + ready> 4+5 Read a top-level expression: define + double @0() { entry: ret double 9.000000e+00 } + + + +Note how the parser turns the top-level expression into anonymous +functions for us. This will be handy when we add JIT support in the next +chapter. Also note that the code is very literally transcribed, no +optimizations are being performed except simple constant folding done by +the Builder. We will add optimizations explicitly in the next chapter. + + +.. code-block:: bash + + ready> def foo(a b) a\ *a + 2*\ a\ *b + b*\ b Read + a function definition: define double @foo(double %a, double %b) { entry: + %multmp = fmul double %a, %a ; [#uses=1] %multmp1 = fmul double + 2.000000e+00, %a ; [#uses=1] %multmp2 = fmul double %multmp1, %b ; + [#uses=1] %addtmp = fadd double %multmp, %multmp2 ; [#uses=1] %multmp3 = + fmul double %b, %b ; [#uses=1] %addtmp4 = fadd double %addtmp, %multmp3 + ; [#uses=1] ret double %addtmp4 } + + + +This shows some simple arithmetic. Notice the striking similarity to the +LLVM builder calls that we use to create the instructions. + + +.. code-block:: bash + + ready> def bar(a) foo(a, 4.0) + bar(31337) Read a + function definition: define double @bar(double %a) { entry: %calltmp = + call double @foo(double %a, double 4.000000e+00) ; [#uses=1] %calltmp1 = + call double @bar(double 3.133700e+04) ; [#uses=1] %addtmp = fadd double + %calltmp, %calltmp1 ; [#uses=1] ret double %addtmp } + + + +This shows some function calls. Note that this function will take a long +time to execute if you call it. In the future we'll add conditional +control flow to actually make recursion useful :). + + +.. 
code-block:: bash + + ready> extern cos(x) Read extern: declare double + @cos(double) + + ready> cos(1.234) Read a top-level expression: define double @1() { + entry: %calltmp = call double @cos(double 1.234000e+00) ; [#uses=1] ret + double %calltmp } + + + +This shows an extern for the libm "cos" function, and a call to it. + + +.. code-block:: bash + + ready> ^C ; ModuleID = 'my cool jit' + + define double @0() { entry: ret double 9.000000e+00 } + + define double @foo(double %a, double %b) { entry: %multmp = fmul double + %a, %a ; [#uses=1] %multmp1 = fmul double 2.000000e+00, %a ; [#uses=1] + %multmp2 = fmul double %multmp1, %b ; [#uses=1] %addtmp = fadd double + %multmp, %multmp2 ; [#uses=1] %multmp3 = fmul double %b, %b ; [#uses=1] + %addtmp4 = fadd double %addtmp, %multmp3 ; [#uses=1] ret double %addtmp4 + } + + define double @bar(double %a) { entry: %calltmp = call double + @foo(double %a, double 4.000000e+00) ; [#uses=1] %calltmp1 = call double + @bar(double 3.133700e+04) ; [#uses=1] %addtmp = fadd double %calltmp, + %calltmp1 ; [#uses=1] ret double %addtmp } + + declare double @cos(double) + + define double @1() { entry: %calltmp = call double @cos(double + 1.234000e+00) ; [#uses=1] ret double %calltmp } + + + +When you quit the current demo, it dumps out the IR for the entire +module generated. Here you can see the big picture with all the +functions referencing each other. + +This wraps up the third chapter of the Kaleidoscope tutorial. Up next, +we'll describe how to `add JIT codegen and optimizer +support `_ to this so we can actually start +running code! + +-------------- + +Full Code Listing # {#code} +=========================== + +Here is the complete code listing for our running example, enhanced with +the LLVM code generator. Because this uses the llvmpy libraries, you +need to `download <../download.html>`_ and +`install <../userguide.html#install>`_ them. + + +.. 
code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder, FCMP_ULT + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + # Expression class for function calls. 
+ class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): + + def **init**\ (self, name, args): self.name = name self.args = args + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If F took a different number of args, reject. + if len(callee.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. 
+ class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + except: + function.delete() + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. 
+ return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # primary ::= identifierexpr \| numberexpr \| parenexpr def + ParsePrimary(self): if isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. 
+ left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): self.Handle(self.ParseTopLevelExpr, + 'Read a top-level expression:') + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Install standard binary operators. # 1 is lowest possible + precedence. 40 is the highest. 
operator_precedence = { '<': 10, '+': + 20, '-': 20, '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl4.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl4.txt new file mode 100644 index 0000000..da6a5d8 --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl4.txt @@ -0,0 +1,808 @@ +************************************************* +Chapter 4: Adding JIT and Optimizer Support +************************************************* + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction # {#intro} +======================= + +Welcome to Chapter 4 of the `Implementing a language with +LLVM `_ tutorial. Chapters +1-3 described the implementation of a simple language and added support +for generating LLVM IR. This chapter describes two new techniques: +adding optimizer support to your language, and adding JIT compiler +support. These additions will demonstrate how to get nice, efficient +code for the Kaleidoscope language. + +-------------- + +Trivial Constant Folding # {#trivialconstfold} +============================================== + +Our demonstration for Chapter 3 is elegant and easy to extend. +Unfortunately, it does not produce wonderful code. The LLVM Builder, +however, does give us obvious optimizations when compiling simple code: + + +.. 
code-block:: bash + + ready> def test(x) 1+2+x Read function definition: + define double @test(double %x) { entry: %addtmp = fadd double + 3.000000e+00, %x ret double %addtmp } + + + +This code is not a literal transcription of the AST built by parsing the +input. That would be: + + +.. code-block:: bash + + ready> def test(x) 1+2+x Read function definition: + define double @test(double %x) { entry: %addtmp = fadd double + 2.000000e+00, 1.000000e+00 %addtmp1 = fadd double %addtmp, %x ret double + %addtmp1 } + + + +Constant folding, as seen above, in particular, is a very common and +very important optimization: so much so that many language implementors +implement constant folding support in their AST representation. + +With LLVM, you don't need this support in the AST. Since all calls to +build LLVM IR go through the LLVM IR builder, the builder itself checks +to see if there is a constant folding opportunity when you call it. If +so, it just does the constant fold and returns the constant instead of +creating an instruction. + +Well, that was easy :). In practice, we recommend always using +``llvm.core.Builder`` when generating code like this. It has no +"syntactic overhead" for its use (you don't have to uglify your compiler +with constant checks everywhere) and it can dramatically reduce the +amount of LLVM IR that is generated in some cases (particularly for +languages with a macro preprocessor or that use a lot of constants). + +On the other hand, the ``Builder`` is limited by the fact that it does +all of its analysis inline with the code as it is built. If you take a +slightly more complex example: + + +.. 
code-block:: bash + + ready> def test(x) (1+2+x)\*(x+(1+2)) Read a + function definition: define double @test(double %x) { entry: %addtmp = + fadd double 3.000000e+00, %x ; [#uses=1] %addtmp1 = fadd double %x, + 3.000000e+00 ; [#uses=1] %multmp = fmul double %addtmp, %addtmp1 ; + [#uses=1] ret double %multmp } + + + +In this case, the LHS and RHS of the multiplication are the same value. +We'd really like to see this generate ``tmp = x+3; result = tmp*tmp;`` +instead of computing ``x+3`` twice. + +Unfortunately, no amount of local analysis will be able to detect and +correct this. This requires two transformations: reassociation of +expressions (to make the adds lexically identical) and Common +Subexpression Elimination (CSE) to delete the redundant add instruction. +Fortunately, LLVM provides a broad range of optimizations that you can +use, in the form of "passes". + +-------------- + +LLVM Optimization Passes # {#optimizerpasses} +============================================= + +LLVM provides many optimization passes, which do many different sorts of +things and have different tradeoffs. Unlike other systems, LLVM doesn't +hold to the mistaken notion that one set of optimizations is right for +all languages and for all situations. LLVM allows a compiler implementor +to make complete decisions about what optimizations to use, in which +order, and in what situation. + +As a concrete example, LLVM supports both "whole module" passes, which +look across as large a body of code as they can (often a whole file, +but if run at link time, this can be a substantial portion of the whole +program). It also supports and includes "per-function" passes which just +operate on a single function at a time, without looking at other +functions. For more information on passes and how they are run, see the +`How to Write a Pass `_ +document and the `List of LLVM +Passes `_. 
+ +For Kaleidoscope, we are currently generating functions on the fly, one +at a time, as the user types them in. We aren't shooting for the +ultimate optimization experience in this setting, but we also want to +catch the easy and quick stuff where possible. As such, we will choose +to run a few per-function optimizations as the user types the function +in. If we wanted to make a "static Kaleidoscope compiler", we would use +exactly the code we have now, except that we would defer running the +optimizer until the entire file has been parsed. + +In order to get per-function optimizations going, we need to set up a +`FunctionPassManager `_ +to hold and organize the LLVM optimizations that we want to run. Once we +have that, we can add a set of optimizations to run. The code looks like +this: + + +.. code-block:: python + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + ... + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + + +This code defines a ``FunctionPassManager``, ``g_llvm_pass_manager``. +Once it is set up, we use a series of "add" calls to add a bunch of LLVM +passes. The first pass is basically boilerplate, it adds a pass so that +later optimizations know how the data structures in the program are laid +out. 
(The "``g_llvm_executor``\ " variable is related to the JIT, which +we will get to in the next section.) In this case, we choose to add 4 +optimization passes. The passes we chose here are a pretty standard set +of "cleanup" optimizations that are useful for a wide variety of code. I +won't delve into what they do but, believe me, they are a good starting +place :). + +Once the pass manager is set up, we need to make use of it. We do this +by running it after our newly created function is constructed (in +``FunctionNode.CodeGen``), but before it is returned to the client: + + +.. code-block:: python + + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + :: + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + + + + + +As you can see, this is pretty straightforward. The +``FunctionPassManager`` optimizes and updates the LLVM Function in +place, improving (hopefully) its body. With this in place, we can try +our test above again: + + +.. code-block:: bash + + ready> def test(x) (1+2+x)\*(x+(1+2)) Read a + function definition: define double @test(double %x) { entry: %addtmp = + fadd double %x, 3.000000e+00 ; [#uses=2] %multmp = fmul double %addtmp, + %addtmp ; [#uses=1] ret double %multmp } + + + +As expected, we now get our nicely optimized code, saving a floating +point add instruction from every execution of this function. + +LLVM provides a wide variety of optimizations that can be used in +certain circumstances. Some `documentation about the various +passes `_ is available, but it +isn't very complete. Another good source of ideas can come from looking +at the passes that ``llvm-gcc`` or ``llvm-ld`` run to get started. The +``opt`` tool allows you to experiment with passes from the command line, +so you can see if they do anything. + +Now that we have reasonable code coming out of our front-end, let's talk +about executing it! 
+ +-------------- + +Adding a JIT Compiler # {#jit} +============================== + +Code that is available in LLVM IR can have a wide variety of tools +applied to it. For example, you can run optimizations on it (as we did +above), you can dump it out in textual or binary forms, you can compile +the code to an assembly file (.s) for some target, or you can JIT +compile it. The nice thing about the LLVM IR representation is that it +is the "common currency" between many different parts of the compiler. + +In this section, we'll add JIT compiler support to our interpreter. The +basic idea that we want for Kaleidoscope is to have the user enter +function bodies as they do now, but immediately evaluate the top-level +expressions they type in. For example, if they type in "1 + 2", we +should evaluate and print out 3. If they define a function, they should +be able to call it from the command line. + +In order to do this, we first declare and initialize the JIT. This is +done by adding and initializing a global variable: + + +.. code-block:: python + + # The LLVM execution engine. g_llvm_executor = + ExecutionEngine.new(g_llvm_module) + + + +This creates an abstract "Execution Engine" which can be either a JIT +compiler or the LLVM interpreter. LLVM will automatically pick a JIT +compiler for you if one is available for your platform, otherwise it +will fall back to the interpreter. + +Once the ``ExecutionEngine`` is created, the JIT is ready to be used. We +can use the ``run_function`` method of the execution engine to execute a +compiled function and get its return value. In our case, this means that +we can change the code that parses a top-level expression to look like +this: + + +.. 
code-block:: python + + def HandleTopLevelExpression(self): try: function + = self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. except: pass {% endhighlight + %} + + Recall that we compile top-level expressions into a self-contained LLVM + function that takes no arguments and returns the computed double. + + With just these two changes, lets see how Kaleidoscope works now! + + {% highlight python %} ready> 4+5 Read a top level expression: define + double @0() { entry: ret double 9.000000e+00 } + + Evaluated to: 9.0 + + + +Well this looks like it is basically working. The dump of the function +shows the "no argument function that always returns double" that we +synthesize for each top-level expression that is typed in. This +demonstrates very basic functionality, but can we do more? + + +.. code-block:: python + + ready> def testfunc(x y) x + y\*2 Read a function + definition: define double @testfunc(double %x, double %y) { entry: + %multmp = fmul double %y, 2.000000e+00 ; [#uses=1] %addtmp = fadd double + %multmp, %x ; [#uses=1] ret double %addtmp } + + ready> testfunc(4, 10) Read a top level expression: define double @0() { + entry: %calltmp = call double @testfunc(double 4.000000e+00, double + 1.000000e+01) ; [#uses=1] ret double %calltmp } + + *Evaluated to: 24.0* + + + +This illustrates that we can now call user code, but there is something +a bit subtle going on here. Note that we only invoke the JIT on the +anonymous functions that *call testfunc*, but we never invoked it on +*testfunc* itself. What actually happened here is that the JIT scanned +for all non-JIT'd functions transitively called from the anonymous +function and compiled all of them before returning from +``run_function()``. 
+ +The JIT provides a number of other more advanced interfaces for things +like freeing allocated machine code, rejit'ing functions to update them, +etc. However, even with this simple code, we get some surprisingly +powerful capabilities - check this out (I removed the dump of the +anonymous functions, you should get the idea by now :) : + + +.. code-block:: bash + + ready> extern sin(x) Read an extern: declare double + @sin(double) + + ready> extern cos(x) Read an extern: declare double @cos(double) + + ready> sin(1.0) *Evaluated to: 0.841470984808* + + ready> def foo(x) sin(x)\ *sin(x) + cos(x)*\ cos(x) Read a function + definition: define double @foo(double %x) { entry: %calltmp = call + double @sin(double %x) ; [#uses=1] %calltmp1 = call double @sin(double + %x) ; [#uses=1] %multmp = fmul double %calltmp, %calltmp1 ; [#uses=1] + %calltmp2 = call double @cos(double %x) ; [#uses=1] %calltmp3 = call + double @cos(double %x) ; [#uses=1] %multmp4 = fmul double %calltmp2, + %calltmp3 ; [#uses=1] %addtmp = fadd double %multmp, %multmp4 ; + [#uses=1] ret double %addtmp } + + ready> foo(4.0) *Evaluated to: 1.000000* + + + +Whoa, how does the JIT know about sin and cos? The answer is +surprisingly simple: in this example, the JIT started execution of a +function and got to a function call. It realized that the function was +not yet JIT compiled and invoked the standard set of routines to resolve +the function. In this case, there is no body defined for the function, +so the JIT ended up calling ``dlsym("sin")`` on the Python process that +is hosting our Kaleidoscope prompt. Since ``sin`` is defined within the +JIT's address space, it simply patches up calls in the module to call +the libm version of ``sin`` directly. + +One interesting application of this is that we can now extend the +language by writing arbitrary C++ code to implement operations. For +example, we can create a C file with the following simple function: + + +.. 
code-block:: c + + #include + + double putchard(double x) { putchar((char)x); return 0; } {% + endhighlight %} + + We can then compile this into a shared library with GCC: + + {% highlight bash %} gcc -shared -fPIC -o putchard.so putchard.c {% + endhighlight %} + + Now we can load this library into the Python process using + ``llvm.core.load_library_permanently`` and access it from Kaleidoscope + to produce simple output to the console: + + {% highlight python %} >>> import llvm.core >>> + llvm.core.load_library_permanently('/home/max/llvmpy-tutorial/putchard.so') + >>> import kaleidoscope >>> kaleidoscope.main() ready> extern + putchard(x) Read an extern: declare double @putchard(double) + + ready> putchard(65) + putchard(66) + putchard(67) + putchard(10) *ABC* + Evaluated to: 0.0 + + + +Similar code could be used to implement file I/O, console input, and +many other capabilities in Kaleidoscope. + +This completes the JIT and optimizer chapter of the Kaleidoscope +tutorial. At this point, we can compile a non-Turing-complete +programming language, optimize and JIT compile it in a user-driven way. +Next up we'll look into `extending the language with control flow +constructs `_, tackling some interesting LLVM IR +issues along the way. + +-------------- + +Full Code Listing # {#code} +=========================== + +Here is the complete code listing for our running example, enhanced with +the LLVM JIT and optimizer: + + +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder, FCMP_ULT from llvm.ee import ExecutionEngine, TargetData from + llvm.passes import FunctionPassManager from llvm.passes import + (PASS_INSTRUCTION_COMBINING, PASS_REASSOCIATE, PASS_GVN, + PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. 
+ g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass + + class DefToken(object): pass + + class ExternToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. 
+ if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + # Expression class for function calls. 
+ class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): + + def **init**\ (self, name, args): self.name = name self.args = args + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If F took a different number of args, reject. + if len(callee.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. 
+ class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. 
+ args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # primary ::= identifierexpr \| numberexpr \| parenexpr def + ParsePrimary(self): if isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. 
+ next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. 
except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. operator_precedence = { '<': 10, '+': 20, '-': 20, + '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. 
print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl5.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl5.txt new file mode 100644 index 0000000..d4d01fa --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl5.txt @@ -0,0 +1,1411 @@ +***************************************************** +Chapter 5: Extending the Language: Control Flow +***************************************************** + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction # {#intro} +======================= + +Welcome to Chapter 5 of the `Implementing a language with +LLVM `_ tutorial. Parts +1-4 described the implementation of the simple Kaleidoscope language and +included support for generating LLVM IR, followed by optimizations and a +JIT compiler. Unfortunately, as presented, Kaleidoscope is mostly +useless: it has no control flow other than call and return. This means +that you can't have conditional branches in the code, significantly +limiting its power. In this episode of "build that compiler", we'll +extend Kaleidoscope to have an if/then/else expression plus a simple +'for' loop. + +-------------- + +If/Then/Else # {#ifthen} +======================== + +Extending Kaleidoscope to support if/then/else is quite straightforward. +It basically requires adding support for this "new" concept to the +lexer, parser, AST, and LLVM code emitter. This example is nice, because +it shows how easy it is to "grow" a language over time, incrementally +extending it as new ideas are discovered. + +Before we get going on "how" we add this extension, let's talk about +"what" we want. The basic idea is that we want to be able to write this +sort of thing: + + +.. code-block:: python + + def fib(x) if x < 3 then 1 else fib(x-1) + + fib(x-2) + + + +In Kaleidoscope, every construct is an expression: there are no +statements.
As such, the if/then/else expression needs to return a value +like any other. Since we're using a mostly functional form, we'll have +it evaluate its conditional, then return the 'then' or 'else' value +based on how the condition was resolved. This is very similar to the C +"?:" expression. + +The semantics of the if/then/else expression is that it evaluates the +condition to a boolean equality value: 0.0 is considered to be false and +everything else is considered to be true. If the condition is true, the +first subexpression is evaluated and returned, if the condition is +false, the second subexpression is evaluated and returned. Since +Kaleidoscope allows side-effects, this behavior is important to nail +down. + +Now that we know what we "want", let's break this down into its +constituent pieces. + +Lexer Extensions for If/Then/Else ## {#iflexer} +----------------------------------------------- + +The lexer extensions are straightforward. First we add new token classes +for the relevant tokens: + + +.. code-block:: python + + class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass + + + +Once we have that, we recognize the new keywords in the lexer. This is +pretty simple stuff: + + +.. code-block:: python + + ... if identifier == 'def': yield DefToken() elif + identifier == 'extern': yield ExternToken() elif identifier == 'if': + yield IfToken() elif identifier == 'then': yield ThenToken() elif + identifier == 'else': yield ElseToken() else: yield + IdentifierToken(identifier) + + + +AST Extensions for If/Then/Else ## {#ifast} +------------------------------------------- + +To represent the new expression we add a new AST node for it: + + +.. code-block:: python + + # Expression class for if/then/else. class + IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): ... 
+ + + +The AST node just has pointers to the various subexpressions. + +Parser Extensions for If/Then/Else ## {#ifparser} +------------------------------------------------- + +Now that we have the relevant tokens coming from the lexer and we have +the AST node to build, our parsing logic is relatively straightforward. +First we define a new parsing function: + + +.. code-block:: python + + # ifexpr ::= 'if' expression 'then' expression + 'else' expression def ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + + + + +Next we hook it up as a primary expression: + + +.. code-block:: python + + def ParsePrimary(self): if + isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr(); elif isinstance(self.current, IfToken): + return self.ParseIfExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + + +LLVM IR for If/Then/Else ## {#ifir} +----------------------------------- + +Now that we have it parsing and building the AST, the final piece is +adding LLVM code generation support. This is the most interesting part +of the if/then/else example, because this is where it starts to +introduce new concepts. All of the code above has been thoroughly +described in previous chapters. + +To motivate the code we want to produce, lets take a look at a simple +example. Consider: + + +.. 
code-block:: python + + extern foo(); extern bar(); def baz(x) if x then + foo() else bar(); + + + +If you disable optimizations, the code you'll (soon) get from +Kaleidoscope looks something like this: + + +.. code-block:: llvm + + declare double @foo() declare double @bar() define + double @baz(double %x) { entry: %ifcond = fcmp one double %x, + 0.000000e+00 br i1 %ifcond, label %then, label %else then: ; preds = + %entry %calltmp = call double @foo() br label %ifcont else: ; preds = %entry %calltmp1 = + call double @bar() br label %ifcont ifcont: ; preds = %else, %then + %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ] ret double + %iftmp } + + + +To visualize the control flow graph, you can use a nifty feature of the +LLVM `opt `_ tool. If you put this LLVM +IR into "t.ll" and run ``llvm-as < t.ll | opt -analyze -view-cfg``, a +`window will pop +up `_ and +you'll see this graph: + +Another way to get this is to call "``function.viewCFG()``\ " or +"``function.viewCFGOnly()``\ " (where ``function`` is a "``llvm.core.Function``\ ") +either by inserting actual calls into the code and recompiling or by +calling these in the debugger. LLVM has many nice features for +visualizing various graphs, but note that these are available only if +your LLVM was built with Graphviz support (accomplished by having +Graphviz and Ghostview installed when building LLVM). + +Getting back to the generated code, it is fairly simple: the entry block +evaluates the conditional expression ("x" in our case here) and compares +the result to 0.0 with the +`fcmp `_ one instruction +('one' is "Ordered and Not Equal"). Based on the result of this +expression, the code jumps to either the "then" or "else" blocks, which +contain the expressions for the true/false cases. + +Once the then/else blocks are finished executing, they both branch back +to the 'ifcont' block to execute the code that happens after the +if/then/else. In this case the only thing left to do is to return to the +caller of the function.
The question then becomes: how does the code +know which expression to return? + +The answer to this question involves an important SSA operation: the +`Phi +operation `_. +If you're not familiar with SSA, `the wikipedia +article `_ +is a good introduction and there are various other introductions to it +available on your favorite search engine. The short version is that +"execution" of the Phi operation requires "remembering" which block +control came from. The Phi operation takes on the value corresponding to +the input control block. In this case, if control comes in from the +"then" block, it gets the value of "calltmp". If control comes from the +"else" block, it gets the value of "calltmp1". + +At this point, you are probably starting to think "Oh no! This means my +simple and elegant front-end will have to start generating SSA form in +order to use LLVM!". Fortunately, this is not the case, and we strongly +advise *not* implementing an SSA construction algorithm in your +front-end unless there is an amazingly good reason to do so. In +practice, there are two sorts of values that float around in code +written for your average imperative programming language that might need +Phi nodes: + +1. Code that involves user variables: ``x = 1; x = x + 1;`` +2. Values that are implicit in the structure of your AST, such as the + Phi node in this case. + +In `Chapter 7 `_ of this tutorial ("mutable +variables"), we'll talk about #1 in depth. For now, just believe me that +you don't need SSA construction to handle this case. For #2, you have +the choice of using the techniques that we will describe for #1, or you +can insert Phi nodes directly, if convenient. In this case, it is really +really easy to generate the Phi node, so we choose to do it directly. + +Okay, enough of the motivation and overview, lets generate code! 
+ +Code Generation for If/Then/Else ## {#ifcodegen} +------------------------------------------------ + +In order to generate code for this, we implement the ``CodeGen`` method +for ``IfExpressionNode``: + + +.. code-block:: python + + def CodeGen(self): condition = + self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing non-equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + + + + +This code is straightforward and similar to what we saw before. We emit +the expression for the condition, then compare that value to zero to get +a truth value as a 1-bit (bool) value. + + +.. code-block:: python + + function = g_llvm_builder.basic_block.function + + :: + + # Create blocks for the then and else cases. Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + + + + +This code creates the basic blocks that are related to the if/then/else +statement, and correspond directly to the blocks in the example above. +The first line gets the current Function object that is being built. It +gets this by asking the builder for the current BasicBlock, and asking +that block for its "parent" (the function it is currently embedded +into). + +Once it has that, it creates three blocks, which are automatically +inserted into the end of the function. Once the blocks are created, we +can emit the conditional branch that chooses between them. Note that +creating new blocks does not implicitly affect the Builder, so it is +still inserting into the block that the condition went into. + + +.. code-block:: python + + # Emit then value.
+ g_llvm_builder.position_at_end(then_block) then_value = + self.then_branch.CodeGen() g_llvm_builder.branch(merge_block) + + :: + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + + + + +After the conditional branch is inserted, we move the builder to start +inserting into the "then" block. Strictly speaking, this call moves the +insertion point to be at the end of the specified block. However, since +the "then" block is empty, it also starts out by inserting at the +beginning of the block. :) + +Once the insertion point is set, we recursively codegen the "then" +expression from the AST. To finish off the "then" block, we create an +unconditional branch to the merge block. One interesting (and very +important) aspect of the LLVM IR is that it `requires all basic blocks +to be +"terminated" `_ +with a `control flow +instruction `_ such +as return or branch. This means that all control flow, *including +fallthroughs* must be made explicit in the LLVM IR. If you violate this +rule, the verifier will emit an error. + +The final line here is quite subtle, but is very important. The basic +issue is that when we create the Phi node in the merge block, we need to +set up the block/value pairs that indicate how the Phi will work. +Importantly, the Phi node expects to have an entry for each predecessor +of the block in the CFG. Why then, are we getting the current block when +we just set it to then\_block 5 lines above? The problem is that the +"Then" expression may actually itself change the block that the Builder +is emitting into if, for example, it contains a nested "if/then/else" +expression. Because calling Codegen recursively could arbitrarily change +the notion of the current block, we are required to get an up-to-date +value for code that will set up the Phi node. + + +.. code-block:: python + + # Emit else block. 
+ g_llvm_builder.position_at_end(else_block) else_value = + self.else_branch.CodeGen() g_llvm_builder.branch(merge_block) + + :: + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + + + + +Code generation for the 'else' block is basically identical to codegen +for the 'then' block. The only difference is that the first line +positions the builder at the end of the 'else' block instead of the +'then' block. Recall that the 'else' block was already appended to the +function when it was created, so nothing more needs to be inserted. Now that the +'then' and 'else' blocks are emitted, we can finish up with the merge +code: + + +.. code-block:: python + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) phi = + g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + :: + + return phi + + + + + +The first line changes the insertion point so that newly created code +will go into the "merge" block. Once that is done, we need to create the +PHI node and set up the block/value pairs for the PHI. + +Finally, the CodeGen function returns the phi node as the value computed +by the if/then/else expression. In our example above, this returned +value will feed into the code for the top-level function, which will +create the return instruction. + +Overall, we now have the ability to execute conditional code in +Kaleidoscope. With this extension, Kaleidoscope is a fairly complete +language that can calculate a wide variety of numeric functions. Next up +we'll add another useful expression that is familiar from non-functional +languages... + +-------------- + +'for' Loop Expression # {#for} +============================== + +Now that we know how to add basic control flow constructs to the +language, we have the tools to add more powerful things. Let's add +something more aggressive, a 'for' expression: + + +..
code-block:: python + + extern putchard(char) def printstar(n) for i = 1, + i < n, 1.0 in putchard(42) # ascii 42 = '\*' + + :: + + # print 100 '*' characters + printstar(100) + + + + + +This expression defines a new variable (``i`` in this case) which +iterates from a starting value, while the condition (``i < n`` in this +case) is true, incrementing by an optional step value ("1.0" in this +case). If the step value is omitted, it defaults to 1.0. While the loop +is true, it executes its body expression. Because we don't have anything +better to return, we'll just define the loop as always returning 0.0. In +the future when we have mutable variables, it will get more useful. + +As before, lets talk about the changes that we need to Kaleidoscope to +support this. + +Lexer Extensions for the 'for' Loop ## {#forlexer} +-------------------------------------------------- + +The lexer extensions are the same sort of thing as for if/then/else: + + +.. code-block:: python + + ... + + class ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass + + ... + + def Tokenize(string): + + :: + + ... + + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + else: + yield IdentifierToken(identifier) + + + + + +AST Extensions for the 'for' Loop ## {#forast} +---------------------------------------------- + +The AST node is just as simple. It basically boils down to capturing the +variable name and the constituent expressions in the node. + + +.. code-block:: python + + # Expression class for for/in. class + ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): ... 
+ + + +Parser Extensions for the 'for' Loop ## {#forparser} +---------------------------------------------------- + +The parser code is also fairly standard. The only interesting thing here +is handling of the optional step value. The parser code handles it by +checking to see if the second comma is present. If not, it sets the step +value to null in the AST node: + + +.. code-block:: python + + # forexpr ::= 'for' identifier '=' expr ',' expr + (',' expr)? 'in' expression def ParseForExpr(self): self.Next() # eat + the for. + + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + + + + +LLVM IR for the 'for' Loop ## {#forir} +-------------------------------------- + +Now we get to the good part: the LLVM IR we want to generate for this +thing. With the simple example above, we get this LLVM IR (note that +this dump is generated with optimizations disabled for clarity): + + +.. 
code-block:: llvm + + declare double @putchard(double) define double + @printstar(double %n) { entry: ; initial value = 1.0 (inlined into phi) + br label %loop loop: ; preds = %loop, %entry %i = phi double [ + 1.000000e+00, %entry ], [ %nextvar, %loop ] ; body %calltmp = call + double @putchard(double 4.200000e+01) ; increment %nextvar = fadd double + %i, 1.000000e+00 ; termination test %cmptmp = fcmp ult double %i, %n + %booltmp = uitofp i1 %cmptmp to double %loopcond = fcmp one double + %booltmp, 0.000000e+00 br i1 %loopcond, label %loop, label %afterloop + afterloop: ; preds = %loop ; loop always returns 0.0 ret double + 0.000000e+00 } + + + +This loop contains all the same constructs we saw before: a phi node, +several expressions, and some basic blocks. Let's see how this fits +together. + +Code Generation for the 'for' Loop ## {#forcodegen} +--------------------------------------------------- + +The first part of ``CodeGen`` is very simple: we just output the start +expression for the loop value: + + +.. code-block:: python + + def CodeGen(self): # Emit the start code first, + without 'variable' in scope. start_value = self.start.CodeGen() + + + +With this out of the way, the next step is to set up the LLVM basic +block for the start of the loop body. In the case above, the whole loop +body is one block, but remember that the body code itself could consist +of multiple blocks (e.g. if it contains an if/then/else or a for/in +expression). + + +.. code-block:: python + + # Make the new basic block for the loop header, + inserting after current # block. function = + g_llvm_builder.basic_block.function pre_header_block = + g_llvm_builder.basic_block loop_block = + function.append_basic_block('loop') + + :: + + # Insert an explicit fallthrough from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + + + + +This code is similar to what we saw for if/then/else.
Because we will +need it to create the Phi node, we remember the block that falls through +into the loop. Once we have that, we create the actual block that starts +the loop and create an unconditional branch for the fall-through between +the two blocks. + + +.. code-block:: python + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block); + + :: + + # Start the PHI node with an entry for start. + variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) + variable_phi.add_incoming(start_value, pre_header_block) + + + + + +Now that the "pre\_header\_block" for the loop is set up, we switch to +emitting code for the loop body. To begin with, we move the insertion +point and create the PHI node for the loop induction variable. Since we +already know the incoming value for the starting value, we add it to the +Phi node. Note that the Phi will eventually get a second value for the +backedge, but we can't set it up yet (because it doesn't exist!). + + +.. code-block:: python + + # Within the loop, the variable is defined equal + to the PHI node. If it # shadows an existing variable, we have to + restore it, so save it now. old_value = + g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = variable_phi + + :: + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + + + + +Now the code starts to get more interesting. Our 'for' loop introduces a +new variable to the symbol table. This means that our symbol table can +now contain either function arguments or loop variables. To handle this, +before we codegen the body of the loop, we add the loop variable as the +current value for its name. Note that it is possible that there is a +variable of the same name in the outer scope. 
It would be easy to make +this an error (emit an error and return null if there is already an +entry for VarName) but we choose to allow shadowing of variables. In +order to handle this correctly, we remember the Value that we are +potentially shadowing in ``old_value`` (which will be None if there is +no shadowed variable). + +Once the loop variable is set into the symbol table, the code +recursively codegen's the body. This allows the body to use the loop +variable: any references to it will naturally find it in the symbol +table. + + +.. code-block:: python + + # Emit the step value. if self.step: step_value + = self.step.CodeGen() else: # If not specified, use 1.0. step_value = + Constant.real(Type.double(), 1) + + :: + + next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') + + + + + +Now that the body is emitted, we compute the next value of the iteration +variable by adding the step value, or 1.0 if it isn't present. +``next_value`` will be the value of the loop variable on the next +iteration of the loop. + + +.. code-block:: python + + # Compute the end condition and convert it to a + bool by comparing to 0.0. end_condition = self.end.CodeGen() + end_condition_bool = g_llvm_builder.fcmp( FCMP_ONE, end_condition, + Constant.real(Type.double(), 0), 'loopcond') + + + +Finally, we evaluate the exit value of the loop, to determine whether +the loop should exit. This mirrors the condition evaluation for the +if/then/else statement. + + +.. code-block:: python + + # Create the "after loop" block and insert it. + loop_end_block = g_llvm_builder.basic_block after_block = + function.append_basic_block('afterloop') + + :: + + # Insert the conditional branch into the end of loop_end_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. 
+ g_llvm_builder.position_at_end(after_block) + + + + + +With the code for the body of the loop complete, we just need to finish +up the control flow for it. This code remembers the end block (for the +phi node), then creates the block for the loop exit ("afterloop"). Based +on the value of the exit condition, it creates a conditional branch that +chooses between executing the loop again and exiting the loop. Any +future code is emitted in the "afterloop" block, so it sets the +insertion position to it. + + +.. code-block:: python + + # Add a new entry to the PHI node for the + backedge. variable_phi.add_incoming(next_value, loop_end_block) + + :: + + # Restore the unshadowed variable. + if old_value: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + + + + +The final code handles various cleanups: now that we have the +"next\_value", we can add the incoming value to the loop PHI node. After +that, we remove the loop variable from the symbol table, so that it +isn't in scope after the for loop. Finally, code generation of the for +loop always returns 0.0, so that is what we return from +``ForExpressionNode::CodeGen``. + +With this, we conclude the "adding control flow to Kaleidoscope" chapter +of the tutorial. In this chapter we added two control flow constructs, +and used them to motivate a couple of aspects of the LLVM IR that are +important for front-end implementors to know. In the next chapter of our +saga, we will get a bit crazier and add `user-defined +operators `_ to our poor innocent language. + +-------------- + +Full Code Listing # {#code} +=========================== + +Here is the complete code listing for our running example, enhanced with +the if/then/else and for expressions: + + +.. 
code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes + import FunctionPassManager + + from llvm.core import FCMP_ULT, FCMP_ONE from llvm.passes import + (PASS_INSTRUCTION_COMBINING, PASS_REASSOCIATE, PASS_GVN, + PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass class DefToken(object): pass class + ExternToken(object): pass class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass + + class IdentifierToken(object): def **init**\ (self, name): self.name = + name + + class NumberToken(object): def **init**\ (self, value): self.value = + value + + class CharacterToken(object): def **init**\ (self, char): self.char = + char def **eq**\ (self, other): return isinstance(other, CharacterToken) + and self.char == other.char def **ne**\ (self, other): return not self + == other + + # Regular expressions that tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. 
if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + elif identifier == 'if': + yield IfToken() + elif identifier == 'then': + yield ThenToken() + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". + class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. 
+ class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + raise RuntimeError('Unknown binary operator.') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # Expression class for if/then/else. + class IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): condition = self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + function = g_llvm_builder.basic_block.function + + # Create blocks for the then and else cases. Insert the 'then' block at the + # end of the function. 
+ then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + # Emit then value. + g_llvm_builder.position_at_end(then_block) + then_value = self.then_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + # Emit else block. + g_llvm_builder.position_at_end(else_block) + else_value = self.else_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) + phi = g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + return phi + + # Expression class for for/in. + class ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): # Output this as: # ... # start = startexpr # goto + loop # loop: # variable = phi [start, loopheader], [nextvariable, + loopend] # ... # bodyexpr # ... # loopend: # step = stepexpr # + nextvariable = variable + step # endcond = endexpr # br endcond, loop, + endloop # outloop: + + :: + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Make the new basic block for the loop header, inserting after current + # block. + function = g_llvm_builder.basic_block.function + pre_header_block = g_llvm_builder.basic_block + loop_block = function.append_basic_block('loop') + + # Insert an explicit fallthrough from the current block to the loop_block. 
+ g_llvm_builder.branch(loop_block) + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block) + + # Start the PHI node with an entry for start. + variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) + variable_phi.add_incoming(start_value, pre_header_block) + + # Within the loop, the variable is defined equal to the PHI node. If it + # shadows an existing variable, we have to restore it, so save it now. + old_value = g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = variable_phi + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + # Emit the step value. + if self.step: + step_value = self.step.CodeGen() + else: + # If not specified, use 1.0. + step_value = Constant.real(Type.double(), 1) + + next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') + + # Compute the end condition and convert it to a bool by comparing to 0.0. + end_condition = self.end.CodeGen() + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + + # Create the "after loop" block and insert it. + loop_end_block = g_llvm_builder.basic_block + after_block = function.append_basic_block('afterloop') + + # Insert the conditional branch into the end of loop_end_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + # Add a new entry to the PHI node for the backedge. + variable_phi.add_incoming(next_value, loop_end_block) + + # Restore the unshadowed variable. + if old_value: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. 
+ return Constant.real(Type.double(), 0) + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes). + class PrototypeNode(object): + + def **init**\ (self, name, args): self.name = name self.args = args + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If the function took a different number of args, reject. + if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. + class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. 
+ try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens, binop_precedence): self.tokens = tokens + self.binop_precedence = binop_precedence self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + self.binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. 
+ + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # ifexpr ::= 'if' expression 'then' expression 'else' expression def + ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + # forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' + expression def ParseForExpr(self): self.Next() # eat the for. + + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. 
+ + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + # primary ::= identifierexpr \| numberexpr \| parenexpr \| ifexpr \| + forexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr() + elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif + isinstance(self.current, ForToken): return self.ParseForExpr() elif + self.current == CharacterToken('('): return self.ParseParenExpr() else: + raise RuntimeError('Unknown token when expecting an expression.') + + # binoprhs ::= (operator primary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the primary expression after the binary operator. + right = self.ParsePrimary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= primary binoprhs def ParseExpression(self): left = + self.ParsePrimary() return self.ParseBinOpRHS(left, 0) + + # prototype ::= id '(' id\* ')' def ParsePrototype(self): if not + isinstance(self.current, IdentifierToken): raise RuntimeError('Expected + function name in prototype.') + + :: + + function_name = self.current.name + self.Next() # eat function name. 
+ + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + return PrototypeNode(function_name, arg_names) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. 
g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. operator_precedence = { '<': 10, '+': 20, '-': 20, + '\*': 40 } + + # Run the main "interpreter loop". while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw), operator_precedence) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl6.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl6.txt new file mode 100644 index 0000000..dfe95b6 --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl6.txt @@ -0,0 +1,1357 @@ +********************************************************************** +Chapter 6: Extending the Language: User-defined Operators +********************************************************************** + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction # {#intro} +======================= + +Welcome to Chapter 6 of the `Implementing a language with +LLVM `_ tutorial. At this +point in our tutorial, we now have a fully functional language that is +fairly minimal, but also useful. There is still one big problem with it, +however. 
Our language doesn't have many useful operators (like division, +logical negation, or even any comparisons besides less-than). + +This chapter of the tutorial takes a wild digression into adding +user-defined operators to the simple and beautiful Kaleidoscope +language. This digression now gives us a simple and ugly language in +some ways, but also a powerful one at the same time. One of the great +things about creating your own language is that you get to decide what +is good or bad. In this tutorial we'll assume that it is okay to use +this as a way to show some interesting parsing techniques. + +At the end of this tutorial, we'll run through an example Kaleidoscope +application that `renders the Mandelbrot set <#example>`_. This gives an +example of what you can build with Kaleidoscope and its feature set. + +User-defined Operators: the Idea # {#idea} +========================================== + +The "operator overloading" that we will add to Kaleidoscope is more +general than languages like C++. In C++, you are only allowed to +redefine existing operators: you can't programmatically change the +grammar, introduce new operators, change precedence levels, etc. In this +chapter, we will add this capability to Kaleidoscope, which will let the +user round out the set of operators that are supported. + +The point of going into user-defined operators in a tutorial like this +is to show the power and flexibility of using a hand-written parser. +Thus far, the parser we have been implementing uses recursive descent +for most parts of the grammar and operator precedence parsing for the +expressions. See `Chapter 2 `_ for details. +Without using operator precedence parsing, it would be very difficult to +allow the programmer to introduce new operators into the grammar: the +grammar is dynamically extensible as the JIT runs.
+ +The two specific features we'll add are programmable unary operators +(right now, Kaleidoscope has no unary operators at all) as well as +binary operators. An example of this is: + + +.. code-block:: python + + # Logical unary not. def unary!(v) if v then 0 + else 1 + + # Define > with the same precedence as <. + def binary> 10 (LHS RHS) RHS < LHS + + # Binary "logical or", (note that it does not "short circuit"). + def binary\| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0 + + # Define = with slightly lower precedence than relationals. + def binary= 9 (LHS RHS) !(LHS < RHS \| LHS > RHS) + + + +Many languages aspire to being able to implement their standard runtime +library in the language itself. In Kaleidoscope, we can implement +significant parts of the language in the library! + +We will break down implementation of these features into two parts: +implementing support for user-defined binary operators and adding unary +operators. + +-------------- + +User-defined Binary Operators # {#binary} +========================================= + +Adding support for user-defined binary operators is pretty simple with +our current framework. We'll first add support for the unary/binary +keywords: + + +.. code-block:: python + + class InToken(object): pass class + BinaryToken(object): pass class UnaryToken(object): pass ... def + Tokenize(string): ... elif identifier == 'in': yield InToken() elif + identifier == 'binary': yield BinaryToken() elif identifier == 'unary': + yield UnaryToken() else: yield IdentifierToken(identifier) + + + +This just adds lexer support for the unary and binary keywords, like we +did in `previous chapters `_. One nice +thing about our current AST, is that we represent binary operators with +full generalisation by using their ASCII code as the opcode. For our +extended operators, we'll use this same representation, so we don't need +any new AST or parser support.
+ +On the other hand, we have to be able to represent the definitions of +these new operators, in the "def binary\| 5" part of the function +definition. In our grammar so far, the "name" for the function +definition is parsed as the "prototype" production and into the +``PrototypeNode``. To represent our new user-defined operators as +prototypes, we have to extend the ``PrototypeNode`` like this: + + +.. code-block:: python + + # This class represents the "prototype" for a + function, which captures its name, # and its argument names (thus + implicitly the number of arguments the function # takes), as well as if + it is an operator. class PrototypeNode(object): + + def **init**\ (self, name, args, is_operator=False, precedence=0): + self.name = name self.args = args self.is_operator = is_operator + self.precedence = precedence + + def IsBinaryOp(self): return self.is_operator and len(self.args) == 2 + + def GetOperatorName(self): assert self.is_operator return self.name[-1] + + def CodeGen(self): ... + + + +Basically, in addition to knowing a name for the prototype, we now keep +track of whether it was an operator, and if it was, what precedence +level the operator is at. The precedence is only used for binary +operators (as you'll see below, it just doesn't apply for unary +operators). Now that we have a way to represent the prototype for a +user-defined operator, we need to parse it: + + +.. code-block:: python + + # prototype # ::= id '(' id\* ')' # ::= binary + LETTER number? (id, id) # ::= unary LETTER (id) def + ParsePrototype(self): precedence = None if isinstance(self.current, + IdentifierToken): kind = 'normal' function_name = self.current.name + self.Next() # eat function name. elif isinstance(self.current, + BinaryToken): kind = 'binary' self.Next() # eat 'binary'. if not + isinstance(self.current, CharacterToken): raise RuntimeError('Expected + an operator after "binary".') function_name = 'binary' + + self.current.char self.Next() # eat the operator.
if + isinstance(self.current, NumberToken): if not 1 <= self.current.value <= + 100: raise RuntimeError('Invalid precedence: must be in range [1, + 100].') precedence = self.current.value self.Next() # eat the + precedence. else: raise RuntimeError('Expected function name, "unary" or + "binary" in ' 'prototype.') + + :: + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + if kind == 'binary' and len(arg_names) != 2: + raise RuntimeError('Invalid number of arguments for a binary operator.') + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + + + + +This is all fairly straightforward parsing code, and we have already +seen a lot of similar code in the past. One interesting part about the +code above is the couple lines that set up ``function_name`` for +operators. This builds names like "binary@" for a newly defined "@" +operator. This then takes advantage of the fact that symbol names in the +LLVM symbol table are allowed to have any character in them. + +The next interesting thing to add, is codegen support for these binary +operators. Given our current structure, this is a simple addition of a +default case for our existing binary operator node: + + +.. 
code-block:: python + + def CodeGen(self): left = self.left.CodeGen() + right = self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + function = g_llvm_module.get_function_named('binary' + self.operator) + return g_llvm_builder.call(function, [left, right], 'binop') + + + + + +As you can see above, the new code is actually really simple. It just +does a lookup for the appropriate operator in the symbol table and +generates a function call to it. Since user-defined operators are just +built as normal functions (because the "prototype" boils down to a +function with the right name) everything falls into place. + +The final piece of code we are missing, is a bit of top-level magic. We +will need to make the binary precedence map global and modify it +whenever we define a new binary operator: + + +.. code-block:: python + + # The binary operator precedence chart. + g_binop_precedence = {} ... class FunctionNode(object): ... def + CodeGen(self): ... # Create a function object. function = + self.prototype.CodeGen() + + :: + + # If this is a binary operator, install its precedence. + if self.prototype.IsBinaryOp(): + operator = self.prototype.GetOperatorName() + g_binop_precedence[operator] = self.prototype.precedence + ... + # Finish off the function. + try: + ... + except: + function.delete() + if self.prototype.IsBinaryOp(): + del g_binop_precedence[self.prototype.GetOperatorName()] + raise + + return function + + ... def main(): ...
g_binop_precedence['<'] = 10 + g_binop_precedence['+'] = 20 g_binop_precedence['-'] = 20 + g_binop_precedence['\*'] = 40 ... + + + +Basically, before CodeGening a function, if it is a user-defined +operator, we register it in the precedence table. This allows the binary +operator parsing logic we already have in place to handle it. Since we +are working on a fully-general operator precedence parser, this is all +we need to do to "extend the grammar". + +Now we have useful user-defined binary operators. This builds a lot on +the previous framework we built for other operators. Adding unary +operators is a bit more challenging, because we don't have any framework +for it yet - let's see what it takes. + +User-defined Unary Operators # {#unary} +======================================= + +Since we don't currently support unary operators in the Kaleidoscope +language, we'll need to add everything to support them. Above, we added +simple support for the 'unary' keyword to the lexer. In addition to +that, we need an AST node: + + +.. code-block:: python + + # Expression class for a unary operator. class + UnaryExpressionNode(ExpressionNode): + + def **init**\ (self, operator, operand): self.operator = operator + self.operand = operand + + def CodeGen(self): ... + + + +This AST node is very simple and obvious by now. It directly mirrors the +binary operator AST node, except that it only has one child. With this, +we need to add the parsing logic. Parsing a unary operator is pretty +simple: we'll add a new function to do it: + + +.. code-block:: python + + # unary ::= primary \| unary_operator unary def + ParseUnary(self): # If the current token is not an operator, it must be + a primary expression. if (not isinstance(self.current, CharacterToken) + or self.current in [CharacterToken('('), CharacterToken(',')]): return + self.ParsePrimary() + + :: + + # If this is a unary operator, read it. + operator = self.current.char + self.Next() # eat the operator. 
+ return UnaryExpressionNode(operator, self.ParseUnary()) + + + + + +The grammar we add is pretty straightforward here. If we see a unary +operator when parsing a primary operator, we eat the operator as a +prefix and parse the remaining piece as another unary operator. This +allows us to handle multiple unary operators (e.g. ``!!x``). Note that +unary operators can't have ambiguous parses like binary operators can, +so there is no need for precedence information. + +The problem with this function, is that we need to call ParseUnary from +somewhere. To do this, we change previous callers of ParsePrimary to +call ParseUnary instead: + + +.. code-block:: python + + # binoprhs ::= (binary_operator unary)\* def + ParseBinOpRHS(self, left, left_precedence): ... # Parse the unary + expression after the binary operator. right = self.ParseUnary() ... + + # expression ::= unary binoprhs def ParseExpression(self): left = + self.ParseUnary() return self.ParseBinOpRHS(left, 0) + + + +With these two simple changes, we are now able to parse unary operators +and build the AST for them. Next up, we need to add parser support for +prototypes, to parse the unary operator prototype. We extend the binary +operator code above with: + + +.. code-block:: python + + # prototype # ::= id '(' id\* ')' # ::= binary + LETTER number? (id, id) # ::= unary LETTER (id) def + ParsePrototype(self): precedence = None if isinstance(self.current, + IdentifierToken): ... elif isinstance(self.current, UnaryToken): kind = + 'unary' self.Next() # eat 'unary'. if not isinstance(self.current, + CharacterToken): raise RuntimeError('Expected an operator after + "unary".') function_name = 'unary' + self.current.char self.Next() # + eat the operator. elif isinstance(self.current, BinaryToken): ... else: + raise RuntimeError('Expected function name, "unary" or "binary" in ' + 'prototype.') ... 
if kind == 'unary' and len(arg_names) != 1: raise + RuntimeError('Invalid number of arguments for a unary operator.') elif + kind == 'binary' and len(arg_names) != 2: raise RuntimeError('Invalid + number of arguments for a binary operator.') + + :: + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + + + + +As with binary operators, we name unary operators with a name that +includes the operator character. This assists us at code generation +time. Speaking of, the final piece we need to add is codegen support for +unary operators. It looks like this: + + +.. code-block:: python + + class UnaryExpressionNode(ExpressionNode): ... + def CodeGen(self): operand = self.operand.CodeGen() function = + g_llvm_module.get_function_named('unary' + self.operator) return + g_llvm_builder.call(function, [operand], 'unop') + + + +This code is similar to, but simpler than, the code for binary +operators. It is simpler primarily because it doesn't need to handle any +predefined operators. + +-------------- + +Kicking the Tires # {#example} +============================== + +It is somewhat hard to believe, but with a few simple extensions we've +covered in the last chapters, we have grown a real-ish language. With +this, we can do a lot of interesting things, including I/O, math, and a +bunch of other things. For example, we can now add a nice sequencing +operator (assuming we import ``putchard`` as described in Chapter 4): + + +.. code-block:: python + + ready> def binary : 1 (x y) 0 # Low-precedence + operator that ignores operands. ... ready> extern putchard(x) ... ready> + def printd(x) putchard(x) : putchard(10) .. ready> printd(65) : + printd(66) : printd(67) A B C Evaluated to: 0.0 + + + +We can also define a bunch of other "primitive" operations, such as: + + +.. code-block:: python + + # Logical unary not. def unary!(v) if v then 0 + else 1 + + # Unary negate. + def unary-(v) 0-v + + # Define > with the same precedence as <. 
+ def binary> 10 (LHS RHS) RHS < LHS + + # Binary logical or, which does not short circuit. + def binary| 5 (LHS RHS) if LHS then 1 else if RHS then 1 else 0 + + # Binary logical and, which does not short circuit. + def binary& 6 (LHS RHS) if !LHS then 0 else !!RHS + + # Define = with slightly lower precedence than relationals. + def binary = 9 (LHS RHS) !(LHS < RHS | LHS > RHS) + + + + + +Given the previous if/then/else support, we can also define interesting +functions for I/O. For example, the following prints out a character +whose "density" reflects the value passed in: the lower the value, the +denser the character: + + +.. code-block:: python + + ready> + + extern putchard(char) def printdensity(d) if d > 8 then putchard(32) # ' + ' else if d > 4 then putchard(46) # '.' else if d > 2 then putchard(43) + # '+' else putchard(42); # '*' ... ready> printdensity(1): + printdensity(2): printdensity(3) : printdensity(4): printdensity(5): + printdensity(9): putchard(10)*\ ++.. Evaluated to 0.000000 + + +Based on these simple primitive operations, we can start to define more +interesting things. For example, here's a little function that solves +for the number of iterations it takes a function in the complex plane to +converge: + + +.. code-block:: python + + # determine whether the specific location + diverges. # Solve for z = z^2 + c in the complex plane. def + mandelconverger(real imag iters creal cimag) if iters > 255 | + (real*real + imag*imag > 4) then iters else + mandelconverger(real*real - imag*imag + creal, 2*real*imag + + cimag, iters+1, creal, cimag) + + # return the number of iterations required for the iteration to escape + def mandelconverge(real imag) mandelconverger(real, imag, 0, real, imag) + + + + +This "z = z^2 + c" function is a beautiful little creature that is the +basis for computation of the `Mandelbrot +Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. 
Our +``mandelconverge`` function returns the number of iterations that it +takes for a complex orbit to escape, saturating to 255. This is not a +very useful function by itself, but if you plot its value over a +two-dimensional plane, you can see the Mandelbrot set. Given that we are +limited to using putchard here, our amazing graphical output is limited, +but we can whip together something using the density plotter above: + + +.. code-block:: python + + # compute and plot the mandlebrot set with the + specified 2 dimensional range # info. def mandelhelp(xmin xmax xstep + ymin ymax ystep) for y = ymin, y < ymax, ystep in ( (for x = xmin, x < + xmax, xstep in printdensity(mandleconverge(x,y))) : putchard(10) ) + + # mandel - This is a convenient helper function for ploting the mandelbrot set + # from the specified position with the specified Magnification. + def mandel(realstart imagstart realmag imagmag) mandelhelp(realstart, + realstart+realmag\ *78, realmag, imagstart, imagstart+imagmag*\ 40, + imagmag); + + + +Given this, we can try plotting out the mandlebrot set! Lets try it out: + + +.. code-block:: bash + + ready> mandel(-2.3, -1.3, 0.05, 0.07) + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++...++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++.. 
+ ...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. + ..+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++. + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++....... + .....++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++. . ... + .++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++... + ++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + .+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++..+++++.... + ..+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++. .......... + +++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*++++++++.. .. + .++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*++++++++++... + .++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*\*++++++++++.. + .++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*++++++..... + ..++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+........ + ...++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+... .... + ...++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*+++++...... + ..++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\*++++++++++... + .++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*++++++++++... + ++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*+++++++++.. .. + ..++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*++++++.. .......... 
+ +++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++...+++..... + ..+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + ..++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++... + +++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++.. . ... + .++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++....... + ......+++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++.... + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. + ..++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++.. + ...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++.. 
+ ...+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++....+++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + Evaluated to 0.0 ready> mandel(-2, -1, 0.02, 0.04) + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++ + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++++ + 
\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++....... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++.......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++... + ... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++...... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++....... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++.......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++........... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++++......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++...........+++++.............. + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++.... + ......................... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++.... ......... + ............ \*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++..... ...... + \*\*\*\*\*\*\*\*+++++++++++++++++++++++....... + \*\*\*\*\*\*+++++++++++++++++++++++++........ + \*\*\*\*+++++++++++++++++++++++++....... + ***+++++++++++++++++++++++.........**\ ++++++++++++++++...........*\ ++++++++++++................ + \*++++.................... + + *++++....................*\ ++++++++++++................ + **++++++++++++++++...........**\ *+++++++++++++++++++++++......... + \*\*\*\*+++++++++++++++++++++++++....... + \*\*\*\*\*\*+++++++++++++++++++++++++........ + \*\*\*\*\*\*\*\*+++++++++++++++++++++++....... + \*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++..... ...... 
+ \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++.... ......... + ............ \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++.... + ......................... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++...........+++++.............. + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++++......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++........... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++++.......... + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++....... + Evaluated to: 0.0 ready> mandel(-0.9, -1.4, 0.02, 0.03) + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + 
\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++...++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++.. . + .++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++... + ......++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*\*\*+++++++++++++++++++... + .......+++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*\*\*++++++++++++++++++++.... .... + ..++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*\*\*++++++++++++++++++++++...... + ...++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*\*\*+++++++++++++++++++++++....... + .....++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*\*\*++++++++++++++++++++++++....... + .....+++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + \*\*\*\*+++++++++++++++++++++++++.... . + .....+++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* + **+++++++++++++++++++++++++.... + ...++++++++++++++++**\ \*\*\*\*\*\*\*\*\*\*\**\ +++++++++++++++++++++++....... + ....++++++++++++++++\*\*\*\*\*\*\*\*\*\*\*\* + +++++++++++++++++++++.......... + .....++++++++++++++++\*\*\*\*\*\*\*\*\*\*\* + ++++++++++++++++++............. + .......+++++++++++++++\*\*\*\*\*\*\*\*\*\* + +++++++++++++++................ + ............++++++++++\*\*\*\*\*\*\*\*\*\* + +++++++++++++................. .................+++++\*\*\*\*\*\*\*\*\* + +++++++++++... .... .......... .+++++\*\*\*\*\*\*\*\* ++++++++++..... + ........ ...+++++\*\*\*\*\*\*\* ++++++++...... ..++++++\*\*\*\*\*\* + +++++++........ 
..+++++\*\*\*\*\*\* +++++.......... ..++++++\*\*\*\*\* + ++++.......... ....++++++\*\*\*\*\* ++.......... ....+++++++\*\*\*\* + .......... ......+++++++\ **\* .......... .....+++++++**\ \* .......... + .....++++++\ **\* ......... .+++++++** ........ .+++++++\ *\* ...... + ...+++++++* . ....++++++++\* ...++++++++\* ..+++++++++ ..+++++++++ + Evaluated to: 0.0 ready> ^C + + + +At this point, you may be starting to realize that Kaleidoscope is a +real and powerful language. It may not be self-similar :), but it can be +used to plot things that are! + +With this, we conclude the "adding user-defined operators" chapter of +the tutorial. We have successfully augmented our language, adding the +ability to extend the language in the library, and we have shown how +this can be used to build a simple but interesting end-user application +in Kaleidoscope. At this point, Kaleidoscope can build a variety of +applications that are functional and can call functions with +side-effects, but it can't actually define and mutate a variable itself. + +Strikingly, variable mutation is an important feature of some languages, +and it is not at all obvious how to `add support for mutable +variables <PythonLangImpl7.html>`_ without having to add an "SSA +construction" phase to your front-end. In the next chapter, we will +describe how you can add variable mutation without building SSA in your +front-end. + +-------------- + +Full Code Listing # {#code} +=========================== + +Here is the complete code listing for our running example, enhanced with +support for user-defined operators: + + +.. 
code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes + import FunctionPassManager + + from llvm.core import FCMP_ULT, FCMP_ONE from llvm.passes import + (PASS_INSTRUCTION_COMBINING, PASS_REASSOCIATE, PASS_GVN, + PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. + g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + # The binary operator precedence chart. + g_binop_precedence = {} + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass class DefToken(object): pass class + ExternToken(object): pass class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass class + BinaryToken(object): pass class UnaryToken(object): pass + + class IdentifierToken(object): def __init__(self, name): self.name = + name + + class NumberToken(object): def __init__(self, value): self.value = + value + + class CharacterToken(object): def __init__(self, char): self.char = + char def __eq__(self, other): return isinstance(other, CharacterToken) + and self.char == other.char def __ne__(self, other): return not self + == other + + # Regular expressions that match tokens and comments of our language. 
+ REGEX_NUMBER = re.compile('[0-9]+(?:.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]\ *') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes. + comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + elif identifier == 'if': + yield IfToken() + elif identifier == 'then': + yield ThenToken() + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + elif identifier == 'binary': + yield BinaryToken() + elif identifier == 'unary': + yield UnaryToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". 
+ class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_named_values[self.name] else: raise RuntimeError('Unknown variable + name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): left = self.left.CodeGen() right = + self.right.CodeGen() + + :: + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + function = g_llvm_module.get_function_named('binary' + self.operator) + return g_llvm_builder.call(function, [left, right], 'binop') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # Expression class for if/then/else. 
+ class IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): condition = self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + function = g_llvm_builder.basic_block.function + + # Create blocks for the then and else cases. Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + # Emit then value. + g_llvm_builder.position_at_end(then_block) + then_value = self.then_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + # Emit else block. + g_llvm_builder.position_at_end(else_block) + else_value = self.else_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) + phi = g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + return phi + + # Expression class for for/in. + class ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): # Output this as: # ... 
# start = startexpr # goto + loop # loop: # variable = phi [start, loopheader], [nextvariable, + loopend] # ... # bodyexpr # ... # loopend: # step = stepexpr # + nextvariable = variable + step # endcond = endexpr # br endcond, loop, + endloop # outloop: + + :: + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Make the new basic block for the loop header, inserting after current + # block. + function = g_llvm_builder.basic_block.function + pre_header_block = g_llvm_builder.basic_block + loop_block = function.append_basic_block('loop') + + # Insert an explicit fallthrough from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block) + + # Start the PHI node with an entry for start. + variable_phi = g_llvm_builder.phi(Type.double(), self.loop_variable) + variable_phi.add_incoming(start_value, pre_header_block) + + # Within the loop, the variable is defined equal to the PHI node. If it + # shadows an existing variable, we have to restore it, so save it now. + old_value = g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = variable_phi + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + # Emit the step value. + if self.step: + step_value = self.step.CodeGen() + else: + # If not specified, use 1.0. + step_value = Constant.real(Type.double(), 1) + + next_value = g_llvm_builder.fadd(variable_phi, step_value, 'next') + + # Compute the end condition and convert it to a bool by comparing to 0.0. + end_condition = self.end.CodeGen() + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + + # Create the "after loop" block and insert it. 
+ loop_end_block = g_llvm_builder.basic_block + after_block = function.append_basic_block('afterloop') + + # Insert the conditional branch into the end of loop_end_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + # Add a new entry to the PHI node for the backedge. + variable_phi.add_incoming(next_value, loop_end_block) + + # Restore the unshadowed variable. + if old_value: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + # Expression class for a unary operator. + class UnaryExpressionNode(ExpressionNode): + + def **init**\ (self, operator, operand): self.operator = operator + self.operand = operand + + def CodeGen(self): operand = self.operand.CodeGen() function = + g_llvm_module.get_function_named('unary' + self.operator) return + g_llvm_builder.call(function, [operand], 'unop') + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes), as well as if it is an operator. + class PrototypeNode(object): + + def **init**\ (self, name, args, is_operator=False, precedence=0): + self.name = name self.args = args self.is_operator = is_operator + self.precedence = precedence + + def IsBinaryOp(self): return self.is_operator and len(self.args) == 2 + + def GetOperatorName(self): assert self.is_operator return self.name[-1] + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. 
+ if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. + if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If the function took a different number of args, reject. + if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + # Add arguments to variable symbol table. + g_named_values[arg_name] = arg + + return function + + # This class represents a function definition itself. + class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # If this is a binary operator, install its precedence. + if self.prototype.IsBinaryOp(): + operator = self.prototype.GetOperatorName() + g_binop_precedence[operator] = self.prototype.precedence + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. + g_llvm_pass_manager.run(function) + except: + function.delete() + if self.prototype.IsBinaryOp(): + del g_binop_precedence[self.prototype.GetOperatorName()] + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens): self.tokens = tokens self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. 
Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + g_binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. + + return contents + + # ifexpr ::= 'if' expression 'then' expression 'else' expression def + ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. 
+ + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + # forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' + expression def ParseForExpr(self): self.Next() # eat the for. + + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + # primary ::= identifierexpr \| numberexpr \| parenexpr \| ifexpr \| + forexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr() + elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif + isinstance(self.current, ForToken): return self.ParseForExpr() elif + self.current == CharacterToken('('): return self.ParseParenExpr() else: + raise RuntimeError('Unknown token when expecting an expression.') + + # unary ::= primary \| unary_operator unary def ParseUnary(self): # If + the current token is not an operator, it must be a primary expression. 
+ if (not isinstance(self.current, CharacterToken) or self.current in + [CharacterToken('('), CharacterToken(',')]): return self.ParsePrimary() + + :: + + # If this is a unary operator, read it. + operator = self.current.char + self.Next() # eat the operator. + return UnaryExpressionNode(operator, self.ParseUnary()) + + # binoprhs ::= (binary_operator unary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the unary expression after the binary operator. + right = self.ParseUnary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= unary binoprhs def ParseExpression(self): left = + self.ParseUnary() return self.ParseBinOpRHS(left, 0) + + # prototype # ::= id '(' id\* ')' # ::= binary LETTER number? (id, id) # + ::= unary LETTER (id) def ParsePrototype(self): precedence = None if + isinstance(self.current, IdentifierToken): kind = 'normal' + function_name = self.current.name self.Next() # eat function name. elif + isinstance(self.current, UnaryToken): kind = 'unary' self.Next() # eat + 'unary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "unary".') function_name = + 'unary' + self.current.char self.Next() # eat the operator. 
elif + isinstance(self.current, BinaryToken): kind = 'binary' self.Next() # eat + 'binary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "binary".') function_name = + 'binary' + self.current.char self.Next() # eat the operator. if + isinstance(self.current, NumberToken): if not 1 <= self.current.value <= + 100: raise RuntimeError('Invalid precedence: must be in range [1, + 100].') precedence = self.current.value self.Next() # eat the + precedence. else: raise RuntimeError('Expected function name, "unary" or + "binary" in ' 'prototype.') + + :: + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + if kind == 'unary' and len(arg_names) != 1: + raise RuntimeError('Invalid number of arguments for a unary operator.') + elif kind == 'binary' and len(arg_names) != 2: + raise RuntimeError('Invalid number of arguments for a binary operator.') + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. 
return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: print 'Error:', e + try: self.Next() # Skip for error recovery. except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Do simple + "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. g_binop_precedence['<'] = 10 + g_binop_precedence['+'] = 20 g_binop_precedence['-'] = 20 + g_binop_precedence['\*'] = 40 + + # Run the main "interpreter loop". 
while True: print 'ready>', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw)) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl7.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl7.txt new file mode 100644 index 0000000..576b5b4 --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl7.txt @@ -0,0 +1,1611 @@ +******************************************************************************* +Chapter 7: Extending the Language: Mutable Variables / SSA construction +******************************************************************************* + +Written by `Chris Lattner `_ and `Max +Shawabkeh `_ + +Introduction # {#intro} +======================= + +Welcome to Chapter 7 of the `Implementing a language with +LLVM `_ tutorial. In +chapters 1 through 6, we've built a very respectable, albeit simple, +`functional programming +language `_. In our +journey, we learned some parsing techniques, how to build and represent +an AST, how to build LLVM IR, and how to optimize the resultant code as +well as JIT compile it. + +While Kaleidoscope is interesting as a functional language, the fact +that it is functional makes it "too easy" to generate LLVM IR for it. In +particular, a functional language makes it very easy to build LLVM IR +directly in `SSA +form `_. +Since LLVM requires that the input code be in SSA form, this is a very +nice property and it is often unclear to newcomers how to generate code +for an imperative language with mutable variables. 
+ +The short (and happy) summary of this chapter is that there is no need +for your front-end to build SSA form: LLVM provides highly tuned and +well tested support for this, though the way it works is a bit +unexpected for some. + +Why is this a hard problem? # {#why} +==================================== + +To understand why mutable variables cause complexities in SSA +construction, consider this extremely simple C example: + + +.. code-block:: python + + int G, H; int test(_Bool Condition) { int X; if + (Condition) X = G; else X = H; return X; } + + + +In this case, we have the variable "X", whose value depends on the path +executed in the program. Because there are two different possible values +for X before the return instruction, a PHI node is inserted to merge the +two values. The LLVM IR that we want for this example looks like this: + + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32\* @H = + weak global i32 0 ; type of @H is i32\* define i32 @test(i1 %Condition) + { entry: br i1 %Condition, label %cond_true, label %cond_false + cond_true: %X.0 = load i32\* @G br label %cond_next cond_false: %X.1 + = load i32\* @H br label %cond_next cond_next: %X.2 = phi i32 [ %X.1, + %cond_false ], [ %X.0, %cond_true ] ret i32 %X.2 } + + + +In this example, the loads from the G and H global variables are +explicit in the LLVM IR, and they live in the then/else branches of the +if statement (cond\_true/cond\_false). In order to merge the incoming +values, the X.2 phi node in the cond\_next block selects the right value +to use based on where control flow is coming from: if control flow comes +from the cond\_false block, X.2 gets the value of X.1. Alternatively, if +control flow comes from cond\_true, it gets the value of X.0. The intent +of this chapter is not to explain the details of SSA form. For more +information, see one of the many `online +references `_. 
+ +The question for this article is "who places the phi nodes when lowering +assignments to mutable variables?". The issue here is that LLVM +*requires* that its IR be in SSA form: there is no "non-ssa" mode for +it. However, SSA construction requires non-trivial algorithms and data +structures, so it is inconvenient and wasteful for every front-end to +have to reproduce this logic. + +Memory in LLVM # {#memory} +========================== + +The 'trick' here is that while LLVM does require all register values to +be in SSA form, it does not require (or permit) memory objects to be in +SSA form. In the example above, note that the loads from G and H are +direct accesses to G and H: they are not renamed or versioned. This +differs from some other compiler systems, which do try to version memory +objects. In LLVM, instead of encoding dataflow analysis of memory into +the LLVM IR, it is handled with `Analysis +Passes `_ which are +computed on demand. + +With this in mind, the high-level idea is that we want to make a stack +variable (which lives in memory, because it is on the stack) for each +mutable object in a function. To take advantage of this trick, we need +to talk about how LLVM represents stack variables. + +In LLVM, all memory accesses are explicit with load/store instructions, +and it is carefully designed not to have (or need) an "address-of" +operator. Notice how the type of the @G/@H global variables is actually +"i32\*" even though the variable is defined as "i32". What this means +is that @G defines *space* for an i32 in the global data area, but its +*name* actually refers to the address for that space. Stack variables +work the same way, except that instead of being declared with global +variable definitions, they are declared with the `LLVM alloca +instruction `_: + + +.. code-block:: python + + define i32 @example() { entry: %X = alloca i32 ; + type of %X is i32\*. ... %tmp = load i32\* %X ; load the stack value %X + from the stack.
%tmp2 = add i32 %tmp, 1 ; increment it store i32 %tmp2, + i32\* %X ; store it back ... + + + +This code shows an example of how you can declare and manipulate a stack +variable in the LLVM IR. Stack memory allocated with the alloca +instruction is fully general: you can pass the address of the stack slot +to functions, you can store it in other variables, etc. In our example +above, we could rewrite the example to use the alloca technique to avoid +using a PHI node: + + +.. code-block:: llvm + + @G = weak global i32 0 ; type of @G is i32\* @H = + weak global i32 0 ; type of @H is i32\* define i32 @test(i1 %Condition) + { entry: %X = alloca i32 ; type of %X is i32\ *. br i1 %Condition, label + %cond_true, label %cond_false cond_true: %X.0 = load i32* @G store + i32 %X.0, i32\* %X ; Update X br label %cond_next cond_false: %X.1 = + load i32\* @H store i32 %X.1, i32\* %X ; Update X br label %cond_next + cond_next: %X.2 = load i32\* %X ; Read X ret i32 %X.2 } {% endhighlight + %} + + With this, we have discovered a way to handle arbitrary mutable + variables without the need to create Phi nodes at all: + + .. raw:: html + +
    +
  1. + + Each mutable variable becomes a stack allocation. + + .. raw:: html + +
  2. +
  3. + + Each read of the variable becomes a load from the stack. + + .. raw:: html + +
  4. +
  5. + + Each update of the variable becomes a store to the stack. + + .. raw:: html + +
  6. +
  7. + + Taking the address of a variable just uses the stack address directly. + + .. raw:: html + +
  8. +
+ + While this solution has solved our immediate problem, it introduced + another one: we have now apparently introduced a lot of stack traffic + for very simple and common operations, a major performance problem. + Fortunately for us, the LLVM optimizer has a highly-tuned optimization + pass named "mem2reg" that handles this case, promoting allocas like this + into SSA registers, inserting Phi nodes as appropriate. If you run this + example through the pass, for example, you'll get: + + {% highlight bash %} $ llvm-as < example.ll \| opt -mem2reg \| llvm-dis + + + + + +.. code-block:: llvm + + @G = weak global i32 0 @H = weak global i32 0 + define i32 @test(i1 %Condition) { entry: br i1 %Condition, label + %cond_true, label %cond_false cond_true: %X.0 = load i32\* @G br + label %cond_next cond_false: %X.1 = load i32\* @H br label %cond_next + cond_next: %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true + ] ret i32 %X.01 } + + + +The mem2reg pass implements the standard "iterated dominance frontier" +algorithm for constructing SSA form and has a number of optimizations +that speed up (very common) degenerate cases. The mem2reg optimization +pass is the answer to dealing with mutable variables, and we highly +recommend that you depend on it. Note that mem2reg only works on +variables in certain circumstances: + +- mem2reg is alloca-driven: it looks for allocas and if it can handle + them, it promotes them. It does not apply to global variables or heap + allocations. + +- mem2reg only looks for alloca instructions in the entry block of the + function. Being in the entry block guarantees that the alloca is only + executed once, which makes analysis simpler. + +- mem2reg only promotes allocas whose uses are direct loads and stores. + If the address of the stack object is passed to a function, or if any + funny pointer arithmetic is involved, the alloca will not be + promoted. 
+ +- mem2reg only works on allocas of `first + class `_ + values (such as pointers, scalars and vectors), and only if the array + size of the allocation is 1 (or missing in the .ll file). mem2reg is + not capable of promoting structs or arrays to registers. Note that + the "scalarrepl" pass is more powerful and can promote structs, + "unions", and arrays in many cases. + +All of these properties are easy to satisfy for most imperative +languages, and we'll illustrate it below with Kaleidoscope. The final +question you may be asking is: should I bother with this nonsense for my +front-end? Wouldn't it be better if I just did SSA construction +directly, avoiding use of the mem2reg optimization pass? In short, we +strongly recommend that you use this technique for building SSA form, +unless there is an extremely good reason not to. Using this technique +is: + +- Proven and well tested: llvm-gcc and clang both use this technique + for local mutable variables. As such, the most common clients of LLVM + are using this to handle a bulk of their variables. You can be sure + that bugs are found fast and fixed early. + +- Extremely Fast: mem2reg has a number of special cases that make it + fast in common cases as well as fully general. For example, it has + fast-paths for variables that are only used in a single block, + variables that only have one assignment point, good heuristics to + avoid insertion of unneeded phi nodes, etc. + +- Needed for debug info generation: `Debug information in + LLVM `_ relies on + having the address of the variable exposed so that debug info can be + attached to it. This technique dovetails very naturally with this + style of debug info. + +If nothing else, this makes it much easier to get your front-end up and +running, and is very simple to implement. Let's extend Kaleidoscope with +mutable variables now!
+ +-------------- + +Mutable Variables in Kaleidoscope # {#kalvars} +============================================== + +Now that we know the sort of problem we want to tackle, let's see what +this looks like in the context of our little Kaleidoscope language. +We're going to add two features: + +- The ability to mutate variables with the '=' operator. +- The ability to define new variables. + +While the first item is really what this is about, we only have +variables for incoming arguments as well as for induction variables, and +redefining those only goes so far :). Also, the ability to define new +variables is a useful thing regardless of whether you will be mutating +them. Here's a motivating example that shows how we could use these: + + +.. code-block:: python + + # Define ':' for sequencing: as a low-precedence + operator that ignores operands # and just returns the RHS. def binary : + 1 (x y) y; + + # Recursive fib, we could do this before. + def fib(x) if (x < 3) then 1 else fib(x-1) + fib(x-2) + + # Iterative fib. + def fibi(x) var a = 1, b = 1, c in (for i = 3, i < x in c = a + b : a = + b : b = c) : b + + # Call it. + fibi(10) + + + +In order to mutate variables, we have to change our existing variables +to use the "alloca trick". Once we have that, we'll add our new +operator, then extend Kaleidoscope to support new variable definitions. + +-------------- + +Adjusting Existing Variables for Mutation # {#adjustments} +========================================================== + +The symbol table in Kaleidoscope is managed at code generation time by +the ``g_named_values`` map. This map currently keeps track of the LLVM +"Value" that holds the double value for the named variable. In order to +support mutation, we need to change this slightly, so that it holds the +*memory location* of the variable in question. Note that this change is +a refactoring: it changes the structure of the code, but does not (by +itself) change the behavior of the compiler.
All of these changes are +isolated in the Kaleidoscope code generator. + +At this point in Kaleidoscope's development, it only supports variables +for two things: incoming arguments to functions and the induction +variable of 'for' loops. For consistency, we'll allow mutation of these +variables in addition to other user-defined variables. This means that +these will both need memory locations. + +To start our transformation of Kaleidoscope, we will need to create the +allocas that we will store in ``g_named_values``. We'll use a helper +function that ensures that the allocas are created in the entry block of +the function: + + +.. code-block:: python + + # Creates an alloca instruction in the entry + block of the function. This is used # for mutable variables. def + CreateEntryBlockAlloca(function, var_name): entry = + function.get_entry_basic_block() builder = Builder.new(entry) + builder.position_at_beginning(entry) return + builder.alloca(Type.double(), var_name) + + + +This code creates a temporary ``llvm.core.Builder`` that is pointing at +the first instruction of the entry block. It then creates an alloca with +the expected name and returns it. Because all values in Kaleidoscope are +doubles, there is no need to pass in a type to use. + +With this in place, the first functionality change we want to make is to +variable references. In our new scheme, variables live on the stack, so +code generating a reference to them actually needs to produce a load +from the stack slot: + + +.. code-block:: python + + def CodeGen(self): if self.name in + g_named_values: return + g_llvm_builder.load(g_named_values[self.name], self.name) else: + raise RuntimeError('Unknown variable name: ' + self.name) {% + endhighlight %} + + As you can see, this is pretty straightforward. Now we need to update + the things that define the variables to set up the alloca. 
We'll start + with ``ForExpressionNode.CodeGen`` (see the `full code listing <#code>`_ + for the unabridged code): + + {% highlight python %} def CodeGen(self): function = + g_llvm_builder.basic_block.function + + :: + + # Create an alloca for the variable in the entry block. + alloca = CreateEntryBlockAlloca(function, self.loop_variable) + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Store the value into the alloca. + g_llvm_builder.store(start_value, alloca) + ... + # Compute the end condition. + end_condition = self.end.CodeGen() + + # Reload, increment, and restore the alloca. This handles the case where + # the body of the loop mutates the variable. + cur_value = g_llvm_builder.load(alloca, self.loop_variable) + next_value = g_llvm_builder.fadd(cur_value, step_value, 'nextvar') + g_llvm_builder.store(next_value, alloca) + + # Convert condition to a bool by comparing equal to 0.0. + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + ... + + + + + +This code is virtually identical to the code `before we allowed mutable +variables `_. The big difference is +that we no longer have to construct a PHI node, and we use load/store to +access the variable as needed. + +To support mutable argument variables, we need to also make allocas for +them. The code for this is also pretty simple: + + +.. code-block:: python + + class PrototypeNode(object): ... # Create an + alloca for each argument and register the argument in the symbol # table + so that references to it will succeed. 
def CreateArgumentAllocas(self, + function): for arg_name, arg in zip(self.args, function.args): alloca = + CreateEntryBlockAlloca(function, arg_name) g_llvm_builder.store(arg, + alloca) g_named_values[arg_name] = alloca + + + +For each argument, we make an alloca, store the input value to the +function into the alloca, and register the alloca as the memory location +for the argument. This method gets invoked by ``FunctionNode.CodeGen`` +right after it sets up the entry block for the function. + +The final missing piece is adding the mem2reg pass, which allows us to +get good codegen once again: + + +.. code-block:: python + + from llvm.passes import + (PASS_PROMOTE_MEMORY_TO_REGISTER, PASS_INSTRUCTION_COMBINING, + PASS_REASSOCIATE, PASS_GVN, PASS_CFG_SIMPLIFICATION) ... def main(): + # Set up the optimizer pipeline. Start with registering info about how + the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Promote + allocas to registers. + g_llvm_pass_manager.add(PASS_PROMOTE_MEMORY_TO_REGISTER) # Do + simple "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) {% + endhighlight %} + + It is interesting to see what the code looks like before and after the + mem2reg optimization runs. For example, this is the before/after code + for our recursive fib function. 
Before the optimization: + + {% highlight llvm %} define double @fib(double %x) { entry: %x1 = alloca + double store double %x, double\* %x1 %x2 = load double\* %x1 %cmptmp = + fcmp ult double %x2, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 br i1 %ifcond, label + %then, label %else then: ; preds = %entry br label %ifcont else: ; preds + = %entry %x3 = load double\* %x1 %subtmp = fsub double %x3, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) %x4 = load double\* %x1 + %subtmp5 = fsub double %x4, 2.000000e+00 %calltmp6 = call double + @fib(double %subtmp5) %addtmp = fadd double %calltmp, %calltmp6 br label + %ifcont ifcont: ; preds = %else, %then %iftmp = phi double [ + 1.000000e+00, %then ], [ %addtmp, %else ] ret double %iftmp } {% + endhighlight %} + + Here there is only one variable (x, the input argument) but you can + still see the extremely simple-minded code generation strategy we are + using. In the entry block, an alloca is created, and the initial input + value is stored into it. Each reference to the variable does a reload + from the stack. Also, note that we didn't modify the if/then/else + expression, so it still inserts a PHI node. While we could make an + alloca for it, it is actually easier to create a PHI node for it, so we + still just make the PHI. 
+ + Here is the code after the mem2reg pass runs: + + {% highlight llvm %} define double @fib(double %x) { entry: %cmptmp = + fcmp ult double %x, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp one double %booltmp, 0.000000e+00 br i1 %ifcond, label + %then, label %else then: br label %ifcont else: %subtmp = fsub double + %x, 1.000000e+00 %calltmp = call double @fib(double %subtmp) %subtmp5 = + fsub double %x, 2.000000e+00 %calltmp6 = call double @fib(double + %subtmp5) %addtmp = fadd double %calltmp, %calltmp6 br label %ifcont + ifcont: ; preds = %else, %then %iftmp = phi double [ 1.000000e+00, %then + ], [ %addtmp, %else ] ret double %iftmp } + + + +This is a trivial case for mem2reg, since there are no redefinitions of +the variable. The point of showing this is to calm your tension about +inserting such blatant inefficiencies :). + +After the rest of the optimizers run, we get: + + +.. code-block:: llvm + + define double @fib(double %x) { entry: %cmptmp = + fcmp ult double %x, 3.000000e+00 %booltmp = uitofp i1 %cmptmp to double + %ifcond = fcmp ueq double %booltmp, 0.000000e+00 br i1 %ifcond, label + %else, label %ifcont else: %subtmp = fsub double %x, 1.000000e+00 + %calltmp = call double @fib(double %subtmp) %subtmp5 = fsub double %x, + 2.000000e+00 %calltmp6 = call double @fib(double %subtmp5) %addtmp = + fadd double %calltmp, %calltmp6 ret double %addtmp ifcont: ret double + 1.000000e+00 } + + + +Here we see that the simplifycfg pass decided to clone the return +instruction into the end of the 'else' block. This allowed it to +eliminate some branches and the PHI node. + +Now that all symbol table references are updated to use stack variables, +we'll add the assignment operator. + +-------------- + +New Assignment Operator # {#assignment} +======================================= + +With our current framework, adding a new assignment operator is really +simple.
We will parse it just like any other binary operator, but handle +it internally (instead of allowing the user to define it). The first +step is to set a precedence: + + +.. code-block:: python + + def main(): ... # Install standard binary + operators. # 1 is lowest possible precedence. 40 is the highest. + g_binop_precedence['='] = 2 g_binop_precedence['<'] = 10 + g_binop_precedence['+'] = 20 g_binop_precedence['-'] = 20 {% + endhighlight %} + + Now that the parser knows the precedence of the binary operator, it + takes care of all the parsing and AST generation. We just need to + implement codegen for the assignment operator. This looks like: + + {% highlight python %} class + BinaryOperatorExpressionNode(ExpressionNode): ... def CodeGen(self): # A + special case for '=' because we don't want to emit the LHS as an # + expression. if self.operator == '=': # Assignment requires the LHS to be + an identifier. if not isinstance(self.left, VariableExpressionNode): + raise RuntimeError('Destination of "=" must be a variable.') {% + endhighlight %} + + Unlike the rest of the binary operators, our assignment operator doesn't + follow the "emit LHS, emit RHS, do computation" model. As such, it is + handled as a special case before the other binary operators are handled. + The other strange thing is that it requires the LHS to be a variable. It + is invalid to have ``(x+1) = expr`` -- only things like ``x = expr`` are + allowed. + + {% highlight python %} # Codegen the RHS. value = self.right.CodeGen() + + :: + + # Look up the name. + variable = g_named_values[self.left.name] + + # Store the value and return it. + g_llvm_builder.store(value, variable) + + return value + ... + + + + + +Once we have the variable, CodeGening the assignment is straightforward: +we emit the RHS of the assignment, create a store, and return the +computed value. Returning a value allows for chained assignments like +``X = (Y = Z)``. 
+ +Now that we have an assignment operator, we can mutate loop variables +and arguments. For example, we can now run code like this: + + +.. code-block:: python + + # Function to print a double. extern printd(x) + + # Define ':' for sequencing: as a low-precedence operator that ignores operands + # and just returns the RHS. + def binary : 1 (x y) y + + def test(x) printd(x) : x = 4 : printd(x) + + test(123) + + + +When run, this example prints "123" and then "4", showing that we did +actually mutate the value! Okay, we have now officially implemented our +goal: getting this to work requires SSA construction in the general +case. However, to be really useful, we want the ability to define our +own local variables. Let's add this next! + +-------------- + +User-defined Local Variables # {#localvars} +=========================================== + +Adding var/in is just like any of the other extensions we made to +Kaleidoscope: we extend the lexer, the parser, the AST and the code +generator. The first step for adding our new 'var/in' construct is to +extend the lexer. As before, this is pretty trivial; the code looks like +this: + + +.. code-block:: python + + ... class UnaryToken(object): pass class + VarToken(object): pass ... def Tokenize(string): ... elif identifier == + 'unary': yield UnaryToken() elif identifier == 'var': yield VarToken() + else: yield IdentifierToken(identifier) + + + +The next step is to define the AST node that we will construct. For +var/in, it looks like this: + + +.. code-block:: python + + # Expression class for var/in. class + VarExpressionNode(ExpressionNode): + + def __init__(self, variables, body): self.variables = variables + self.body = body + + def CodeGen(self): ... + + + +var/in allows a list of names to be defined all at once, and each name +can optionally have an initializer value. As such, we capture this +information in the variables list.
Also, var/in has a body; this body is +allowed to access the variables defined by the var/in. + +With this in place, we can define the parser pieces. The first thing we +do is add it as a primary expression: + + +.. code-block:: python + + # primary ::= # identifierexpr | numberexpr | + parenexpr | ifexpr | forexpr | varexpr def ParsePrimary(self): if + isinstance(self.current, IdentifierToken): return + self.ParseIdentifierExpr() elif isinstance(self.current, NumberToken): + return self.ParseNumberExpr() elif isinstance(self.current, IfToken): + return self.ParseIfExpr() elif isinstance(self.current, ForToken): + return self.ParseForExpr() elif isinstance(self.current, VarToken): + return self.ParseVarExpr() elif self.current == CharacterToken('('): + return self.ParseParenExpr() else: raise RuntimeError('Unknown token + when expecting an expression.') + + + +Next we define ParseVarExpr: + + +.. code-block:: python + + # varexpr ::= 'var' (identifier ('=' + expression)?)+ 'in' expression def ParseVarExpr(self): self.Next() # eat + 'var'. + + :: + + variables = {} + + # At least one variable name is required. + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "var".') + + + + + +The first part of this code parses the list of identifier/expr pairs +into the local ``variables`` dictionary. + + +.. code-block:: python + + while True: var_name = self.current.name + self.Next() # eat the identifier. + + :: + + # Read the optional initializer. + if self.current == CharacterToken('='): + self.Next() # eat '='. + variables[var_name] = self.ParseExpression() + else: + variables[var_name] = None + + # End of var list, exit loop. + if self.current != CharacterToken(','): + break + self.Next() # eat ','.
+ + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "," in a var expression.') + + + + + +Once all the variables are parsed, we then parse the body and create the +AST node: + + +.. code-block:: python + + # At this point, we have to have 'in'. if not + isinstance(self.current, InToken): raise RuntimeError('Expected "in" + keyword after "var".') self.Next() # eat 'in'. + + :: + + body = self.ParseExpression() + + return VarExpressionNode(variables, body) + + + + + +Now that we can parse and represent the code, we need to support +emission of LLVM IR for it. This code starts out with: + + +.. code-block:: python + + class VarExpressionNode(ExpressionNode): ... def + CodeGen(self): old_bindings = {} function = + g_llvm_builder.basic_block.function + + :: + + # Register all variables and emit their initializer. + for var_name, var_expression in self.variables.iteritems(): + # Emit the initializer before adding the variable to scope, this prevents + # the initializer from referencing the variable itself, and permits stuff + # like this: + # var a = 1 in + # var a = a in ... # refers to outer 'a'. + if var_expression is not None: + var_value = var_expression.CodeGen() + else: + var_value = Constant.real(Type.double(), 0) + + alloca = CreateEntryBlockAlloca(function, var_name) + g_llvm_builder.store(var_value, alloca) + + # Remember the old variable binding so that we can restore the binding + # when we unrecurse. + old_bindings[var_name] = g_named_values.get(var_name, None) + + # Remember this binding. + g_named_values[var_name] = alloca + + + + + +Basically it loops over all the variables, installing them one at a +time. For each variable we put into the symbol table, we remember the +previous value that we replace in ``old_bindings``. + +There are more comments here than code. The basic idea is that we emit +the initializer, create the alloca, then update the symbol table to +point to it. 
Once all the variables are installed in the symbol table, +we evaluate the body of the var/in expression: + + +.. code-block:: python + + # Codegen the body, now that all vars are in + scope. body = self.body.CodeGen() + + + +Finally, before returning, we restore the previous variable bindings: + + +.. code-block:: python + + # Pop all our variables from scope. for var_name + in self.variables: if old_bindings[var_name] is not None: + g_named_values[var_name] = old_bindings[var_name] else: del + g_named_values[var_name] + + :: + + # Return the body computation. + return body + + + + + +The end result of all of this is that we get properly scoped variable +definitions, and we even (trivially) allow mutation of them :). + +With this, we completed what we set out to do. Our nice iterative fib +example from the intro compiles and runs just fine. The mem2reg pass +optimizes all of our stack variables into SSA registers, inserting PHI +nodes where needed, and our front-end remains simple: no "iterated +dominance frontier" computation anywhere in sight. + +-------------- + +Full Code Listing # {#code} +=========================== + +Here is the complete code listing for our running example, enhanced with +mutable variables and var/in support: + + +.. code-block:: python + + #!/usr/bin/env python + + import re from llvm.core import Module, Constant, Type, Function, + Builder from llvm.ee import ExecutionEngine, TargetData from llvm.passes + import FunctionPassManager + + from llvm.core import FCMP_ULT, FCMP_ONE from llvm.passes import + (PASS_PROMOTE_MEMORY_TO_REGISTER, PASS_INSTRUCTION_COMBINING, + PASS_REASSOCIATE, PASS_GVN, PASS_CFG_SIMPLIFICATION) + + Globals + ------- + + # The LLVM module, which holds all the IR code. + g_llvm_module = Module.new('my cool jit') + + # The LLVM instruction builder. Created whenever a new function is entered. 
+ g_llvm_builder = None + + # A dictionary that keeps track of which values are defined in the current scope + # and what their LLVM representation is. + g_named_values = {} + + # The function optimization passes manager. + g_llvm_pass_manager = FunctionPassManager.new(g_llvm_module) + + # The LLVM execution engine. + g_llvm_executor = ExecutionEngine.new(g_llvm_module) + + # The binary operator precedence chart. + g_binop_precedence = {} + + # Creates an alloca instruction in the entry block of the function. This is used + # for mutable variables. + def CreateEntryBlockAlloca(function, var_name): entry = + function.get_entry_basic_block() builder = Builder.new(entry) + builder.position_at_beginning(entry) return + builder.alloca(Type.double(), var_name) + + Lexer + ----- + + # The lexer yields one of these types for each token. + class EOFToken(object): pass class DefToken(object): pass class + ExternToken(object): pass class IfToken(object): pass class + ThenToken(object): pass class ElseToken(object): pass class + ForToken(object): pass class InToken(object): pass class + BinaryToken(object): pass class UnaryToken(object): pass class + VarToken(object): pass + + class IdentifierToken(object): def __init__(self, name): self.name = + name + + class NumberToken(object): def __init__(self, value): self.value = + value + + class CharacterToken(object): def __init__(self, char): self.char = + char def __eq__(self, other): return isinstance(other, CharacterToken) + and self.char == other.char def __ne__(self, other): return not self + == other + + # Regular expressions that match tokens and comments of our language. + REGEX_NUMBER = re.compile('[0-9]+(?:\.[0-9]+)?') REGEX_IDENTIFIER = + re.compile('[a-zA-Z][a-zA-Z0-9]*') REGEX_COMMENT = re.compile('#.*') + + def Tokenize(string): while string: # Skip whitespace. if + string[0].isspace(): string = string[1:] continue + + :: + + # Run regexes.
+ comment_match = REGEX_COMMENT.match(string) + number_match = REGEX_NUMBER.match(string) + identifier_match = REGEX_IDENTIFIER.match(string) + + # Check if any of the regexes matched and yield the appropriate result. + if comment_match: + comment = comment_match.group(0) + string = string[len(comment):] + elif number_match: + number = number_match.group(0) + yield NumberToken(float(number)) + string = string[len(number):] + elif identifier_match: + identifier = identifier_match.group(0) + # Check if we matched a keyword. + if identifier == 'def': + yield DefToken() + elif identifier == 'extern': + yield ExternToken() + elif identifier == 'if': + yield IfToken() + elif identifier == 'then': + yield ThenToken() + elif identifier == 'else': + yield ElseToken() + elif identifier == 'for': + yield ForToken() + elif identifier == 'in': + yield InToken() + elif identifier == 'binary': + yield BinaryToken() + elif identifier == 'unary': + yield UnaryToken() + elif identifier == 'var': + yield VarToken() + else: + yield IdentifierToken(identifier) + string = string[len(identifier):] + else: + # Yield the ASCII value of the unknown character. + yield CharacterToken(string[0]) + string = string[1:] + + yield EOFToken() + + Abstract Syntax Tree (aka Parse Tree) + ------------------------------------- + + # Base class for all expression nodes. + class ExpressionNode(object): pass + + # Expression class for numeric literals like "1.0". + class NumberExpressionNode(ExpressionNode): + + def **init**\ (self, value): self.value = value + + def CodeGen(self): return Constant.real(Type.double(), self.value) + + # Expression class for referencing a variable, like "a". 
+ class VariableExpressionNode(ExpressionNode): + + def **init**\ (self, name): self.name = name + + def CodeGen(self): if self.name in g_named_values: return + g_llvm_builder.load(g_named_values[self.name], self.name) else: + raise RuntimeError('Unknown variable name: ' + self.name) + + # Expression class for a binary operator. + class BinaryOperatorExpressionNode(ExpressionNode): + + def **init**\ (self, operator, left, right): self.operator = operator + self.left = left self.right = right + + def CodeGen(self): # A special case for '=' because we don't want to + emit the LHS as an # expression. if self.operator == '=': # Assignment + requires the LHS to be an identifier. if not isinstance(self.left, + VariableExpressionNode): raise RuntimeError('Destination of "=" must be + a variable.') + + :: + + # Codegen the RHS. + value = self.right.CodeGen() + + # Look up the name. + variable = g_named_values[self.left.name] + + # Store the value and return it. + g_llvm_builder.store(value, variable) + + return value + + left = self.left.CodeGen() + right = self.right.CodeGen() + + if self.operator == '+': + return g_llvm_builder.fadd(left, right, 'addtmp') + elif self.operator == '-': + return g_llvm_builder.fsub(left, right, 'subtmp') + elif self.operator == '*': + return g_llvm_builder.fmul(left, right, 'multmp') + elif self.operator == '<': + result = g_llvm_builder.fcmp(FCMP_ULT, left, right, 'cmptmp') + # Convert bool 0 or 1 to double 0.0 or 1.0. + return g_llvm_builder.uitofp(result, Type.double(), 'booltmp') + else: + function = g_llvm_module.get_function_named('binary' + self.operator) + return g_llvm_builder.call(function, [left, right], 'binop') + + # Expression class for function calls. + class CallExpressionNode(ExpressionNode): + + def **init**\ (self, callee, args): self.callee = callee self.args = + args + + def CodeGen(self): # Look up the name in the global module table. 
callee + = g_llvm_module.get_function_named(self.callee) + + :: + + # Check for argument mismatch error. + if len(callee.args) != len(self.args): + raise RuntimeError('Incorrect number of arguments passed.') + + arg_values = [i.CodeGen() for i in self.args] + + return g_llvm_builder.call(callee, arg_values, 'calltmp') + + # Expression class for if/then/else. + class IfExpressionNode(ExpressionNode): + + def **init**\ (self, condition, then_branch, else_branch): + self.condition = condition self.then_branch = then_branch + self.else_branch = else_branch + + def CodeGen(self): condition = self.condition.CodeGen() + + :: + + # Convert condition to a bool by comparing equal to 0.0. + condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, condition, Constant.real(Type.double(), 0), 'ifcond') + + function = g_llvm_builder.basic_block.function + + # Create blocks for the then and else cases. Insert the 'then' block at the + # end of the function. + then_block = function.append_basic_block('then') + else_block = function.append_basic_block('else') + merge_block = function.append_basic_block('ifcond') + + g_llvm_builder.cbranch(condition_bool, then_block, else_block) + + # Emit then value. + g_llvm_builder.position_at_end(then_block) + then_value = self.then_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Then' can change the current block; update then_block for the + # PHI node. + then_block = g_llvm_builder.basic_block + + # Emit else block. + g_llvm_builder.position_at_end(else_block) + else_value = self.else_branch.CodeGen() + g_llvm_builder.branch(merge_block) + + # Codegen of 'Else' can change the current block, update else_block for the + # PHI node. + else_block = g_llvm_builder.basic_block + + # Emit merge block. + g_llvm_builder.position_at_end(merge_block) + phi = g_llvm_builder.phi(Type.double(), 'iftmp') + phi.add_incoming(then_value, then_block) + phi.add_incoming(else_value, else_block) + + return phi + + # Expression class for for/in. 
+ class ForExpressionNode(ExpressionNode): + + def **init**\ (self, loop_variable, start, end, step, body): + self.loop_variable = loop_variable self.start = start self.end = end + self.step = step self.body = body + + def CodeGen(self): # Output this as: # var = alloca double # ... # start + = startexpr # store start -> var # goto loop # loop: # ... # bodyexpr # + ... # loopend: # step = stepexpr # endcond = endexpr # # curvar = load + var # nextvar = curvar + step # store nextvar -> var # br endcond, loop, + endloop # outloop: + + :: + + function = g_llvm_builder.basic_block.function + + # Create an alloca for the variable in the entry block. + alloca = CreateEntryBlockAlloca(function, self.loop_variable) + + # Emit the start code first, without 'variable' in scope. + start_value = self.start.CodeGen() + + # Store the value into the alloca. + g_llvm_builder.store(start_value, alloca) + + # Make the new basic block for the loop, inserting after current block. + loop_block = function.append_basic_block('loop') + + # Insert an explicit fall through from the current block to the loop_block. + g_llvm_builder.branch(loop_block) + + # Start insertion in loop_block. + g_llvm_builder.position_at_end(loop_block) + + # Within the loop, the variable is defined equal to the alloca. If it + # shadows an existing variable, we have to restore it, so save it now. + old_value = g_named_values.get(self.loop_variable, None) + g_named_values[self.loop_variable] = alloca + + # Emit the body of the loop. This, like any other expr, can change the + # current BB. Note that we ignore the value computed by the body. + self.body.CodeGen() + + # Emit the step value. + if self.step: + step_value = self.step.CodeGen() + else: + # If not specified, use 1.0. + step_value = Constant.real(Type.double(), 1) + + # Compute the end condition. + end_condition = self.end.CodeGen() + + # Reload, increment, and restore the alloca. This handles the case where + # the body of the loop mutates the variable. 
+ cur_value = g_llvm_builder.load(alloca, self.loop_variable) + next_value = g_llvm_builder.fadd(cur_value, step_value, 'nextvar') + g_llvm_builder.store(next_value, alloca) + + # Convert condition to a bool by comparing equal to 0.0. + end_condition_bool = g_llvm_builder.fcmp( + FCMP_ONE, end_condition, Constant.real(Type.double(), 0), 'loopcond') + + # Create the "after loop" block and insert it. + after_block = function.append_basic_block('afterloop') + + # Insert the conditional branch into the end of loop_block. + g_llvm_builder.cbranch(end_condition_bool, loop_block, after_block) + + # Any new code will be inserted in after_block. + g_llvm_builder.position_at_end(after_block) + + # Restore the unshadowed variable. + if old_value is not None: + g_named_values[self.loop_variable] = old_value + else: + del g_named_values[self.loop_variable] + + # for expr always returns 0.0. + return Constant.real(Type.double(), 0) + + # Expression class for a unary operator. + class UnaryExpressionNode(ExpressionNode): + + def **init**\ (self, operator, operand): self.operator = operator + self.operand = operand + + def CodeGen(self): operand = self.operand.CodeGen() function = + g_llvm_module.get_function_named('unary' + self.operator) return + g_llvm_builder.call(function, [operand], 'unop') + + # Expression class for var/in. + class VarExpressionNode(ExpressionNode): + + def **init**\ (self, variables, body): self.variables = variables + self.body = body + + def CodeGen(self): old_bindings = {} function = + g_llvm_builder.basic_block.function + + :: + + # Register all variables and emit their initializer. + for var_name, var_expression in self.variables.iteritems(): + # Emit the initializer before adding the variable to scope, this prevents + # the initializer from referencing the variable itself, and permits stuff + # like this: + # var a = 1 in + # var a = a in ... # refers to outer 'a'. 
+ if var_expression is not None: + var_value = var_expression.CodeGen() + else: + var_value = Constant.real(Type.double(), 0) + + alloca = CreateEntryBlockAlloca(function, var_name) + g_llvm_builder.store(var_value, alloca) + + # Remember the old variable binding so that we can restore the binding + # when we unrecurse. + old_bindings[var_name] = g_named_values.get(var_name, None) + + # Remember this binding. + g_named_values[var_name] = alloca + + # Codegen the body, now that all vars are in scope. + body = self.body.CodeGen() + + # Pop all our variables from scope. + for var_name in self.variables: + if old_bindings[var_name] is not None: + g_named_values[var_name] = old_bindings[var_name] + else: + del g_named_values[var_name] + + # Return the body computation. + return body + + # This class represents the "prototype" for a function, which captures its name, + # and its argument names (thus implicitly the number of arguments the function + # takes), as well as if it is an operator. + class PrototypeNode(object): + + def **init**\ (self, name, args, is_operator=False, precedence=0): + self.name = name self.args = args self.is_operator = is_operator + self.precedence = precedence + + def IsBinaryOp(self): return self.is_operator and len(self.args) == 2 + + def GetOperatorName(self): assert self.is_operator return self.name[-1] + + def CodeGen(self): # Make the function type, eg. double(double,double). + funct_type = Type.function( Type.double(), [Type.double()] \* + len(self.args), False) + + :: + + function = Function.new(g_llvm_module, funct_type, self.name) + + # If the name conflicted, there was already something with the same name. + # If it has a body, don't allow redefinition or reextern. + if function.name != self.name: + function.delete() + function = g_llvm_module.get_function_named(self.name) + + # If the function already has a body, reject this. 
+ if not function.is_declaration: + raise RuntimeError('Redefinition of function.') + + # If the function took a different number of args, reject. + if len(function.args) != len(self.args): + raise RuntimeError('Redeclaration of a function with different number ' + 'of args.') + + # Set names for all arguments and add them to the variables symbol table. + for arg, arg_name in zip(function.args, self.args): + arg.name = arg_name + + return function + + # Create an alloca for each argument and register the argument in the + symbol # table so that references to it will succeed. def + CreateArgumentAllocas(self, function): for arg_name, arg in + zip(self.args, function.args): alloca = CreateEntryBlockAlloca(function, + arg_name) g_llvm_builder.store(arg, alloca) + g_named_values[arg_name] = alloca + + # This class represents a function definition itself. + class FunctionNode(object): + + def **init**\ (self, prototype, body): self.prototype = prototype + self.body = body + + def CodeGen(self): # Clear scope. g_named_values.clear() + + :: + + # Create a function object. + function = self.prototype.CodeGen() + + # If this is a binary operator, install its precedence. + if self.prototype.IsBinaryOp(): + operator = self.prototype.GetOperatorName() + g_binop_precedence[operator] = self.prototype.precedence + + # Create a new basic block to start insertion into. + block = function.append_basic_block('entry') + global g_llvm_builder + g_llvm_builder = Builder.new(block) + + # Add all arguments to the symbol table and create their allocas. + self.prototype.CreateArgumentAllocas(function) + + # Finish off the function. + try: + return_value = self.body.CodeGen() + g_llvm_builder.ret(return_value) + + # Validate the generated code, checking for consistency. + function.verify() + + # Optimize the function. 
+ g_llvm_pass_manager.run(function) + except: + function.delete() + if self.prototype.IsBinaryOp(): + del g_binop_precedence[self.prototype.GetOperatorName()] + raise + + return function + + Parser + ------ + + class Parser(object): + + def **init**\ (self, tokens): self.tokens = tokens self.Next() + + # Provide a simple token buffer. Parser.current is the current token the + # parser is looking at. Parser.Next() reads another token from the lexer + and # updates Parser.current with its results. def Next(self): + self.current = self.tokens.next() + + # Gets the precedence of the current token, or -1 if the token is not a + binary # operator. def GetCurrentTokenPrecedence(self): if + isinstance(self.current, CharacterToken): return + g_binop_precedence.get(self.current.char, -1) else: return -1 + + # identifierexpr ::= identifier \| identifier '(' expression\* ')' def + ParseIdentifierExpr(self): identifier_name = self.current.name + self.Next() # eat identifier. + + :: + + if self.current != CharacterToken('('): # Simple variable reference. + return VariableExpressionNode(identifier_name) + + # Call. + self.Next() # eat '('. + args = [] + if self.current != CharacterToken(')'): + while True: + args.append(self.ParseExpression()) + if self.current == CharacterToken(')'): + break + elif self.current != CharacterToken(','): + raise RuntimeError('Expected ")" or "," in argument list.') + self.Next() + + self.Next() # eat ')'. + return CallExpressionNode(identifier_name, args) + + # numberexpr ::= number def ParseNumberExpr(self): result = + NumberExpressionNode(self.current.value) self.Next() # consume the + number. return result + + # parenexpr ::= '(' expression ')' def ParseParenExpr(self): self.Next() + # eat '('. + + :: + + contents = self.ParseExpression() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")".') + self.Next() # eat ')'. 
+ + return contents + + # ifexpr ::= 'if' expression 'then' expression 'else' expression def + ParseIfExpr(self): self.Next() # eat the if. + + :: + + # condition. + condition = self.ParseExpression() + + if not isinstance(self.current, ThenToken): + raise RuntimeError('Expected "then".') + self.Next() # eat the then. + + then_branch = self.ParseExpression() + + if not isinstance(self.current, ElseToken): + raise RuntimeError('Expected "else".') + self.Next() # eat the else. + + else_branch = self.ParseExpression() + + return IfExpressionNode(condition, then_branch, else_branch) + + # forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' + expression def ParseForExpr(self): self.Next() # eat the for. + + :: + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after for.') + + loop_variable = self.current.name + self.Next() # eat the identifier. + + if self.current != CharacterToken('='): + raise RuntimeError('Expected "=" after for variable.') + self.Next() # eat the '='. + + start = self.ParseExpression() + + if self.current != CharacterToken(','): + raise RuntimeError('Expected "," after for start value.') + self.Next() # eat the ','. + + end = self.ParseExpression() + + # The step value is optional. + if self.current == CharacterToken(','): + self.Next() # eat the ','. + step = self.ParseExpression() + else: + step = None + + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" after for variable specification.') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return ForExpressionNode(loop_variable, start, end, step, body) + + # varexpr ::= 'var' (identifier ('=' expression)?)+ 'in' expression def + ParseVarExpr(self): self.Next() # eat 'var'. + + :: + + variables = {} + + # At least one variable name is required. 
+ if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "var".') + + while True: + var_name = self.current.name + self.Next() # eat the identifier. + + # Read the optional initializer. + if self.current == CharacterToken('='): + self.Next() # eat '='. + variables[var_name] = self.ParseExpression() + else: + variables[var_name] = None + + # End of var list, exit loop. + if self.current != CharacterToken(','): + break + self.Next() # eat ','. + + if not isinstance(self.current, IdentifierToken): + raise RuntimeError('Expected identifier after "," in a var expression.') + + # At this point, we have to have 'in'. + if not isinstance(self.current, InToken): + raise RuntimeError('Expected "in" keyword after "var".') + self.Next() # eat 'in'. + + body = self.ParseExpression() + + return VarExpressionNode(variables, body) + + # primary ::= # identifierexpr | numberexpr | parenexpr | ifexpr | + forexpr | varexpr def ParsePrimary(self): if isinstance(self.current, + IdentifierToken): return self.ParseIdentifierExpr() elif + isinstance(self.current, NumberToken): return self.ParseNumberExpr() + elif isinstance(self.current, IfToken): return self.ParseIfExpr() elif + isinstance(self.current, ForToken): return self.ParseForExpr() elif + isinstance(self.current, VarToken): return self.ParseVarExpr() elif + self.current == CharacterToken('('): return self.ParseParenExpr() else: + raise RuntimeError('Unknown token when expecting an expression.') + + # unary ::= primary | unary_operator unary def ParseUnary(self): # If + the current token is not an operator, it must be a primary expression. + if (not isinstance(self.current, CharacterToken) or self.current in + [CharacterToken('('), CharacterToken(',')]): return self.ParsePrimary() + + :: + + # If this is a unary operator, read it. + operator = self.current.char + self.Next() # eat the operator.
+ return UnaryExpressionNode(operator, self.ParseUnary()) + + # binoprhs ::= (binary_operator unary)\* def ParseBinOpRHS(self, left, + left_precedence): # If this is a binary operator, find its precedence. + while True: precedence = self.GetCurrentTokenPrecedence() + + :: + + # If this is a binary operator that binds at least as tightly as the + # current one, consume it; otherwise we are done. + if precedence < left_precedence: + return left + + binary_operator = self.current.char + self.Next() # eat the operator. + + # Parse the unary expression after the binary operator. + right = self.ParseUnary() + + # If binary_operator binds less tightly with right than the operator after + # right, let the pending operator take right as its left. + next_precedence = self.GetCurrentTokenPrecedence() + if precedence < next_precedence: + right = self.ParseBinOpRHS(right, precedence + 1) + + # Merge left/right. + left = BinaryOperatorExpressionNode(binary_operator, left, right) + + # expression ::= unary binoprhs def ParseExpression(self): left = + self.ParseUnary() return self.ParseBinOpRHS(left, 0) + + # prototype # ::= id '(' id\* ')' # ::= binary LETTER number? (id, id) # + ::= unary LETTER (id) def ParsePrototype(self): precedence = None if + isinstance(self.current, IdentifierToken): kind = 'normal' + function_name = self.current.name self.Next() # eat function name. elif + isinstance(self.current, UnaryToken): kind = 'unary' self.Next() # eat + 'unary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "unary".') function_name = + 'unary' + self.current.char self.Next() # eat the operator. elif + isinstance(self.current, BinaryToken): kind = 'binary' self.Next() # eat + 'binary'. if not isinstance(self.current, CharacterToken): raise + RuntimeError('Expected an operator after "binary".') function_name = + 'binary' + self.current.char self.Next() # eat the operator. 
if + isinstance(self.current, NumberToken): if not 1 <= self.current.value <= + 100: raise RuntimeError('Invalid precedence: must be in range [1, + 100].') precedence = self.current.value self.Next() # eat the + precedence. else: raise RuntimeError('Expected function name, "unary" or + "binary" in ' 'prototype.') + + :: + + if self.current != CharacterToken('('): + raise RuntimeError('Expected "(" in prototype.') + self.Next() # eat '('. + + arg_names = [] + while isinstance(self.current, IdentifierToken): + arg_names.append(self.current.name) + self.Next() + + if self.current != CharacterToken(')'): + raise RuntimeError('Expected ")" in prototype.') + + # Success. + self.Next() # eat ')'. + + if kind == 'unary' and len(arg_names) != 1: + raise RuntimeError('Invalid number of arguments for a unary operator.') + elif kind == 'binary' and len(arg_names) != 2: + raise RuntimeError('Invalid number of arguments for a binary operator.') + + return PrototypeNode(function_name, arg_names, kind != 'normal', precedence) + + # definition ::= 'def' prototype expression def ParseDefinition(self): + self.Next() # eat def. proto = self.ParsePrototype() body = + self.ParseExpression() return FunctionNode(proto, body) + + # toplevelexpr ::= expression def ParseTopLevelExpr(self): proto = + PrototypeNode('', []) return FunctionNode(proto, self.ParseExpression()) + + # external ::= 'extern' prototype def ParseExtern(self): self.Next() # + eat extern. return self.ParsePrototype() + + # Top-Level parsing def HandleDefinition(self): + self.Handle(self.ParseDefinition, 'Read a function definition:') + + def HandleExtern(self): self.Handle(self.ParseExtern, 'Read an extern:') + + def HandleTopLevelExpression(self): try: function = + self.ParseTopLevelExpr().CodeGen() result = + g_llvm_executor.run_function(function, []) print 'Evaluated to:', + result.as_real(Type.double()) except Exception, e: raise#print + 'Error:', e try: self.Next() # Skip for error recovery. 
except: pass + + def Handle(self, function, message): try: print message, + function().CodeGen() except Exception, e: raise#print 'Error:', e try: + self.Next() # Skip for error recovery. except: pass + + Main driver code. + ----------------- + + def main(): # Set up the optimizer pipeline. Start with registering info + about how the # target lays out data structures. + g_llvm_pass_manager.add(g_llvm_executor.target_data) # Promote + allocas to registers. + g_llvm_pass_manager.add(PASS_PROMOTE_MEMORY_TO_REGISTER) # Do + simple "peephole" optimizations and bit-twiddling optzns. + g_llvm_pass_manager.add(PASS_INSTRUCTION_COMBINING) # Reassociate + expressions. g_llvm_pass_manager.add(PASS_REASSOCIATE) # Eliminate + Common SubExpressions. g_llvm_pass_manager.add(PASS_GVN) # Simplify + the control flow graph (deleting unreachable blocks, etc). + g_llvm_pass_manager.add(PASS_CFG_SIMPLIFICATION) + + g_llvm_pass_manager.initialize() + + # Install standard binary operators. # 1 is lowest possible precedence. + 40 is the highest. g_binop_precedence['='] = 2 + g_binop_precedence['<'] = 10 g_binop_precedence['+'] = 20 + g_binop_precedence['-'] = 20 g_binop_precedence['\*'] = 40 + + # Run the main "interpreter loop". while True: print 'ready<', try: raw + = raw_input() except KeyboardInterrupt: break + + :: + + parser = Parser(Tokenize(raw)) + while True: + # top ::= definition | external | expression | EOF + if isinstance(parser.current, EOFToken): + break + if isinstance(parser.current, DefToken): + parser.HandleDefinition() + elif isinstance(parser.current, ExternToken): + parser.HandleExtern() + else: + parser.HandleTopLevelExpression() + + # Print out all of the generated code. 
print '', g_llvm_module + + if **name** == '**main**\ ': main() diff --git a/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl8.txt b/docs/_build/html/_sources/doc/kaleidoscope/PythonLangImpl8.txt new file mode 100644 index 0000000..e69de29 diff --git a/docs/_build/html/_sources/doc/kaleidoscope/index.txt b/docs/_build/html/_sources/doc/kaleidoscope/index.txt new file mode 100644 index 0000000..6099fea --- /dev/null +++ b/docs/_build/html/_sources/doc/kaleidoscope/index.txt @@ -0,0 +1,21 @@ +Kaleidoscope +-------------- + +Implementing a Language with LLVM + +The LLVM `Kaleidoscope `_ tutorial +has been ported to llvmpy by Max Shawabkeh. + +.. toctree:: + :titlesonly: + :numbered: + + PythonLangImpl1.rst + PythonLangImpl2.rst + PythonLangImpl3.rst + PythonLangImpl4.rst + PythonLangImpl5.rst + PythonLangImpl6.rst + PythonLangImpl7.rst + PythonLangImpl8.rst + diff --git a/docs/_build/html/_sources/doc/llvm.core.Argument.txt b/docs/_build/html/_sources/doc/llvm.core.Argument.txt new file mode 100644 index 0000000..2481665 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Argument.txt @@ -0,0 +1,58 @@ ++-------------------------------+ +| layout: page | ++-------------------------------+ +| title: Argument (llvm.core) | ++-------------------------------+ + +The ``args`` property of ``llvm.core.Function`` objects yields +``llvm.core.Argument`` objects. This allows for setting attributes for +functions arguments. ``Argument`` objects cannot be constructed from +user code, the only way to get a reference to these are from +``Function`` objects. 
+ +The methods ``add_attribute`` and ``remove_attribute`` can be used to add +or remove the following attributes: + +Value\| Equivalent LLVM Assembly Keyword \| +-----\|----------------------------------\| ``ATTR_ZEXT``\ \| +``zeroext`` \| ``ATTR_SEXT``\ \| ``signext`` \| ``ATTR_IN_REG``\ \| +``inreg`` \| ``ATTR_BY_VAL``\ \| ``byval`` \| ``ATTR_STRUCT_RET``\ \| +``sret`` \| ``ATTR_NO_ALIAS``\ \| ``noalias`` \| ``ATTR_NO_CAPTURE``\ \| +``nocapture`` \| ``ATTR_NEST``\ \| ``nest`` \| + +These methods work exactly like the `corresponding +methods `_ of the ``Function`` class above. Refer to the +`LLVM docs `_ for +information on what each attribute means. + +The alignment of any argument can be set via the ``alignment`` property, +to any power of 2. + +llvm.core.Argument +================== + +Base Class +---------- + +- `llvm.core.Value `_ + +Properties +---------- + +``alignment`` +~~~~~~~~~~~~~ + +The alignment of the argument. Must be a power of 2. + +Methods +------- + +``add_attribute(attr)`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Add an attribute ``attr`` to the argument, from the set listed above. + +``remove_attribute(attr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Remove the attribute ``attr`` of the argument. diff --git a/docs/_build/html/_sources/doc/llvm.core.ArrayType.txt b/docs/_build/html/_sources/doc/llvm.core.ArrayType.txt new file mode 100644 index 0000000..f8b5e0c --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.ArrayType.txt @@ -0,0 +1,30 @@ ++--------------------------------+ +| layout: page | ++--------------------------------+ +| title: ArrayType (llvm.core) | ++--------------------------------+ + +llvm.core.ArrayType +=================== + +Base Class +---------- + +- `llvm.core.Type `_ + +Properties +---------- + +``element`` +~~~~~~~~~~~ + +[read-only] + +A ``Type`` object representing the type of the element of the array. + +``count`` +~~~~~~~~~ + +[read-only] + +The number of elements in the array.
diff --git a/docs/_build/html/_sources/doc/llvm.core.BasicBlock.txt b/docs/_build/html/_sources/doc/llvm.core.BasicBlock.txt new file mode 100644 index 0000000..74806a6 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.BasicBlock.txt @@ -0,0 +1,44 @@ ++---------------------------------+ +| layout: page | ++---------------------------------+ +| title: BasicBlock (llvm.core) | ++---------------------------------+ + +A basicblock is a list of instructions. A well-formed basicblock should +end with a terminator. ``Function.verify()`` will verify that. A +terminator is either a branch instruction or return instruction. It is +not possible to have instructions after a branch or return instruction. + +llvm.core.BasicBlock +==================== + +Base Class +---------- + +- `llvm.core.Value `_ + +Methods +------- + +``delete(self)`` +~~~~~~~~~~~~~~~~ + +Delete this basicblock from the function (``self.function``). + +``insert_before(self, name)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TODO + +Properties +---------- + +``function`` +~~~~~~~~~~~~ + +The parent function of this basicblock. + +``instructions`` +~~~~~~~~~~~~~~~~ + +A list of instructions in this basicblock. diff --git a/docs/_build/html/_sources/doc/llvm.core.Builder.txt b/docs/_build/html/_sources/doc/llvm.core.Builder.txt new file mode 100644 index 0000000..1e90908 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Builder.txt @@ -0,0 +1,411 @@ ++------------------------------+ +| layout: page | ++------------------------------+ +| title: Builder (llvm.core) | ++------------------------------+ + +The ``Builder`` class corresponds to the +`IRBuilder `_ +in C++ llvm. It provides a uniform API for populating +`BasicBlocks `_. Most of the methods in +``Builder`` correspond to the instructions in the LLVM IR. See `LLVM +documentation `_ for details. These +methods have the ``name`` argument for overriding the name of the result +variable.
When it is an empty string (default value), LLVM will set a +numeric ID for the result variable. + +llvm.core.Builder +================= + +- This will become a table of contents (this text will be scraped). + {:toc} + +Static Factor Method +-------------------- + +``new(basic_block)`` +~~~~~~~~~~~~~~~~~~~~ + +Create an instance of ``Builder`` at +`BasicBlock `_. + +Methods +------- + +``add(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs+rhs`` for integer values only. + +``alloca(self, ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that allocates stack memory for a value of type +``ty``. + +``alloca_array(self, ty, size, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that allocates stack memory for a ``size`` +elements array of type ``ty``. + +``and_(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs & rhs``. + +``ashr(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs >> rhs`` using arithmetic +shift. + +``bitcast(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that cast ``value`` to type ``dest_ty``. + +``branch(self, bblk)`` +~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that branch to basicblock ``bblk``. + +``call(self, fn, args, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that call function ``fn`` with a iterable of +arguments ``args``. + +``cbranch(self, if_value, then_blk, else_blk)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that conditionally branch base on the predicate +``if_value``. If ``if_value`` is ``True``, branch to ``then_blk``; +Otherwise, branch to ``else_blk``. 
+ +``extract_element(self, vec_val, idx_val, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that extracts an element from a value ``vec_val`` +of `llvm.core.VectorType `_ at index +``idx_val``. + +``extract_value(self, retval, idx, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that extracts an element from an aggregate value +``retval`` at index ``idx``. + +``fadd(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs + rhs`` for floating-point +values. + +``fcmp(self, rpred, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that compares ``lhs`` and ``rhs`` using the +comparision operation defined by ``rpred``. See +`here `_ for a list of comparators. + +``fdiv(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs / rhs`` for floating-point +values. + +``fmul(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs * rhs`` for floating-point +values. + +``fpext(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that extends ``value`` to a float type +``dest_ty``. + +``fptosi(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that converts a floating-point value ``value`` to +a signed integer type ``dest_ty``. + +``fptoui(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that converts a floating-point value ``value`` to +an unsigned integer type ``dest_ty``. + +``fptrunc(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that truncates a floating-point value ``value`` to +a float type ``dest_ty``. 
+ +``free(self, ptr)`` +~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that call performs heap deallocation on pointer +``ptr``. + +``frem(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs % rhs`` for floating-point +values. + +``fsub(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs - rhs`` for floating-point +values. + +``gep(self, ptr, indices, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See `GEP `_. + +``getresult(self, retval, idx, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +same as ``extract_value``. + +``icmp(self, ipred, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that compares ``lhs`` and ``rhs`` using the +comparision operation defined by ``ipred``. See +`here `_ for a list of comparators. + +``insert_element(self, vec_val, elt_val, idx_val, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that inserts a value ``elt_val`` into ``vec_val`` +of `llvm.core.VectorType `_ at index +``idx_val``. + +``inttoptr(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that converts an integer ``value`` to pointer +``dest_ty``. + +``invoke(self, func, args, then_blk, catch_blk, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +See `invoke `_ + +``load(self, ptr, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that loads a value at the memory pointed by +``ptr``. + +``lshr(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs >> rhs`` using logical shift. + +``malloc(self, ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that allocates heap memory of type ``ty``. The +instruction returns a pointer that points to a value of type ``ty``. 
+ +``malloc_array(self, ty, size, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to ``malloc`` but allocates an array of ``size`` elements. + +``mul(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs * rhs`` for integer types. + +``neg(self, val, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``0 - val``. + +``not_(self, val, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes an one's complement of ``val``. + +``or_(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs | rhs``. + +``phi(self, ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a PHI node of type ``ty``. + +``position_at_beginning(self, bblk)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Position the builder at the beginning of the given block. Next +instruction inserted will be first one in the block. + +``position_at_end(self, bblk)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Position the builder at the end of the given block. Next instruction +inserted will be last one in the block. + +``position_before(self, instr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Position the builder before the given instruction. The instruction can +belong to a basic block other than the current one. + +``ptrtoint(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that converts a pointer to an integer ``value`` of +type ``dest_ty``. + +``ret(self, value)`` +~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that returns ``value``. + +``ret_many(self, values)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that returns ``values`` which is an iterable of +`llvm.core.Value `_. + +``ret_void(self)`` +~~~~~~~~~~~~~~~~~~ + +Insert an instruction that returns nothing (void). 
+ +``sdiv(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs / rhs`` for signed integers. + +``select(self, cond, then_value, else_value, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``cond ? then_value : else_value``. + +``sext(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that sign extends an integer ``value`` to type +``dest_ty``. + +``shl(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs << rhs``. + +``shuffle_vector(self, vecA, vecB, mask, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that performs a vector shuffle base on the two +vectors -- ``vecA`` and ``vecB``, base on a bit mask ``mask``. The mask +must be a constant. + +See `LLVM document `_ +for detail. + +``sitofp(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that converts a signed integer ``value`` to a +floating-point type ``dest_ty``. + +``srem(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs % rhs`` for signed integers. + +``store(self, value, ptr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that stores ``value`` into the memory pointed by +``ptr``. + +``sub(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs - rhs``. + +``switch(self, value, else_blk, n=10)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that transfer control flow depending on the +``value``. ``else_blk`` is the default case. ``n`` sets the number of +additional cases. + +This method returns an instance of +`SwitchInstruction `_ for adding +cases to the switch. 
+ +``trunc(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that truncates an integer ``value`` to the +destination integer type ``dest_ty``. + +``udiv(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs / rhs`` for unsigned integers. + +``uitofp(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that converts an unsigned integer ``value`` to a +floating-point type ``dest_ty``. + +``unreachable(self)`` +~~~~~~~~~~~~~~~~~~~~~ + +Insert an unreachable instruction, which has no defined semantics. See +`LLVM document `_ for +details. + +``urem(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs % rhs`` for unsigned integers. + +``vaarg(self, list_val, ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This is used to access variable arguments given as ``list_val`` of type +``ty``. See `LLVM +document `_ about +variable argument intrinsics. + +``xor(self, lhs, rhs, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that computes ``lhs xor rhs``. + +``zext(self, value, dest_ty, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Insert an instruction that zero extends ``value`` to type ``dest_ty``. + +Properties +---------- + +``basic_block`` +~~~~~~~~~~~~~~~ + +The `BasicBlock `_ where the builder is +positioned. + +``block`` +~~~~~~~~~ + +Deprecated.
Same as ``basic_block`` diff --git a/docs/_build/html/_sources/doc/llvm.core.Constant.txt b/docs/_build/html/_sources/doc/llvm.core.Constant.txt new file mode 100644 index 0000000..4f2ed08 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Constant.txt @@ -0,0 +1,353 @@ ++-------------------------------+ +| layout: page | ++-------------------------------+ +| title: Constant (llvm.core) | ++-------------------------------+ + +``Constant``-s represents constants that appear within the code. The +values of such objects are known at creation time. Constants can be +created from Python constants. A constant expression is also a constant +-- given a ``Constant`` object, an operation (like addition, subtraction +etc) can be specified, to yield a new ``Constant`` object. Let's see +some examples: + + +.. code-block:: python + + #!/usr/bin/env python + + ti = Type.int() # a 32-bit int type + + k1 = Constant.int(ti, 42) # "int k1 = 42;" k2 = k1.add( Constant.int( + ti, 10 ) ) # "int k2 = k1 + 10;" + + tr = Type.float() + + r1 = Constant.real(tr, "3.141592") # create from a string r2 = + Constant.real(tr, 1.61803399) # create from a Python float {% + endhighlight %} + + # llvm.core.Constant + - This will become a table of contents (this text will be scraped). 
+ {:toc} + + Static factory methods + ---------------------- + + ``null(ty)`` + ~~~~~~~~~~~~ + + A null value (all zeros) of type ``ty`` + + ``all_ones(ty)`` + ~~~~~~~~~~~~~~~~ + + All 1's value of type ``ty`` + + ``undef(ty)`` + ~~~~~~~~~~~~~ + + An undefined value of type ``ty`` + + ``int(ty, value)`` + ~~~~~~~~~~~~~~~~~~ + + Integer of type ``ty``, with value ``value`` (a Python int or long) + + ``int_signextend(ty, value)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Integer of signed type ``ty`` (use for signed types) + + ``real(ty, value)`` + ~~~~~~~~~~~~~~~~~~~ + + Floating point value of type ``ty``, with value ``value`` (a Python + float) + + ``stringz(value)`` + ~~~~~~~~~~~~~~~~~~ + + A null-terminated string. ``value`` is a Python string + + ``string(value)`` + ~~~~~~~~~~~~~~~~~ + + As ``string(ty)``, but not null terminated + + ``array(ty, consts)`` + ~~~~~~~~~~~~~~~~~~~~~ + + Array of type ``ty``, initialized with ``consts`` (an iterable yielding + ``Constant`` objects of the appropriate type) + + ``struct(ty, consts)`` + ~~~~~~~~~~~~~~~~~~~~~~ + + Struct (unpacked) of type ``ty``, initialized with ``consts`` (an + iterable yielding ``Constant`` objects of the appropriate type) + + ``packed_struct(ty, consts)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + As ``struct(ty, consts)`` but packed + + ``vector(consts)`` + ~~~~~~~~~~~~~~~~~~ + + Vector, initialized with ``consts`` (an iterable yielding ``Constant`` + objects of the appropriate type) + + ``sizeof(ty)`` + ~~~~~~~~~~~~~~ + + Constant value representing the sizeof the type ``ty`` + + Methods + ------- + + The following operations on constants are supported. For more details on + any operation, consult the `Constant + Expressions `_ + section of the LLVM Language Reference. + + ``k.neg()`` + ~~~~~~~~~~~ + + negation, same as ``0 - k`` + + ``k.not_()`` + ~~~~~~~~~~~~ + + 1's complement of ``k``. Note trailing underscore. + + ``k.add(k2)`` + ~~~~~~~~~~~~~ + + ``k + k2``, where ``k`` and ``k2`` are integers. 
+ + ``k.fadd(k2)`` + ~~~~~~~~~~~~~~ + + ``k + k2``, where ``k`` and ``k2`` are floating-point. + + ``k.sub(k2)`` + ~~~~~~~~~~~~~ + + ``k - k2``, where ``k`` and ``k2`` are integers. + + ``k.fsub(k2)`` + ~~~~~~~~~~~~~~ + + ``k - k2``, where ``k`` and ``k2`` are floating-point. + + ``k.mul(k2)`` + ~~~~~~~~~~~~~ + + ``k * k2``, where ``k`` and ``k2`` are integers. + + ``k.fmul(k2)`` + ~~~~~~~~~~~~~~ + + ``k * k2``, where ``k`` and ``k2`` are floating-point. + + ``k.udiv(k2)`` + ~~~~~~~~~~~~~~ + + Quotient of unsigned division of ``k`` with ``k2`` + + ``k.sdiv(k2)`` + ~~~~~~~~~~~~~~ + + Quotient of signed division of ``k`` with ``k2`` + + ``k.fdiv(k2)`` + ~~~~~~~~~~~~~~ + + Quotient of floating point division of ``k`` with ``k2`` + + ``k.urem(k2)`` + ~~~~~~~~~~~~~~ + + Remainder of unsigned division of ``k`` with ``k2`` + + ``k.srem(k2)`` + ~~~~~~~~~~~~~~ + + Remainder of signed division of ``k`` with ``k2`` + + ``k.frem(k2)`` + ~~~~~~~~~~~~~~ + + Remainder of floating point division of ``k`` with ``k2`` + + ``k.and_(k2)`` + ~~~~~~~~~~~~~~ + + Bitwise and of ``k`` and ``k2``. Note trailing underscore. + + ``k.or_(k2)`` + ~~~~~~~~~~~~~ + + Bitwise or of ``k`` and ``k2``. Note trailing underscore. + + ``k.xor(k2)`` + ~~~~~~~~~~~~~ + + Bitwise exclusive-or of ``k`` and ``k2``. + + ``k.icmp(icmp, k2)`` + ~~~~~~~~~~~~~~~~~~~~ + + Compare ``k`` with ``k2`` using the predicate ``icmp``. See + `here `_ for a list of predicates for integer + operands. + + ``k.fcmp(fcmp, k2)`` + ~~~~~~~~~~~~~~~~~~~~ + + Compare ``k`` with ``k2`` using the predicate ``fcmp``. See + `here `_ for a list of predicates for real + operands. + + ``k.shl(k2)`` + ~~~~~~~~~~~~~ + + Shift ``k`` left by ``k2`` bits. + + ``k.lshr(k2)`` + ~~~~~~~~~~~~~~ + + Shift ``k`` logically right by ``k2`` bits (new bits are 0s). + + ``k.ashr(k2)`` + ~~~~~~~~~~~~~~ + + Shift ``k`` arithmetically right by ``k2`` bits (new bits are same as + previous sign bit).
+ + ``k.gep(indices)`` + ~~~~~~~~~~~~~~~~~~ + + GEP, see `LLVM docs `_. + + ``k.trunc(ty)`` + ~~~~~~~~~~~~~~~ + + Truncate ``k`` to a type ``ty`` of lower bitwidth. + + ``k.sext(ty)`` + ~~~~~~~~~~~~~~ + + Sign extend ``k`` to a type ``ty`` of higher bitwidth, while extending + the sign bit. + + ``k.zext(ty)`` + ~~~~~~~~~~~~~~ + + Zero extend ``k`` to a type ``ty`` of higher bitwidth; all new bits are + 0s. + + ``k.fptrunc(ty)`` + ~~~~~~~~~~~~~~~~~ + + Truncate floating point constant ``k`` to floating point type ``ty`` of + lower size than k's. + + ``k.fpext(ty)`` + ~~~~~~~~~~~~~~~ + + Extend floating point constant ``k`` to floating point type ``ty`` of + higher size than k's. + + ``k.uitofp(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert an unsigned integer constant ``k`` to floating point constant of + type ``ty``. + + ``k.sitofp(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert a signed integer constant ``k`` to floating point constant of + type ``ty``. + + ``k.fptoui(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert a floating point constant ``k`` to an unsigned integer constant + of type ``ty``. + + ``k.fptosi(ty)`` + ~~~~~~~~~~~~~~~~ + + Convert a floating point constant ``k`` to a signed integer constant of + type ``ty``. + + ``k.ptrtoint(ty)`` + ~~~~~~~~~~~~~~~~~~ + + Convert a pointer constant ``k`` to an integer constant of type ``ty``. + + ``k.inttoptr(ty)`` + ~~~~~~~~~~~~~~~~~~ + + Convert an integer constant ``k`` to a pointer constant of type ``ty``. + + ``k.bitcast(ty)`` + ~~~~~~~~~~~~~~~~~ + + Convert ``k`` to an (equal-width) constant of type ``ty``. + + ``k.select(cond,k2,k3)`` + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Replace value with ``k2`` if the 1-bit integer constant ``cond`` is 1, + else with ``k3``. + + ``k.extract_element(idx)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Extract value at ``idx`` (integer constant) from a vector constant + ``k``.
+ + ``k.insert_element(k2,idx)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Insert value ``k2`` (scalar constant) at index ``idx`` (integer + constant) of vector constant ``k``. + + ``k.shuffle_vector(k2,mask)`` + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Shuffle vector constant ``k`` based on vector constants ``k2`` and + ``mask``. + + -------------- + + # Other Constant Classes + The following subclasses of ``Constant`` do not provide additional + methods, **they serve only to provide richer type information.** + + Subclass \| LLVM C++ Class \| Remarks \| + ---------\|----------------\|---------\| ``ConstantExpr`` \| + ``llvmConstantExpr`` \| A constant expression \| + ``ConstantAggregateZero``\ \| ``llvmConstantAggregateZero``\ \| All-zero + constant \| ``ConstantInt``\ \| ``llvmConstantInt``\ \| An integer + constant \| ``ConstantFP``\ \| ``llvmConstantFP``\ \| A floating-point + constant \| ``ConstantArray``\ \| ``llvmConstantArray``\ \| An array + constant \| ``ConstantStruct``\ \| ``llvmConstantStruct``\ \| A + structure constant \| ``ConstantVector``\ \| ``llvmConstantVector``\ \| + A vector constant \| ``ConstantPointerNull``\ \| + ``llvmConstantPointerNull``\ \| All-zero pointer constant \| + ``UndefValue``\ \| ``llvmUndefValue``\ \| corresponds to ``undef`` of + LLVM IR \| + + These types are helpful in ``isinstance`` checks, like so: + + {% highlight python %} ti = Type.int(32) k1 = Constant.int(ti, 42) # + int32_t k1 = 42; k2 = Constant.array(ti, [k1, k1]) # int32_t k2[] = { + k1, k1 }; + + assert isinstance(k1, ConstantInt) assert isinstance(k2, ConstantArray) + diff --git a/docs/_build/html/_sources/doc/llvm.core.Function.txt b/docs/_build/html/_sources/doc/llvm.core.Function.txt new file mode 100644 index 0000000..1d6070c --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Function.txt @@ -0,0 +1,151 @@ ++-------------------------------+ +| layout: page | ++-------------------------------+ +| title: Function (llvm.core) | ++-------------------------------+ 
+ +llvm.core.Function +================== + +- This will become a table of contents (this text will be scraped). + {:toc} + +Base Class +---------- + +- `llvm.core.GlobalValue `_ + +Static Constructors +------------------- + +``new(module_obj, func_ty, name)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a function named ``name`` of type ``func_ty`` in the module +``module_obj`` and return a ``Function`` object that represents it. + +``get(module_obj, name)`` +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Return a ``Function`` object to represent the function named ``name`` in +the module ``module_obj`` or raise ``LLVMException`` if such a function +does not exist. + +``get_or_insert(module_obj, func_ty, name)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to ``get``, except that if the function does not exist it is +added first, as though with ``new``. + +``intrinsic(module_obj, intrinsic_id, types)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create and return a ``Function`` object that refers to an intrinsic +function, as described `here `_. + +Properties +---------- + +``calling_convention`` +~~~~~~~~~~~~~~~~~~~~~~ + +The calling convention for the function, as listed +`here `_. + +``collector`` +~~~~~~~~~~~~~ + +A string holding the name of the garbage collection algorithm. See `LLVM +docs `_. + +``does_not_throw`` +~~~~~~~~~~~~~~~~~~ + +Setting to True sets the ``ATTR_NO_UNWIND`` attribute, False removes it. +Shortcut to using ``f.add_attribute(ATTR_NO_UNWIND)`` and +``f.remove_attribute(ATTR_NO_UNWIND)``. + +``args`` +~~~~~~~~ + +[read-only] + +List of `llvm.core.Argument `_ objects +representing the formal arguments of the function. + +``basic_block_count`` +~~~~~~~~~~~~~~~~~~~~~ + +[read-only] + +Number of basic blocks belonging to this function. Same as +``len(f.basic_blocks)`` but faster if you just want the count. 
+ +``entry_basic_block`` +~~~~~~~~~~~~~~~~~~~~~ + +[read-only] + +The `llvm.core.BasicBlock `_ object +representing the entry basic block for this function, or ``None`` if +there are no basic blocks. + +``basic_blocks`` +~~~~~~~~~~~~~~~~ + +[read-only] + +List of `llvm.core.BasicBlock `_ objects +representing the basic blocks belonging to this function. + +``intrinsic_id`` +~~~~~~~~~~~~~~~~ + +[read-only] + +Returns the ID of the intrinsic if this object represents an intrinsic +instruction. Otherwise 0. + +Methods +------- + +``delete()`` +~~~~~~~~~~~~ + +Deletes the function from its module. Do not hold any references to +this object after calling ``delete`` on it. + +``append_basic_block(name)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add a new basic block named ``name``, and return a corresponding +`llvm.core.BasicBlock `_ object. Note that if +this is not the entry basic block, you'll have to add appropriate branch +instructions from other basic blocks yourself. + +``add_attribute(attr)`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Add an attribute ``attr`` to the function, from the set listed above. + +``remove_attribute(attr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Remove the attribute ``attr`` of the function. + +``viewCFG()`` +~~~~~~~~~~~~~ + +Displays the control flow graph using the GraphViz tool. + +``viewCFGOnly()`` +~~~~~~~~~~~~~~~~~ + +Displays the control flow graph using the GraphViz tool, but omitting +function bodies. + +``verify()`` +~~~~~~~~~~~~ + +Verifies the function. See `LLVM +docs `_.
diff --git a/docs/_build/html/_sources/doc/llvm.core.FunctionType.txt b/docs/_build/html/_sources/doc/llvm.core.FunctionType.txt new file mode 100644 index 0000000..8d40009 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.FunctionType.txt @@ -0,0 +1,48 @@ ++-----------------------------------+ +| layout: page | ++-----------------------------------+ +| title: FunctionType (llvm.core) | ++-----------------------------------+ + +llvm.core.FunctionType +====================== + +Base Class +---------- + +- `llvm.core.Type `_ + +Properties +---------- + +``return_type`` +~~~~~~~~~~~~~~~ + +[read-only] + +A `Type `_ object, representing the return type of +the function. + +``vararg`` +~~~~~~~~~~ + +[read-only] + +``True`` if the function is variadic. + +``args`` +~~~~~~~~ + +[read-only] + +Returns an iterable object that yields `Type `_ +objects that represent, in order, the types of the arguments accepted by +the function. Used like this: + + +.. code-block:: python + + func_type = Type.function( Type.int(), [ + Type.int(), Type.int() ] ) for arg in func_type.args: assert arg.kind + == TYPE_INTEGER assert arg == Type.int() assert func_type.arg_count + == len(func_type.args) diff --git a/docs/_build/html/_sources/doc/llvm.core.GlobalValue.txt b/docs/_build/html/_sources/doc/llvm.core.GlobalValue.txt new file mode 100644 index 0000000..70dee7b --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.GlobalValue.txt @@ -0,0 +1,97 @@ ++----------------------------------+ +| layout: page | ++----------------------------------+ +| title: GlobalValue (llvm.core) | ++----------------------------------+ + +The class ``llvm.core.GlobalValue`` represents module-scope aliases, +variables and functions. Global variables are represented by the +sub-class `llvm.core.GlobalVariable `_ +and functions by `llvm.core.Function `_. + +Global values have the read-write properties ``linkage``, ``section``, +``visibility`` and ``alignment``. 
Use one of the following constants +(from llvm.core) as values for ``linkage`` (see `LLVM +documentation `_ for +details on each): + +Value \| Equivalent LLVM Assembly Keyword \| +------\|----------------------------------\| ``LINKAGE_EXTERNAL`` \| +``externally_visible`` \| ``LINKAGE_AVAILABLE_EXTERNALLY`` \| +``available_externally`` \| ``LINKAGE_LINKONCE_ANY`` \| ``linkonce`` \| +``LINKAGE_LINKONCE_ODR`` \| ``linkonce_odr`` \| ``LINKAGE_WEAK_ANY`` \| +``weak`` \| ``LINKAGE_WEAK_ODR`` \| ``weak_odr`` \| +``LINKAGE_APPENDING`` \| ``appending`` \| ``LINKAGE_INTERNAL`` \| +``internal`` \| ``LINKAGE_PRIVATE`` \| ``private`` \| +``LINKAGE_DLLIMPORT`` \| ``dllimport`` \| ``LINKAGE_DLLEXPORT`` \| +``dllexport`` \| ``LINKAGE_EXTERNAL_WEAK`` \| ``extern_weak`` \| +``LINKAGE_GHOST`` \| deprecated -- do not use \| ``LINKAGE_COMMON`` \| +``common`` \| ``LINKAGE_LINKER_PRIVATE`` \| ``linker_private`` \| + +The ``section`` property can be assigned strings (like ".rodata"), which +will be used if the target supports it. The ``visibility`` property can be set +to one of these constants (from llvm.core, see also `LLVM +docs `_): + +Value \| Equivalent LLVM Assembly Keyword \| +------\|----------------------------------\| ``VISIBILITY_DEFAULT`` \| +``default`` \| ``VISIBILITY_HIDDEN`` \| ``hidden`` \| +``VISIBILITY_PROTECTED`` \| ``protected`` \| + +The ``alignment`` property can be 0 (default), or can be set to a power +of 2. The read-only property ``is_declaration`` can be used to check if +the global is a declaration or not. The module to which the global +belongs can be retrieved using the ``module`` property (read-only). + +llvm.core.GlobalValue +===================== + +- This will become a table of contents (this text will be scraped). + {:toc} + +Base Class +---------- + +- `llvm.core.Constant `_ + +Properties +---------- + +``linkage`` +~~~~~~~~~~~ + +The linkage type, takes one of the constants listed above (LINKAGE\_\*). 
+ +``section`` +~~~~~~~~~~~ + +A string like ".rodata", indicating the section into which the global is +placed into. + +``visibility`` +~~~~~~~~~~~~~~ + +The visibility type, takes one of the constants listed above +(VISIBILITY\_\*). + +``alignment`` +~~~~~~~~~~~~~ + +A power-of-2 integer indicating the boundary to align to. + +``is_declaration`` +~~~~~~~~~~~~~~~~~~ + +[read-only] + +``True`` if the global is a declaration, ``False`` otherwise. + +``module`` +~~~~~~~~~~ + +[read-only] + +:: + + The module object to which this global belongs to. + diff --git a/docs/_build/html/_sources/doc/llvm.core.GlobalVariable.txt b/docs/_build/html/_sources/doc/llvm.core.GlobalVariable.txt new file mode 100644 index 0000000..392a6a0 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.GlobalVariable.txt @@ -0,0 +1,39 @@ ++-------------------------------------+ +| layout: page | ++-------------------------------------+ +| title: GlobalVariable (llvm.core) | ++-------------------------------------+ + +Global variables (``llvm.core.GlobalVariable``) are subclasses of +`llvm.core.GlobalValue `_ and represent +module-level variables. These can have optional initializers and can be +marked as constants. Global variables can be created either by using the +``add_global_variable`` method of the `Module `_ +class, or by using the static method ``GlobalVariable.new``. + + +.. code-block:: python + + # create a global variable using + add_global_variable method gv1 = + module_obj.add_global_variable(Type.int(), "gv1") + + # or equivalently, using a static constructor method + gv2 = GlobalVariable.new(module_obj, Type.int(), "gv2") {% endhighlight + %} + + Existing global variables of a module can be accessed by name using + ``module_obj.get_global_variable_named(name)`` or + ``GlobalVariable.get``. All existing global variables can be enumerated + via iterating over the property ``module_obj.global_variables``. 
+ + {% highlight python %} # retrieve a reference to the global variable + gv1, # using the get_global_variable_named method gv1 = + module_obj.get_global_variable_named("gv1") + + # or equivalently, using the static ``get`` method: + gv2 = GlobalVariable.get(module_obj, "gv2") + + # list all global variables in a module + for gv in module_obj.global_variables: print gv.name, "of type", + gv.type diff --git a/docs/_build/html/_sources/doc/llvm.core.Instruction.txt b/docs/_build/html/_sources/doc/llvm.core.Instruction.txt new file mode 100644 index 0000000..95aeb81 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Instruction.txt @@ -0,0 +1,242 @@ ++----------------------------------+ +| layout: page | ++----------------------------------+ +| title: Instruction (llvm.core) | ++----------------------------------+ + +An ``llvm.core.Instruction`` object represents an LLVM instruction. This +class is the root of a small hierarchy: + +:: + + Instruction + CallOrInvokeInstruction + PHINode + SwitchInstruction + CompareInstruction + +Instructions are not created directly, but via a builder. The builder +both creates instructions and adds them to a basic block at the same +time. One way of getting instruction objects are from basic blocks. + +Being derived from `llvm.core.User `_, the +instruction is-a user, i.e., an instruction in turn uses other values. +The values an instruction uses are its operands. These may be accessed +using ``operands`` property from the +`llvm.core.User `_ base. + +The name of the instruction (like ``add``, ``mul`` etc) can be got via +the ``opcode_name`` property. The ``basic_block`` property gives the +basic block to which the instruction belongs to. Note that llvmpy does +not allow free-standing instruction objects (i.e., all instructions are +created contained within a basic block). + +Classes of instructions can be got via the properties ``is_terminator``, +``is_binary_op``, ``is_shift`` etc. See below for the full list. 
+ +- This will become a table of contents (this text will be scraped). + {:toc} + +llvm.core.Instruction +===================== + +Base Class +---------- + +- `llvm.core.User `_ + +Properties +---------- + +``basic_block`` +~~~~~~~~~~~~~~~ + +[read-only] The basic block to which this instruction belongs to. + +``is_terminator`` +~~~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is a terminator instruction. + +``is_binary_op`` +~~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is a binary operator. + +``is_shift`` +~~~~~~~~~~~~ + +[read-only] True if the instruction is a shift instruction. + +``is_cast`` +~~~~~~~~~~~ + +[read-only] True if the instruction is a cast instruction. + +``is_logical_shift`` +~~~~~~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is a logical shift instruction. + +``is_arithmetic_shift`` +~~~~~~~~~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is an arithmetic shift instruction. + +``is_associative`` +~~~~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is associative. + +``is_commutative`` +~~~~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is commutative. + +``is_volatile`` +~~~~~~~~~~~~~~~ + +[read-only] True if the instruction is a volatile load or store. + +``opcode`` +~~~~~~~~~~ + +[read-only] The numeric opcode value of the instruction. Do not rely on +the absolute value of this number, it may change with LLVM version. + +``opcode_name`` +~~~~~~~~~~~~~~~ + +[read-only] The name of the instruction, like ``add``, ``sub`` etc. + +-------------- + +llvm.core.CallOrInvokeInstruction +================================= + +The ``llvm.core.CallOrInvokeInstruction`` is a subclass of +``llvm.core.Instruction``, and represents either a ``call`` or an +``invoke`` instruction. + +Base Class +---------- + +- ``llvm.core.Instruction`` + +Properties +---------- + +``calling_convention`` Get or set the calling convention. See +`here `_ for possible values. 
+ +Methods +------- + +``add_parameter_attribute(idx, attr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add an attribute ``attr`` to the ``idx``-th argument. See +`here `_ for possible values of ``attr``. + +``remove_parameter_attribute(idx, attr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Remove an attribute ``attr`` from the ``idx``-th argument. See +`here `_ for possible values of ``attr``. + +``set_parameter_alignment(idx, align)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Set the alignment of the ``idx``-th argument to ``align``. ``align`` +should be a power of two. + +-------------- + +llvm.core.PHINode +================= + +The ``llvm.core.PHINode`` is a subclass of ``llvm.core.Instruction``, +and represents the ``phi`` instruction. When created (using +``Builder.phi``) the phi node contains no incoming blocks (nor their +corresponding values). To add an incoming arc to the phi node, use the +``add_incoming`` method, which takes a source block +(`llvm.core.BasicBlock `_ object) and a value +(object of `llvm.core.Value `_ or of a class +derived from it) that the phi node will take on if control branches in +from that block. + +Base Class +---------- + +- ``llvm.core.Instruction`` + +Properties +---------- + +``incoming_count`` [read-only] The number of incoming arcs for this phi +node. + +Methods +------- + +``add_incoming(value, block)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add an incoming arc, from the +`llvm.core.BasicBlock `_ object ``block``, +with the corresponding value ``value``. ``value`` should be an object of +`llvm.core.Value `_ (or of a descendent class). + +``get_incoming_value(idx)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns the ``idx``-th incoming arc's value. + +``get_incoming_block(idx)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns the ``idx``-th incoming arc's block. 
+ +llvm.core.SwitchInstruction # {#switchinstr} +============================================ + +(TODO describe) + +Base Class +---------- + +- ``llvm.core.Instruction`` + +Methods +------- + +``add_case(const, block)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add another case to the switch statement. When the expression being +evaluated equals ``const``, then control branches to ``block``. Here +``const`` must be of type +`llvm.core.ConstantInt `_. + +-------------- + +llvm.core.CompareInstruction +============================ + +(TODO describe) + +Base Class +---------- + +- ``llvm.core.Instruction`` + +Properties +---------- + +``predicate`` +~~~~~~~~~~~~~ + +[read-only] + +The predicate of the compare instruction, one of the ``ICMP_*`` or +``FCMP_*`` constants. diff --git a/docs/_build/html/_sources/doc/llvm.core.IntegerType.txt b/docs/_build/html/_sources/doc/llvm.core.IntegerType.txt new file mode 100644 index 0000000..f62907c --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.IntegerType.txt @@ -0,0 +1,23 @@ ++----------------------------------+ +| layout: page | ++----------------------------------+ +| title: IntegerType (llvm.core) | ++----------------------------------+ + +llvm.core.IntegerType +===================== + +Base Class +---------- + +- `llvm.core.Type `_ + +Properties +---------- + +``width`` +~~~~~~~~~ + +[read-only] + +The width of the integer type, in number of bits. diff --git a/docs/_build/html/_sources/doc/llvm.core.Module.txt b/docs/_build/html/_sources/doc/llvm.core.Module.txt new file mode 100644 index 0000000..3dbd5c8 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Module.txt @@ -0,0 +1,19 @@ ++-----------------------------+ +| layout: page | ++-----------------------------+ +| title: Module (llvm.core) | ++-----------------------------+ + +Modules are top-level container objects. You need to create a module +object first, before you can add global variables, aliases or functions. 
+Modules are created using the static method ``Module.new``: + + +.. code-block:: python + + #!/usr/bin/env python + + from llvm import \* from llvm.core import \* + + # create a module + my_module = Module.new('my_module') diff --git a/docs/_build/html/_sources/doc/llvm.core.PointerType.txt b/docs/_build/html/_sources/doc/llvm.core.PointerType.txt new file mode 100644 index 0000000..a869fba --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.PointerType.txt @@ -0,0 +1,31 @@ ++----------------------------------+ +| layout: page | ++----------------------------------+ +| title: PointerType (llvm.core) | ++----------------------------------+ + +llvm.core.PointerType +===================== + +Base Class +---------- + +- `llvm.core.Type `_ + +Properties +---------- + +``address_space`` +~~~~~~~~~~~~~~~~~ + +[read-only] + +The address space of the pointer. + +``pointee`` +~~~~~~~~~~~ + +[read-only] + +A `Type `_ object representing the type of the +value pointed to. diff --git a/docs/_build/html/_sources/doc/llvm.core.StructType.txt b/docs/_build/html/_sources/doc/llvm.core.StructType.txt new file mode 100644 index 0000000..e69de29 diff --git a/docs/_build/html/_sources/doc/llvm.core.Type.txt b/docs/_build/html/_sources/doc/llvm.core.Type.txt new file mode 100644 index 0000000..58883d7 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Type.txt @@ -0,0 +1,128 @@ ++---------------------------+ +| layout: page | ++---------------------------+ +| title: Type (llvm.core) | ++---------------------------+ + +llvm.core.Type +============== + +- This will become a table of contents (this text will be scraped). + {:toc} + +Static Constructors +------------------- + +``int(n)`` +~~~~~~~~~~ + +Create an integer type of bit width ``n``. + +``float()`` +~~~~~~~~~~~ + +Create a 32-bit floating point type. + +``double()`` +~~~~~~~~~~~~ + +Create a 64-bit floating point type. + +``x86_fp80()`` +~~~~~~~~~~~~~~ + +Create a 80-bit 80x87-style floating point type. 
+ +``fp128()`` +~~~~~~~~~~~ + +Create a 128-bit floating point type (112-bit mantissa). + +``ppc_fp128()`` +~~~~~~~~~~~~~~~ + +Create a 128-bit float (two 64-bits). + +``function(ret, params, vararg=False)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a function type, having the return type ``ret`` (must be a +``Type``), accepting the parameters ``params``, where ``params`` is an +iterable, that yields ``Type`` objects representing the type of each +function argument in order. If ``vararg`` is ``True``, the function is +variadic. + +``struct(eltys, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create an unpacked structure. ``eltys`` is an iterable, that yields +``Type`` objects representing the type of each element in order. + +If ``name`` evaluates to ``True`` (not empty), create an *identified +structure*; otherwise, create a *literal structure* by default. + +``packed_struct(eltys, name='')`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Like ``struct(eltys)``, but creates a packed struct. + +``array(elty, count)`` +~~~~~~~~~~~~~~~~~~~~~~ + +Creates an array type, holding ``count`` elements, each of type ``elty`` +(which should be a ``Type``). + +``pointer(pty, addrspc=0)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a pointer to type ``pty`` (which should be a ``Type``). +``addrspc`` is an integer that represents the address space of the +pointer (see LLVM docs or ask on llvm-dev for more info). + +``void()`` +~~~~~~~~~~ + +Creates a void type. Used for function return types. + +``label()`` +~~~~~~~~~~~ + +Creates a label type. + +``opaque(name)`` +~~~~~~~~~~~~~~~~ + +Opaque `StructType `_, used for creating +self-referencing types. + +Properties +---------- + +``kind`` +~~~~~~~~ + +[read-only] + +A value (enum) representing the "type" of the object. It will be one of +the following constants defined in ``llvm.core``: + + +.. code-block:: python + + # Warning: do not rely on actual numerical + values! 
TYPE_VOID = 0 TYPE_FLOAT = 1 TYPE_DOUBLE = 2 TYPE_X86_FP80 + = 3 TYPE_FP128 = 4 TYPE_PPC_FP128 = 5 TYPE_LABEL = 6 TYPE_INTEGER = + 7 TYPE_FUNCTION = 8 TYPE_STRUCT = 9 TYPE_ARRAY = 10 TYPE_POINTER = + 11 TYPE_OPAQUE = 12 TYPE_VECTOR = 13 TYPE_METADATA = 14 TYPE_UNION = + 15 + + + +Example: +^^^^^^^^ + + +.. code-block:: python + + assert Type.int().kind == TYPE_INTEGER assert + Type.void().kind == TYPE_VOID diff --git a/docs/_build/html/_sources/doc/llvm.core.User.txt b/docs/_build/html/_sources/doc/llvm.core.User.txt new file mode 100644 index 0000000..0762729 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.User.txt @@ -0,0 +1,39 @@ ++---------------------------+ +| layout: page | ++---------------------------+ +| title: User (llvm.core) | ++---------------------------+ + +``User``-s are values that refer to other values. The values so referred to +can be retrieved by the properties of ``User``. This is the reverse of +the ``Value.uses``. Together these can be used to traverse the use-def +chains of the SSA. + +-------------- + +llvm.core.User # {#user} +======================== + +Base Class +---------- + +- `llvm.core.Value `_ + +Properties +---------- + +``operands`` +~~~~~~~~~~~~ + +[read-only] + +The list of operands (values, of type +`llvm.core.Value `_) that this value refers to. + +``operand_count`` +~~~~~~~~~~~~~~~~~ + +[read-only] + +The number of operands that this value refers to. Same as +``len(user.operands)`` but faster if you just want the count. diff --git a/docs/_build/html/_sources/doc/llvm.core.Value.txt b/docs/_build/html/_sources/doc/llvm.core.Value.txt new file mode 100644 index 0000000..fdff4cc --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.Value.txt @@ -0,0 +1,65 @@ ++----------------------------+ +| layout: page | ++----------------------------+ +| title: Value (llvm.core) | ++----------------------------+ + +llvm.core.Value +=============== + +- This will become a table of contents (this text will be scraped). 
+ {:toc} + +Properties +---------- + +``name`` +~~~~~~~~ + +The name of the value. + +``type`` +~~~~~~~~ + +[read-only] + +An ``llvm.core.Type`` object representing the type of the value. + +``uses`` +~~~~~~~~ + +[read-only] + +The list of values (``llvm.core.Value``) that use this value. + +``use_count`` +~~~~~~~~~~~~~ + +[read-only] + +The number of values that use (refer to) this value. Same as +``len(val.uses)`` but faster if you just want the count. + +``value_id`` +~~~~~~~~~~~~ + +[read-only] + +Returns ``llvm::Value::getValueID()``. Refer to the LLVM documentation for more +info. + +Special Methods +--------------- + +``__str__`` +~~~~~~~~~~~ + +``Value`` objects can be stringified into their LLVM assembly language +representation. + +``__eq__`` +~~~~~~~~~~ + +``Value`` objects can be compared for equality. Internally, this +converts both arguments into their LLVM assembly representations and +compares the resultant strings. diff --git a/docs/_build/html/_sources/doc/llvm.core.VectorType.txt b/docs/_build/html/_sources/doc/llvm.core.VectorType.txt new file mode 100644 index 0000000..19b98e6 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.core.VectorType.txt @@ -0,0 +1,31 @@ ++---------------------------------+ +| layout: page | ++---------------------------------+ +| title: VectorType (llvm.core) | ++---------------------------------+ + +llvm.core.VectorType +==================== + +Base Class +---------- + +- `llvm.core.Type `_ + +Properties +---------- + +``element`` +~~~~~~~~~~~ + +[read-only] + +A `Type `_ object representing the type of the +element of the vector. + +``count`` +~~~~~~~~~ + +[read-only] + +The number of elements in the vector. 
diff --git a/docs/_build/html/_sources/doc/llvm.ee.EngineBuilder.txt b/docs/_build/html/_sources/doc/llvm.ee.EngineBuilder.txt new file mode 100644 index 0000000..e317acc --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.ee.EngineBuilder.txt @@ -0,0 +1,56 @@ ++----------------------------------+ +| layout: page | ++----------------------------------+ +| title: EngineBuilder (llvm.ee) | ++----------------------------------+ + +llvm.ee.EngineBuilder +===================== + +A convenience class for building +`llvm.ee.ExecutionEngine `_. Each +``EngineBuilder`` instance can only create one ``ExecutionEngine``. + +Methods +------- + +``create(self)`` +~~~~~~~~~~~~~~~~ + +Create and return a new +`ExecutionEngine `_ instance. + +Raise ``llvm.LLVMException`` if the builder cannot create an +``ExecutionEngine`` based on the given configuration. + +``force_interpreter(self)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Force the output ``ExecutionEngine`` to be an LLVM IR +interpreter. + +``force_jit(self)`` +~~~~~~~~~~~~~~~~~~~ + +Force the output ``ExecutionEngine`` to be a JIT engine. + +``opt(self, level)`` +~~~~~~~~~~~~~~~~~~~~ + +Set the code generation optimization level for a JIT engine. Valid values +of ``level`` are 0-3, inclusive. The default setting is 2. To use vector +instructions, such as SSE on Intel processors, ``level`` must be 3 +(aggressive). + +Static Factory Methods +---------------------- + +``new(module)`` +~~~~~~~~~~~~~~~ + +Create a new EngineBuilder. ``module`` must be a +`llvm.core.Module `_ instance. Its ownership is +transferred to the resulting +`ExecutionEngine `_. 
Therefore, it is +impossible to create more than one ``ExecutionEngine`` with a single +``EngineBuilder``. diff --git a/docs/_build/html/_sources/doc/llvm.ee.ExecutionEngine.txt b/docs/_build/html/_sources/doc/llvm.ee.ExecutionEngine.txt new file mode 100644 index 0000000..718358c --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.ee.ExecutionEngine.txt @@ -0,0 +1,62 @@ ++------------------------------------+ +| layout: page | ++------------------------------------+ +| title: ExecutionEngine (llvm.ee) | ++------------------------------------+ + +llvm.ee.ExecutionEngine +======================= + +Methods +------- + +``add_module(self, module)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add a new module to the ExecutionEngine. The ownership of ``module`` +is transferred. When the ``ExecutionEngine`` is destroyed, the module is +destroyed. + +``free_machine_code_for(self, fn)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Release memory used for the machine code generated for the function +``fn``. + +``get_pointer_to_function(self, fn)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Obtain the pointer to the function ``fn``. This forces the +ExecutionEngine to generate the machine code in lazy mode. + +If ``fn`` is not defined, ``ExecutionEngine`` will look up the symbol +through ``dlsym``. + +The returned function pointer can be wrapped as a ``ctypes`` function. + +``remove_module(self, module)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Remove the ``module``. + +``run_function(self, fn, args)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Execute the function ``fn`` with an iterable of arguments ``args`` which +are ``GenericValue`` instances. This method returns whatever is returned +by ``fn`` as a ``GenericValue``. + +``run_static_ctors(self)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``run_static_dtors(self)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Properties +---------- + +``target_data`` +~~~~~~~~~~~~~~~ + +Access the `TargetData `_ instance associated +with the ``ExecutionEngine``. 
diff --git a/docs/_build/html/_sources/doc/llvm.ee.GenericValue.txt b/docs/_build/html/_sources/doc/llvm.ee.GenericValue.txt new file mode 100644 index 0000000..8680093 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.ee.GenericValue.txt @@ -0,0 +1,64 @@ ++---------------------------------+ +| layout: page | ++---------------------------------+ +| title: GenericValue (llvm.ee) | ++---------------------------------+ + +llvm.ee.GenericValue +==================== + +- This will become a table of contents (this text will be scraped). + {:toc} + +Methods +------- + +``as_int(self)`` +~~~~~~~~~~~~~~~~ + +Return the value of this ``GenericValue`` instance as an unsigned +integer + +``as_int_signed(self)`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Return the value of this ``GenericValue`` instance as a signed integer. + +``as_pointer(self)`` +~~~~~~~~~~~~~~~~~~~~ + +Return the value of this ``GenericValue`` instance as a pointer. The +type of the return value is ``int``. + +``as_real(self, ty)`` +~~~~~~~~~~~~~~~~~~~~~ + +Return the value of this ``GenericValue`` instance as a real number +which type is specified by ``ty``. ``ty`` must be a +`Type `_ instance of a real number type. + +Static Factory Methods +---------------------- + +``int(ty, intval)`` +~~~~~~~~~~~~~~~~~~~ + +Create a ``GenericValue`` instance with a ``int`` value, which is +zero-extended if necessary. The type of the value is specified by +``ty``, which is a `Type `_ instance. + +``int_signed(ty, intval)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a ``GenericValue`` instance with a ``int`` value, which is +sign-extended if necessary. The type of the value is specified by +``ty``, which is a `Type `_ instance. + +``pointer(ty, addr)`` or ``pointer(addr)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Create a ``GenericValue`` instance with a ``int`` value, which is +representing a pointer value. + +The two argument version is **deprecated**. The old code never used +``ty`` anyway. 
diff --git a/docs/_build/html/_sources/doc/llvm.ee.TargetData.txt b/docs/_build/html/_sources/doc/llvm.ee.TargetData.txt new file mode 100644 index 0000000..5a774b8 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.ee.TargetData.txt @@ -0,0 +1,70 @@ ++-------------------------------+ +| layout: page | ++-------------------------------+ +| title: TargetData (llvm.ee) | ++-------------------------------+ + +llvm.ee.TargetData +================== + +- This will become a table of contents (this text will be scraped). + {:toc} + +Methods +------- + +``abi_alignment(self, ty)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns the minimum ABI-required alignment for the specified type +``ty``. + +``abi_size(self, ty)`` +~~~~~~~~~~~~~~~~~~~~~~ + +``callframe_alignment(self, ty)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Returns the minimum ABI-required alignment for the specified type ``ty`` +when it is part of a call frame. + +``element_at_offset(self, ty, ofs)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``offset_of_element(self, ty, el)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``preferred_alignment(self, ty_or_gv)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``size(self, ty)`` +~~~~~~~~~~~~~~~~~~ + +``store_size(self, ty)`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +``__str__(self)`` +~~~~~~~~~~~~~~~~~ + +Returns the string representation. 
+ +Static Factory Methods +---------------------- + +``new(strrep)`` +~~~~~~~~~~~~~~~ + +Construct a new ``TargetData`` instance from the string representation ``strrep``. + +Properties +---------- + +``byte_order`` +~~~~~~~~~~~~~~ + +``pointer_size`` +~~~~~~~~~~~~~~~~ + +``target_integer_type`` +~~~~~~~~~~~~~~~~~~~~~~~ + diff --git a/docs/_build/html/_sources/doc/llvm.passes.FunctionPassManager.txt b/docs/_build/html/_sources/doc/llvm.passes.FunctionPassManager.txt new file mode 100644 index 0000000..137cf74 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.passes.FunctionPassManager.txt @@ -0,0 +1,42 @@ ++--------------------------------------------+ +| layout: page | ++--------------------------------------------+ +| title: FunctionPassManager (llvm.passes) | ++--------------------------------------------+ + +llvm.passes.FunctionPassManager +=============================== + +Base Classes +------------ + +- `llvm.passes.PassManager `_ + +Methods +------- + +``finalize(self)`` +~~~~~~~~~~~~~~~~~~ + +Finalizes all associated function passes in the LLVM system. + +Beware that this destroys all associated passes even if another pass +manager is using those passes. This may result in a segfault. + +``initialize(self)`` +~~~~~~~~~~~~~~~~~~~~ + +Initializes all associated function passes in the LLVM system. + +``run(self, fn)`` +~~~~~~~~~~~~~~~~~ + +Run all passes on the given function ``fn``. + +Static Factory Methods +---------------------- + +``new(module)`` +~~~~~~~~~~~~~~~ + +Create a ``FunctionPassManager`` instance for a given ``module``. 
diff --git a/docs/_build/html/_sources/doc/llvm.passes.PassManager.txt b/docs/_build/html/_sources/doc/llvm.passes.PassManager.txt new file mode 100644 index 0000000..e0a2536 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.passes.PassManager.txt @@ -0,0 +1,29 @@ ++------------------------------------+ +| layout: page | ++------------------------------------+ +| title: PassManager (llvm.passes) | ++------------------------------------+ + +llvm.passes.PassManager +======================= + +Methods +------- + +``add(self, tgt_data_or_pass_id)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Add a pass by its ID. Pass IDs are defined as ``PASS_*``. + +``run(self, module)`` +~~~~~~~~~~~~~~~~~~~~~ + +Run all passes on the given ``module``. + +Static Factory Methods +---------------------- + +``new()`` +~~~~~~~~~ + +Creates a new ``PassManager`` instance. diff --git a/docs/_build/html/_sources/doc/llvm.passes.PassManagerBuilder.txt b/docs/_build/html/_sources/doc/llvm.passes.PassManagerBuilder.txt new file mode 100644 index 0000000..99e1935 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm.passes.PassManagerBuilder.txt @@ -0,0 +1,72 @@ ++-------------------------------------------+ +| layout: page | ++-------------------------------------------+ +| title: PassManagerBuilder (llvm.passes) | ++-------------------------------------------+ + +llvm.passes.PassManagerBuilder +============================== + +Provides a simple API to populate pass managers for languages like C/C++. +Refer to `LLVM API +Documentation `_ +for details. + +Methods +------- + +``populate(self, pm)`` +~~~~~~~~~~~~~~~~~~~~~~ + +Populate a `FunctionPassManager `_ +or `PassManager `_ given as ``pm``. + +``use_inliner_with_threshold(self, threshold)`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use an inliner pass with the given ``threshold``. + +Properties +---------- + +The following properties can be overridden to customize how pass managers +are populated. 
+ +``disable_simplify_lib_calls`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Boolean. Default is ``False``. + +``disable_unit_at_a_time`` +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Boolean. Default is ``False``. + +``disable_unroll_loops`` +~~~~~~~~~~~~~~~~~~~~~~~~ + +Boolean. Default is ``False``. + +``opt_level`` +~~~~~~~~~~~~~ + +Default is ``2``. Valid values are 0-3. Corresponds to O0, O1, O2, O3 as +in C/C++ optimization options. + +``size_level`` +~~~~~~~~~~~~~~ + +Default is ``0``. + +``vectorize`` +~~~~~~~~~~~~~ + +Default is ``False``. + +Static Factory Methods +---------------------- + +``new()`` +~~~~~~~~~ + +Creates a new ``PassManagerBuilder`` instance. diff --git a/docs/_build/html/_sources/doc/llvm_concepts.txt b/docs/_build/html/_sources/doc/llvm_concepts.txt new file mode 100644 index 0000000..c5f74aa --- /dev/null +++ b/docs/_build/html/_sources/doc/llvm_concepts.txt @@ -0,0 +1,236 @@ +******************** +LLVM Concepts +******************** + +This section explains a few concepts related to LLVM, not specific to +llvmpy. + +.. toctree:: + :hidden: + + + + +Intermediate Representation +=========================== + +The intermediate representation, or IR for short, is an in-memory data +structure that represents executable code. The IR data structures allow +for creation of types, constants, functions, function arguments, +instructions, global variables and so on. For example, to create a +function *sum* that takes two integers and returns their sum, we need to +follow these steps: + +- create an integer type *ti* of required bitwidth +- create a function type *tf* which takes two *ti* -s and returns + another *ti* +- create a function of type *tf* named *sum* +- add a *basic block* to the function +- using a helper object called an *instruction builder*, add two + instructions into the basic block: . an instruction to add the two + arguments and store the result into a temporary variable . 
a return + instruction to return the value of the temporary variable + +(A basic block is a block of instructions.) + +LLVM has its own instruction set; the instructions used above (*add* +and *ret*) are from this set. The LLVM instructions are at a higher +level than the usual assembly language; for example there are +instructions related to variable argument handling, exception handling, +and garbage collection. These allow high-level languages to be +represented cleanly in the IR. + + +SSA Form and PHI Nodes +====================== + +All LLVM instructions are represented in the *Static Single Assignment* +(SSA) form. Essentially, this means that any variable can be assigned to +only once. Such a representation facilitates better optimization, among +other benefits. + +A consequence of single assignment is the need for PHI (Φ) nodes. These are required +when a variable can be assigned a different value based on the path of +control flow. For example, the value of *b* at the end of execution of +the snippet below: + +.. code-block:: c + + a = 1; + if (v < 10) + a = 2; + b = a; + +cannot be determined statically. The value of '2' cannot be assigned to +the 'original' *a*, since *a* can be assigned to only once. There are +two *a* 's in there, and the last assignment has to choose which +version to pick. This is accomplished by adding a PHI node: + +.. code-block:: c + + a1 = 1; + if (v < 10) + a2 = 2; + b = PHI(a1, a2); + +The PHI node selects *a1* or *a2*, depending on where the control +reached the PHI node. The argument *a1* of the PHI node is associated +with the block *"a1 = 1;"* and *a2* with the block *"a2 = 2;"*. + +PHI nodes have to be explicitly created in the LLVM IR. Accordingly the +LLVM instruction set has an instruction called *phi*. 
+ + +LLVM Assembly Language +====================== + +The LLVM IR can be represented offline in two formats + +- a textual, human-readable form, similar to assembly language, called + the LLVM assembly language (files with .ll extension) +- a binary form, called the LLVM bitcode (files with .bc extension) + +All three formats (the in-memory IR, the LLVM assembly language and the +LLVM bitcode) represent the *same* information. Each format can be +converted into the other two formats (using LLVM APIs). + +The `LLVM demo page `_ lets you type in C or +C++ code, converts it into LLVM IR and outputs the IR as LLVM assembly +language code. + +Just to get a feel of the LLVM assembly language, here's a function in +C, and the corresponding LLVM assembly (as generated by the demo page): + +.. code-block:: c + + /* compute sum of 1..n */ + unsigned sum(unsigned n) { + if (n == 0) + return 0; + else + return n + sum(n-1); + } + +The corresponding LLVM assembly: + +.. code-block:: llvm + + ; ModuleID = '/tmp/webcompile/_7149_0.bc' + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + target triple = "x86_64-linux-gnu" + + define i32 @sum(i32 %n) nounwind readnone { + entry: + %0 = icmp eq i32 %n, 0 ; [#uses=1] + br i1 %0, label %bb2, label %bb1 + + bb1: ; preds = %entry + %1 = add i32 %n, -1 ; [#uses=2] + %2 = icmp eq i32 %1, 0 ; [#uses=1] + br i1 %2, label %sum.exit, label %bb1.i + + bb1.i: ; preds = %bb1 + %3 = add i32 %n, -2 ; [#uses=1] + %4 = tail call i32 @sum(i32 %3) nounwind ; [#uses=1] + %5 = add i32 %4, %1 ; [#uses=1] + br label %sum.exit + + sum.exit: ; preds = %bb1.i, %bb1 + %6 = phi i32 [ %5, %bb1.i ], [ 0, %bb1 ] ; [#uses=1] + %7 = add i32 %6, %n ; [#uses=1] + ret i32 %7 + + bb2: ; preds = %entry + ret i32 0 + } + +Note the usage of SSA form. 
The long string called ``target datalayout`` +is a specification of the platform ABI (like endianness, sizes of types, +alignment etc.). + +The `LLVM Language Reference `_ +defines the LLVM assembly language including the entire instruction set. + + +Modules +======= + +`Modules <./llvm.core.Module.html>`_, in the LLVM IR, are similar to a +single *C* language source file (.c file). A module contains: + +- functions (declarations and definitions) +- global variables and constants +- global type aliases for structures + +Modules are top-level containers; all executable code representation is +contained within modules. Modules may be combined (linked) together to +give a bigger resultant module. During this process LLVM attempts to +reconcile the references between the combined modules. + + +Optimization and Passes +======================= + +LLVM provides quite a few optimization algorithms that work on the IR. +These algorithms are organized as *passes*. Each pass does something +specific, like combining redundant instructions. Passes need not always +optimize the IR; they can also do other operations like inserting +instrumentation code, or analyzing the IR (the result of which can be +used by passes that do optimizations) or even printing call graphs. + +This LLVM `documentation page `_ +describes all the available passes, and what they do. + +LLVM does not automatically choose to run any passes at any time. Passes +have to be explicitly selected and run on each module. This gives you +the flexibility to choose transformations and optimizations that are +most suitable for the code in the module. + +There is an LLVM binary called +`opt `_, which lets you run passes on +bitcode files from the command line. You can write your own passes (in +C/C++, as a shared library). These can be loaded and executed by +``opt``.
+(Although llvmpy does not allow you to write your own passes, it does +allow you to navigate the entire IR at any stage, and perform any +transforms on it as you like.) + +A "pass manager" is responsible for loading passes, selecting the +correct objects to run them on (for example, a pass may work only on +functions, individually) and actually running them. ``opt`` is a +command-line wrapper for the pass manager. + +LLVM defines two kinds of pass managers: + +- The + `FunctionPassManager `_ + manages function or basic-block passes. These lighter weight passes + can be used immediately after each generated function to reduce + memory footprint. + +- The + `PassManager `_ + manages module passes for optimizing the entire module. + + +Bitcode +======= + +LLVM IR can be represented as a bitcode format for disk storage. It is +`suitable for fast loading by JIT +compiler `_. See `LLVM +documentation `_ for details +about the bitcode format. + + +Execution Engine, JIT and Interpreter +===================================== + +The *execution engine* implements execution of LLVM IR through an +interpreter or a JIT dynamic compiler. An *execution engine* can contain +multiple modules. + + **Note** + + Inter-module reference is not possible. That is, module ``A`` cannot + call a function in module ``B``, directly. + diff --git a/docs/_build/html/_sources/doc/llvmpy_package.txt b/docs/_build/html/_sources/doc/llvmpy_package.txt new file mode 100644 index 0000000..eac3814 --- /dev/null +++ b/docs/_build/html/_sources/doc/llvmpy_package.txt @@ -0,0 +1,89 @@ +*********************** +The llvmpy Package +*********************** + +llvmpy is a Python package, consisting of 6 modules, that wrap +enough LLVM APIs to allow the implementation of your own compiler/VM +backend in pure Python. If you've come this far, you probably know why +this is a good idea.
+ +Out of the 6 modules, one is an "extension" module (i.e., it is written +in C), and another one is a small private utility module, which leaves 4 +public modules. These are: + +- *llvm* -- top-level package, common classes (like exceptions) +- *llvm.core* -- IR-related APIs +- *llvm.ee* -- execution engine related APIs +- *llvm.passes* -- pass manager and passes related APIs + +The modules contain only classes and (integer) constants. Mostly simple +Python constructs are used (deliberately) -- +`property() `_ and +`property +decorators `_ are +probably the most exotic animals around. All classes are "new style" +classes. The APIs are designed to be navigable (and guessable!) once you +know a few conventions. These conventions are highlighted in the +sections below. + +Here is a quick overview of the contents of each package: + +llvm +---- + +- LLVMException -- exception class (currently the only one) + +llvm.core +--------- + +- `Module `_ -- represents an LLVM Module +- `Type `_ -- represents an LLVM Type +- `Value `_ -- represents an LLVM Value, including: + globals, constants, variables, arguments, functions, instructions, + etc. +- `BasicBlock `_ -- another class derived from + Value, represents an LLVM basic block +- `Builder `_ -- used for creating + instructions, wraps LLVM IRBuilder helper class +- constants *TYPE\_\** that represent various types +- constants *CC\_\** that represent calling conventions +- constants *ICMP\_\** and *FCMP\_\** that represent integer and real + comparison predicates (like less than, greater than etc.) +- constants *LINKAGE\_\** that represent linkage of symbols (external, + internal etc.)
+- constants *VISIBILITY\_\** that represent visibility of symbols + (default, hidden, protected) +- constants *ATTR\_\** that represent function parameter attributes + +llvm.ee +------- + +- `ExecutionEngine `_ -- represents an + execution engine (which can be either an interpreter or a JIT) +- `TargetData `_ -- represents the ABI of the + target platform (details like sizes and alignment of primitive types, + endianness, etc.) + +llvm.passes +----------- + +- `PassManager `_ -- represents an LLVM + pass manager +- `FunctionPassManager `_ -- + represents an LLVM function pass manager +- constants *PASS\_\** that represent various passes + +A note on the importing of these modules +---------------------------------------- + +Pythonically, modules are imported with the statement +``import llvm.core``. However, you might find it more convenient to +import llvmpy modules thus: + + +.. code-block:: python + + from llvm import * + from llvm.core import * + from llvm.ee import * + from llvm.passes import * diff --git a/docs/_build/html/_sources/doc/types.txt b/docs/_build/html/_sources/doc/types.txt new file mode 100644 index 0000000..f3b9da2 --- /dev/null +++ b/docs/_build/html/_sources/doc/types.txt @@ -0,0 +1,123 @@ ++----------------+ +| layout: page | ++----------------+ +| title: Types | ++----------------+ + +Types are what you think they are. An instance of +`llvm.core.Type `_, or one of its derived classes, +represents a type. llvmpy does not use as many classes to represent +types as does LLVM itself. Some types are represented using +`llvm.core.Type `_ itself and the rest are +represented using derived classes of +`llvm.core.Type `_. As usual, an instance is +created via one of the static methods of `Type `_. +These methods return an instance of either +`llvm.core.Type `_ itself or one of its derived +classes.
+ +The following table lists all the available types along with the static +method which has to be used to construct it and the name of the class +whose object is actually returned by the static method. + +Name \| Constructor Method \| Class \| +-----\|:------------------:\|:-----:\| integer of bitwidth *n* \| +Type.int(n) \| `IntegerType `_ \| 32-bit +float \| Type.float() \| `Type `_ \| 64-bit double +\| Type.double() \| `Type `_ \| 80-bit float \| +Type.x86\_fp80() \| `Type `_ \| 128-bit float +(112-bit mantissa) \| Type.fp128() \| `Type `_ \| +128-bit float (two 64-bits) \| Type.ppc\_fp128() \| +`Type `_ \| function \| Type.function(r, p, v) \| +`FunctionType `_ \| unpacked struct \| +Type.struct(eltys, name) \| `StructType `_ \| +packed struct \| Type.packed\_struct(eltys, name) \| +`StructType `_ \| opaque struct \| +Type.opaque(name) \| `StructType `_ \| array +\| Type.array(elty, count) \| `ArrayType `_ \| +pointer to value of type *pty* \| Type.pointer(pty, addrspc) \| +`PointerType `_ \| vector \| +Type.vector(elty, count) \| `VectorType `_ \| +void \| Type.void() \| `Type `_ \| label \| +Type.label() \| `Type `_ \| + +The class hierarchy is: + +:: + + Type + IntegerType + FunctionType + StructType + ArrayType + PointerType + VectorType + +-------------- + +An Example +---------- + +Here is an example that demonstrates the creation of types: + + +.. 
code-block:: python + + #!/usr/bin/env python + + # integers + int_ty = Type.int(); bool_ty = Type.int(1); int_64bit = Type.int(64) + + # floats + sprec_real = Type.float(); dprec_real = Type.double() + + # arrays and vectors + intar_ty = Type.array( int_ty, 10 ) # "typedef int intar_ty[10];" + twodim = Type.array( intar_ty , 10 ) # "typedef int twodim[10][10];" + vec = Type.array( int_ty, 10 ) + + # structures + s1_ty = Type.struct( [ int_ty, sprec_real ] ) # "struct s1_ty { int + # v1; float v2; };" + + # pointers + intptr_ty = Type.pointer(int_ty) # "typedef int \*intptr_ty;" + + # functions + f1 = Type.function( int_ty, [ int_ty ] ) # functions that take 1 + # int_ty and return 1 int_ty + + f2 = Type.function( Type.void(), [ int_ty, int_ty ] ) # functions that + # take 2 int_tys and return nothing + + f3 = Type.function( Type.void(), ( int_ty, int_ty ) ) # same as f2; + # any iterable can be used + + fnargs = [ Type.pointer( Type.int(8) ) ]; printf = Type.function( + Type.int(), fnargs, True ) # variadic function + + + +-------------- + +Another Example: Recursive Type +------------------------------- + +The type system was rewritten in LLVM 3.0. The old opaque type was +removed. Instead, identified ``StructType`` can now be defined without a +body. Doing so creates an opaque structure. One can then set the body +after the construction of a structure. + +(See `LLVM +Blog `_ +for details about the new type system.) + +The following code defines an opaque structure, named "mystruct". The +body is defined after the construction using ``StructType.set_body``. +The second subtype is a pointer to a "mystruct" type. + + +..
code-block:: python + + ts = Type.opaque('mystruct') + ts.set_body([Type.int(), Type.pointer(ts)]) diff --git a/docs/_build/html/_sources/doc/userguide.txt b/docs/_build/html/_sources/doc/userguide.txt new file mode 100644 index 0000000..1bb4b4f --- /dev/null +++ b/docs/_build/html/_sources/doc/userguide.txt @@ -0,0 +1,17 @@ +************ +User Guide +************ + +llvmpy provides Python bindings for LLVM. This document explains how +you can setup and use it. A working knowledge of Python and a basic idea +of LLVM is assumed. + +.. toctree:: + :maxdepth: 1 + + getting_started.rst + llvm_concepts.rst + llvmpy_package.rst + + + diff --git a/docs/_build/html/_sources/doc/values.txt b/docs/_build/html/_sources/doc/values.txt new file mode 100644 index 0000000..a999740 --- /dev/null +++ b/docs/_build/html/_sources/doc/values.txt @@ -0,0 +1,78 @@ ++-----------------+ +| layout: page | ++-----------------+ +| title: Values | ++-----------------+ + +`llvm.core.Value `_ is the base class of all +values computed by a program that may be used as operands to other +values. A value has a type associated with it (an object of +`llvm.core.Type `_). + +The class hierarchy is: + +:: + + Value + User + Constant + ConstantExpr + ConstantAggregateZero + ConstantInt + ConstantFP + ConstantArray + ConstantStruct + ConstantVector + ConstantPointerNull + UndefValue + GlobalValue + GlobalVariable + Function + Instruction + CallOrInvokeInstruction + PHINode + SwitchInstruction + CompareInstruction + Argument + BasicBlock + +The `Value `_ class is abstract, it's not meant to +be instantiated. `User `_ is a +`Value `_ that in turn uses (i.e., can refer to) +other values (for e.g., a constant expression 1+2 refers to two constant +values 1 and 2). + +`Constant `_-s represent constants that appear +within code or as initializers of globals. They are constructed using +static methods of `Constant `_. Various types +of constants are represented by various subclasses of +`Constant `_. 
However, most of them are empty +and do not provide any additional attributes or methods over +`Constant `_. + +The `Function `_ object represents an instance of a +function type. Such objects contain +`Argument `_ objects, which represent the +actual, local-variable-like arguments of the function (not to be +confused with the arguments returned by a function *type* object -- +these represent the *type* of the arguments). + +The various `Instruction `_-s are created by +the `Builder `_ class. Most instructions are +represented by `Instruction `_ itself, but +there are a few subclasses that represent interesting instructions. + +`Value `_ objects have a type (read-only), and a +name (read-write). + +**Related Links** `functions `_, +`comparision `_, +`llvm.core.Value `_, +`llvm.core.User `_, +`llvm.core.Constant `_, +`llvm.core.GlobalValue `_, +`llvm.core.GlobalVariable `_, +`llvm.core.Argument `_, +`llvm.core.Instruction `_, +`llvm.core.Builder `_, +`llvm.core.BasicBlock `_ diff --git a/docs/_build/html/_sources/index.txt b/docs/_build/html/_sources/index.txt new file mode 100644 index 0000000..fbca2ec --- /dev/null +++ b/docs/_build/html/_sources/index.txt @@ -0,0 +1,25 @@ +.. llvmpy documentation master file, created by + sphinx-quickstart on Wed Aug 8 17:33:58 2012. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Documentation for llvmpy +================= + +Contents: + +.. 
toctree:: + :titlesonly: + :maxdepth: 2 + + doc/userguide.rst + doc/examples.rst + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/_build/html/_static/ajax-loader.gif b/docs/_build/html/_static/ajax-loader.gif new file mode 100644 index 0000000..61faf8c Binary files /dev/null and b/docs/_build/html/_static/ajax-loader.gif differ diff --git a/docs/_build/html/_static/basic.css b/docs/_build/html/_static/basic.css new file mode 100644 index 0000000..f0379f3 --- /dev/null +++ b/docs/_build/html/_static/basic.css @@ -0,0 +1,540 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar input[type="text"] { + width: 170px; +} + +div.sphinxsidebar 
input[type="submit"] { + width: 30px; +} + +img { + border: 0; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li div.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable dl, table.indextable dd { + margin-top: 0; + margin-bottom: 0; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- general body styles --------------------------------------------------- */ + +a.headerlink { + visibility: hidden; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} 
+ +div.body td { + text-align: left; +} + +.field-list ul { + padding-left: 1em; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px 7px 0 7px; + background-color: #ffe; + width: 40%; + float: right; +} + +p.sidebar-title { + font-weight: bold; +} + +/* -- topics ---------------------------------------------------------------- */ + +div.topic { + border: 1px solid #ccc; + padding: 7px 7px 0 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +div.admonition dl { + margin-bottom: 0; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + border: 0; + border-collapse: collapse; +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +table.field-list td, table.field-list th { + border: 0 !important; 
+} + +table.footnote td, table.footnote th { + border: 0 !important; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +dl { + margin-bottom: 15px; +} + +dd p { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +dt:target, .highlighted { + background-color: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.refcount { + color: #060; +} + +.optional { + font-size: 1.3em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +td.linenos pre { + padding: 5px 0px; + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + margin-left: 0.5em; +} + +table.highlighttable td { + 
padding: 0 0.5em 0 0.5em; +} + +tt.descname { + background-color: transparent; + font-weight: bold; + font-size: 1.2em; +} + +tt.descclassname { + background-color: transparent; +} + +tt.xref, a tt { + background-color: transparent; + font-weight: bold; +} + +h1 tt, h2 tt, h3 tt, h4 tt, h5 tt, h6 tt { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/docs/_build/html/_static/comment-bright.png b/docs/_build/html/_static/comment-bright.png new file mode 100644 index 0000000..551517b Binary files /dev/null and b/docs/_build/html/_static/comment-bright.png differ diff --git a/docs/_build/html/_static/comment-close.png b/docs/_build/html/_static/comment-close.png new file mode 100644 index 0000000..09b54be Binary files /dev/null and b/docs/_build/html/_static/comment-close.png differ diff --git a/docs/_build/html/_static/comment.png b/docs/_build/html/_static/comment.png new file mode 100644 index 0000000..92feb52 Binary files /dev/null and b/docs/_build/html/_static/comment.png differ diff --git a/docs/_build/html/_static/default.css b/docs/_build/html/_static/default.css new file mode 100644 index 0000000..21f3f50 --- /dev/null +++ b/docs/_build/html/_static/default.css @@ -0,0 +1,256 @@ +/* + * default.css_t + * ~~~~~~~~~~~~~ + * + * Sphinx stylesheet -- default 
theme. + * + * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +@import url("basic.css"); + +/* -- page layout ----------------------------------------------------------- */ + +body { + font-family: sans-serif; + font-size: 100%; + background-color: #11303d; + color: #000; + margin: 0; + padding: 0; +} + +div.document { + background-color: #1c4e63; +} + +div.documentwrapper { + float: left; + width: 100%; +} + +div.bodywrapper { + margin: 0 0 0 230px; +} + +div.body { + background-color: #ffffff; + color: #000000; + padding: 0 20px 30px 20px; +} + +div.footer { + color: #ffffff; + width: 100%; + padding: 9px 0 9px 0; + text-align: center; + font-size: 75%; +} + +div.footer a { + color: #ffffff; + text-decoration: underline; +} + +div.related { + background-color: #133f52; + line-height: 30px; + color: #ffffff; +} + +div.related a { + color: #ffffff; +} + +div.sphinxsidebar { +} + +div.sphinxsidebar h3 { + font-family: 'Trebuchet MS', sans-serif; + color: #ffffff; + font-size: 1.4em; + font-weight: normal; + margin: 0; + padding: 0; +} + +div.sphinxsidebar h3 a { + color: #ffffff; +} + +div.sphinxsidebar h4 { + font-family: 'Trebuchet MS', sans-serif; + color: #ffffff; + font-size: 1.3em; + font-weight: normal; + margin: 5px 0 0 0; + padding: 0; +} + +div.sphinxsidebar p { + color: #ffffff; +} + +div.sphinxsidebar p.topless { + margin: 5px 10px 10px 10px; +} + +div.sphinxsidebar ul { + margin: 10px; + padding: 0; + color: #ffffff; +} + +div.sphinxsidebar a { + color: #98dbcc; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + + + +/* -- hyperlink styles ------------------------------------------------------ */ + +a { + color: #355f7c; + text-decoration: none; +} + +a:visited { + color: #355f7c; + text-decoration: none; +} + +a:hover { + text-decoration: underline; +} + + + +/* -- body styles 
----------------------------------------------------------- */ + +div.body h1, +div.body h2, +div.body h3, +div.body h4, +div.body h5, +div.body h6 { + font-family: 'Trebuchet MS', sans-serif; + background-color: #f2f2f2; + font-weight: normal; + color: #20435c; + border-bottom: 1px solid #ccc; + margin: 20px -20px 10px -20px; + padding: 3px 0 3px 10px; +} + +div.body h1 { margin-top: 0; font-size: 200%; } +div.body h2 { font-size: 160%; } +div.body h3 { font-size: 140%; } +div.body h4 { font-size: 120%; } +div.body h5 { font-size: 110%; } +div.body h6 { font-size: 100%; } + +a.headerlink { + color: #c60f0f; + font-size: 0.8em; + padding: 0 4px 0 4px; + text-decoration: none; +} + +a.headerlink:hover { + background-color: #c60f0f; + color: white; +} + +div.body p, div.body dd, div.body li { + text-align: justify; + line-height: 130%; +} + +div.admonition p.admonition-title + p { + display: inline; +} + +div.admonition p { + margin-bottom: 5px; +} + +div.admonition pre { + margin-bottom: 5px; +} + +div.admonition ul, div.admonition ol { + margin-bottom: 5px; +} + +div.note { + background-color: #eee; + border: 1px solid #ccc; +} + +div.seealso { + background-color: #ffc; + border: 1px solid #ff6; +} + +div.topic { + background-color: #eee; +} + +div.warning { + background-color: #ffe4e4; + border: 1px solid #f66; +} + +p.admonition-title { + display: inline; +} + +p.admonition-title:after { + content: ":"; +} + +pre { + padding: 5px; + background-color: #eeffcc; + color: #333333; + line-height: 120%; + border: 1px solid #ac9; + border-left: none; + border-right: none; +} + +tt { + background-color: #ecf0f3; + padding: 0 1px 0 1px; + font-size: 0.95em; +} + +th { + background-color: #ede; +} + +.warning tt { + background: #efc2c2; +} + +.note tt { + background: #d6d6d6; +} + +.viewcode-back { + font-family: sans-serif; +} + +div.viewcode-block:target { + background-color: #f4debf; + border-top: 1px solid #ac9; + border-bottom: 1px solid #ac9; +} \ No newline at end 
of file diff --git a/docs/_build/html/_static/doctools.js b/docs/_build/html/_static/doctools.js new file mode 100644 index 0000000..d4619fd --- /dev/null +++ b/docs/_build/html/_static/doctools.js @@ -0,0 +1,247 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for all documentation. + * + * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/** + * select a different prefix for underscore + */ +$u = _.noConflict(); + +/** + * make the code below compatible with browsers without + * an installed firebug like debugger +if (!window.console || !console.firebug) { + var names = ["log", "debug", "info", "warn", "error", "assert", "dir", + "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", + "profile", "profileEnd"]; + window.console = {}; + for (var i = 0; i < names.length; ++i) + window.console[names[i]] = function() {}; +} + */ + +/** + * small helper function to urldecode strings + */ +jQuery.urldecode = function(x) { + return decodeURIComponent(x).replace(/\+/g, ' '); +} + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s == 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * small function to check if an array contains + * a given item. 
+ */ +jQuery.contains = function(arr, item) { + for (var i = 0; i < arr.length; i++) { + if (arr[i] == item) + return true; + } + return false; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name. + */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node) { + if (node.nodeType == 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) { + var span = document.createElement("span"); + span.className = className; + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this); + }); + } + } + return this.each(function() { + highlight(this); + }); +}; + +/** + * Small JavaScript module for the documentation. + */ +var Documentation = { + + init : function() { + this.fixFirefoxAnchorBug(); + this.highlightSearchWords(); + this.initIndexTable(); + }, + + /** + * i18n support + */ + TRANSLATIONS : {}, + PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; }, + LOCALE : 'unknown', + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext : function(string) { + var translated = Documentation.TRANSLATIONS[string]; + if (typeof translated == 'undefined') + return string; + return (typeof translated == 'string') ? translated : translated[0]; + }, + + ngettext : function(singular, plural, n) { + var translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated == 'undefined') + return (n == 1) ? 
singular : plural; + return translated[Documentation.PLURALEXPR(n)]; + }, + + addTranslations : function(catalog) { + for (var key in catalog.messages) + this.TRANSLATIONS[key] = catalog.messages[key]; + this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); + this.LOCALE = catalog.locale; + }, + + /** + * add context elements like header anchor links + */ + addContextElements : function() { + $('div[id] > :header:first').each(function() { + $('\u00B6'). + attr('href', '#' + this.id). + attr('title', _('Permalink to this headline')). + appendTo(this); + }); + $('dt[id]').each(function() { + $('\u00B6'). + attr('href', '#' + this.id). + attr('title', _('Permalink to this definition')). + appendTo(this); + }); + }, + + /** + * workaround a firefox stupidity + */ + fixFirefoxAnchorBug : function() { + if (document.location.hash && $.browser.mozilla) + window.setTimeout(function() { + document.location.href += ''; + }, 10); + }, + + /** + * highlight the search words provided in the url in the text + */ + highlightSearchWords : function() { + var params = $.getQueryParameters(); + var terms = (params.highlight) ? 
params.highlight[0].split(/\s+/) : []; + if (terms.length) { + var body = $('div.body'); + window.setTimeout(function() { + $.each(terms, function() { + body.highlightText(this.toLowerCase(), 'highlighted'); + }); + }, 10); + $('') + .appendTo($('#searchbox')); + } + }, + + /** + * init the domain index toggle buttons + */ + initIndexTable : function() { + var togglers = $('img.toggler').click(function() { + var src = $(this).attr('src'); + var idnum = $(this).attr('id').substr(7); + $('tr.cg-' + idnum).toggle(); + if (src.substr(-9) == 'minus.png') + $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); + else + $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); + }).css('display', ''); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { + togglers.click(); + } + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords : function() { + $('#searchbox .highlight-link').fadeOut(300); + $('span.highlighted').removeClass('highlighted'); + }, + + /** + * make the url absolute + */ + makeURL : function(relativeURL) { + return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; + }, + + /** + * get the current relative url + */ + getCurrentURL : function() { + var path = document.location.pathname; + var parts = path.split(/\//); + $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { + if (this == '..') + parts.pop(); + }); + var url = parts.join('/'); + return path.substring(url.lastIndexOf('/') + 1, path.length - 1); + } +}; + +// quick alias for translations +_ = Documentation.gettext; + +$(document).ready(function() { + Documentation.init(); +}); diff --git a/docs/_build/html/_static/down-pressed.png b/docs/_build/html/_static/down-pressed.png new file mode 100644 index 0000000..6f7ad78 Binary files /dev/null and b/docs/_build/html/_static/down-pressed.png differ diff --git a/docs/_build/html/_static/down.png b/docs/_build/html/_static/down.png new file mode 100644 index 0000000..3003a88 Binary files /dev/null and 
b/docs/_build/html/_static/down.png differ diff --git a/docs/_build/html/_static/file.png b/docs/_build/html/_static/file.png new file mode 100644 index 0000000..d18082e Binary files /dev/null and b/docs/_build/html/_static/file.png differ diff --git a/docs/_build/html/_static/jquery.js b/docs/_build/html/_static/jquery.js new file mode 100644 index 0000000..7c24308 --- /dev/null +++ b/docs/_build/html/_static/jquery.js @@ -0,0 +1,154 @@ +/*! + * jQuery JavaScript Library v1.4.2 + * http://jquery.com/ + * + * Copyright 2010, John Resig + * Dual licensed under the MIT or GPL Version 2 licenses. + * http://jquery.org/license + * + * Includes Sizzle.js + * http://sizzlejs.com/ + * Copyright 2010, The Dojo Foundation + * Released under the MIT, BSD, and GPL Licenses. + * + * Date: Sat Feb 13 22:33:48 2010 -0500 + */ +(function(A,w){function ma(){if(!c.isReady){try{s.documentElement.doScroll("left")}catch(a){setTimeout(ma,1);return}c.ready()}}function Qa(a,b){b.src?c.ajax({url:b.src,async:false,dataType:"script"}):c.globalEval(b.text||b.textContent||b.innerHTML||"");b.parentNode&&b.parentNode.removeChild(b)}function X(a,b,d,f,e,j){var i=a.length;if(typeof b==="object"){for(var o in b)X(a,o,b[o],f,e,d);return a}if(d!==w){f=!j&&f&&c.isFunction(d);for(o=0;o)[^>]*$|^#([\w-]+)$/,Ua=/^.[^:#\[\.,]*$/,Va=/\S/, +Wa=/^(\s|\u00A0)+|(\s|\u00A0)+$/g,Xa=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,P=navigator.userAgent,xa=false,Q=[],L,$=Object.prototype.toString,aa=Object.prototype.hasOwnProperty,ba=Array.prototype.push,R=Array.prototype.slice,ya=Array.prototype.indexOf;c.fn=c.prototype={init:function(a,b){var d,f;if(!a)return this;if(a.nodeType){this.context=this[0]=a;this.length=1;return this}if(a==="body"&&!b){this.context=s;this[0]=s.body;this.selector="body";this.length=1;return this}if(typeof a==="string")if((d=Ta.exec(a))&& +(d[1]||!b))if(d[1]){f=b?b.ownerDocument||b:s;if(a=Xa.exec(a))if(c.isPlainObject(b)){a=[s.createElement(a[1])];c.fn.attr.call(a,b,true)}else 
a=[f.createElement(a[1])];else{a=sa([d[1]],[f]);a=(a.cacheable?a.fragment.cloneNode(true):a.fragment).childNodes}return c.merge(this,a)}else{if(b=s.getElementById(d[2])){if(b.id!==d[2])return T.find(a);this.length=1;this[0]=b}this.context=s;this.selector=a;return this}else if(!b&&/^\w+$/.test(a)){this.selector=a;this.context=s;a=s.getElementsByTagName(a);return c.merge(this, +a)}else return!b||b.jquery?(b||T).find(a):c(b).find(a);else if(c.isFunction(a))return T.ready(a);if(a.selector!==w){this.selector=a.selector;this.context=a.context}return c.makeArray(a,this)},selector:"",jquery:"1.4.2",length:0,size:function(){return this.length},toArray:function(){return R.call(this,0)},get:function(a){return a==null?this.toArray():a<0?this.slice(a)[0]:this[a]},pushStack:function(a,b,d){var f=c();c.isArray(a)?ba.apply(f,a):c.merge(f,a);f.prevObject=this;f.context=this.context;if(b=== +"find")f.selector=this.selector+(this.selector?" ":"")+d;else if(b)f.selector=this.selector+"."+b+"("+d+")";return f},each:function(a,b){return c.each(this,a,b)},ready:function(a){c.bindReady();if(c.isReady)a.call(s,c);else Q&&Q.push(a);return this},eq:function(a){return a===-1?this.slice(a):this.slice(a,+a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(R.apply(this,arguments),"slice",R.call(arguments).join(","))},map:function(a){return this.pushStack(c.map(this, +function(b,d){return a.call(b,d,b)}))},end:function(){return this.prevObject||c(null)},push:ba,sort:[].sort,splice:[].splice};c.fn.init.prototype=c.fn;c.extend=c.fn.extend=function(){var a=arguments[0]||{},b=1,d=arguments.length,f=false,e,j,i,o;if(typeof a==="boolean"){f=a;a=arguments[1]||{};b=2}if(typeof a!=="object"&&!c.isFunction(a))a={};if(d===b){a=this;--b}for(;b
a"; +var e=d.getElementsByTagName("*"),j=d.getElementsByTagName("a")[0];if(!(!e||!e.length||!j)){c.support={leadingWhitespace:d.firstChild.nodeType===3,tbody:!d.getElementsByTagName("tbody").length,htmlSerialize:!!d.getElementsByTagName("link").length,style:/red/.test(j.getAttribute("style")),hrefNormalized:j.getAttribute("href")==="/a",opacity:/^0.55$/.test(j.style.opacity),cssFloat:!!j.style.cssFloat,checkOn:d.getElementsByTagName("input")[0].value==="on",optSelected:s.createElement("select").appendChild(s.createElement("option")).selected, +parentNode:d.removeChild(d.appendChild(s.createElement("div"))).parentNode===null,deleteExpando:true,checkClone:false,scriptEval:false,noCloneEvent:true,boxModel:null};b.type="text/javascript";try{b.appendChild(s.createTextNode("window."+f+"=1;"))}catch(i){}a.insertBefore(b,a.firstChild);if(A[f]){c.support.scriptEval=true;delete A[f]}try{delete b.test}catch(o){c.support.deleteExpando=false}a.removeChild(b);if(d.attachEvent&&d.fireEvent){d.attachEvent("onclick",function k(){c.support.noCloneEvent= +false;d.detachEvent("onclick",k)});d.cloneNode(true).fireEvent("onclick")}d=s.createElement("div");d.innerHTML="";a=s.createDocumentFragment();a.appendChild(d.firstChild);c.support.checkClone=a.cloneNode(true).cloneNode(true).lastChild.checked;c(function(){var k=s.createElement("div");k.style.width=k.style.paddingLeft="1px";s.body.appendChild(k);c.boxModel=c.support.boxModel=k.offsetWidth===2;s.body.removeChild(k).style.display="none"});a=function(k){var n= +s.createElement("div");k="on"+k;var r=k in n;if(!r){n.setAttribute(k,"return;");r=typeof n[k]==="function"}return r};c.support.submitBubbles=a("submit");c.support.changeBubbles=a("change");a=b=d=e=j=null}})();c.props={"for":"htmlFor","class":"className",readonly:"readOnly",maxlength:"maxLength",cellspacing:"cellSpacing",rowspan:"rowSpan",colspan:"colSpan",tabindex:"tabIndex",usemap:"useMap",frameborder:"frameBorder"};var 
G="jQuery"+J(),Ya=0,za={};c.extend({cache:{},expando:G,noData:{embed:true,object:true, +applet:true},data:function(a,b,d){if(!(a.nodeName&&c.noData[a.nodeName.toLowerCase()])){a=a==A?za:a;var f=a[G],e=c.cache;if(!f&&typeof b==="string"&&d===w)return null;f||(f=++Ya);if(typeof b==="object"){a[G]=f;e[f]=c.extend(true,{},b)}else if(!e[f]){a[G]=f;e[f]={}}a=e[f];if(d!==w)a[b]=d;return typeof b==="string"?a[b]:a}},removeData:function(a,b){if(!(a.nodeName&&c.noData[a.nodeName.toLowerCase()])){a=a==A?za:a;var d=a[G],f=c.cache,e=f[d];if(b){if(e){delete e[b];c.isEmptyObject(e)&&c.removeData(a)}}else{if(c.support.deleteExpando)delete a[c.expando]; +else a.removeAttribute&&a.removeAttribute(c.expando);delete f[d]}}}});c.fn.extend({data:function(a,b){if(typeof a==="undefined"&&this.length)return c.data(this[0]);else if(typeof a==="object")return this.each(function(){c.data(this,a)});var d=a.split(".");d[1]=d[1]?"."+d[1]:"";if(b===w){var f=this.triggerHandler("getData"+d[1]+"!",[d[0]]);if(f===w&&this.length)f=c.data(this[0],a);return f===w&&d[1]?this.data(d[0]):f}else return this.trigger("setData"+d[1]+"!",[d[0],b]).each(function(){c.data(this, +a,b)})},removeData:function(a){return this.each(function(){c.removeData(this,a)})}});c.extend({queue:function(a,b,d){if(a){b=(b||"fx")+"queue";var f=c.data(a,b);if(!d)return f||[];if(!f||c.isArray(d))f=c.data(a,b,c.makeArray(d));else f.push(d);return f}},dequeue:function(a,b){b=b||"fx";var d=c.queue(a,b),f=d.shift();if(f==="inprogress")f=d.shift();if(f){b==="fx"&&d.unshift("inprogress");f.call(a,function(){c.dequeue(a,b)})}}});c.fn.extend({queue:function(a,b){if(typeof a!=="string"){b=a;a="fx"}if(b=== +w)return c.queue(this[0],a);return this.each(function(){var d=c.queue(this,a,b);a==="fx"&&d[0]!=="inprogress"&&c.dequeue(this,a)})},dequeue:function(a){return this.each(function(){c.dequeue(this,a)})},delay:function(a,b){a=c.fx?c.fx.speeds[a]||a:a;b=b||"fx";return this.queue(b,function(){var 
d=this;setTimeout(function(){c.dequeue(d,b)},a)})},clearQueue:function(a){return this.queue(a||"fx",[])}});var Aa=/[\n\t]/g,ca=/\s+/,Za=/\r/g,$a=/href|src|style/,ab=/(button|input)/i,bb=/(button|input|object|select|textarea)/i, +cb=/^(a|area)$/i,Ba=/radio|checkbox/;c.fn.extend({attr:function(a,b){return X(this,a,b,true,c.attr)},removeAttr:function(a){return this.each(function(){c.attr(this,a,"");this.nodeType===1&&this.removeAttribute(a)})},addClass:function(a){if(c.isFunction(a))return this.each(function(n){var r=c(this);r.addClass(a.call(this,n,r.attr("class")))});if(a&&typeof a==="string")for(var b=(a||"").split(ca),d=0,f=this.length;d-1)return true;return false},val:function(a){if(a===w){var b=this[0];if(b){if(c.nodeName(b,"option"))return(b.attributes.value||{}).specified?b.value:b.text;if(c.nodeName(b,"select")){var d=b.selectedIndex,f=[],e=b.options;b=b.type==="select-one";if(d<0)return null;var j=b?d:0;for(d=b?d+1:e.length;j=0;else if(c.nodeName(this,"select")){var u=c.makeArray(r);c("option",this).each(function(){this.selected= +c.inArray(c(this).val(),u)>=0});if(!u.length)this.selectedIndex=-1}else this.value=r}})}});c.extend({attrFn:{val:true,css:true,html:true,text:true,data:true,width:true,height:true,offset:true},attr:function(a,b,d,f){if(!a||a.nodeType===3||a.nodeType===8)return w;if(f&&b in c.attrFn)return c(a)[b](d);f=a.nodeType!==1||!c.isXMLDoc(a);var e=d!==w;b=f&&c.props[b]||b;if(a.nodeType===1){var j=$a.test(b);if(b in a&&f&&!j){if(e){b==="type"&&ab.test(a.nodeName)&&a.parentNode&&c.error("type property can't be changed"); +a[b]=d}if(c.nodeName(a,"form")&&a.getAttributeNode(b))return a.getAttributeNode(b).nodeValue;if(b==="tabIndex")return(b=a.getAttributeNode("tabIndex"))&&b.specified?b.value:bb.test(a.nodeName)||cb.test(a.nodeName)&&a.href?0:w;return a[b]}if(!c.support.style&&f&&b==="style"){if(e)a.style.cssText=""+d;return a.style.cssText}e&&a.setAttribute(b,""+d);a=!c.support.hrefNormalized&&f&&j?a.getAttribute(b,2):a.getAttribute(b);return 
a===null?w:a}return c.style(a,b,d)}});var O=/\.(.*)$/,db=function(a){return a.replace(/[^\w\s\.\|`]/g, +function(b){return"\\"+b})};c.event={add:function(a,b,d,f){if(!(a.nodeType===3||a.nodeType===8)){if(a.setInterval&&a!==A&&!a.frameElement)a=A;var e,j;if(d.handler){e=d;d=e.handler}if(!d.guid)d.guid=c.guid++;if(j=c.data(a)){var i=j.events=j.events||{},o=j.handle;if(!o)j.handle=o=function(){return typeof c!=="undefined"&&!c.event.triggered?c.event.handle.apply(o.elem,arguments):w};o.elem=a;b=b.split(" ");for(var k,n=0,r;k=b[n++];){j=e?c.extend({},e):{handler:d,data:f};if(k.indexOf(".")>-1){r=k.split("."); +k=r.shift();j.namespace=r.slice(0).sort().join(".")}else{r=[];j.namespace=""}j.type=k;j.guid=d.guid;var u=i[k],z=c.event.special[k]||{};if(!u){u=i[k]=[];if(!z.setup||z.setup.call(a,f,r,o)===false)if(a.addEventListener)a.addEventListener(k,o,false);else a.attachEvent&&a.attachEvent("on"+k,o)}if(z.add){z.add.call(a,j);if(!j.handler.guid)j.handler.guid=d.guid}u.push(j);c.event.global[k]=true}a=null}}},global:{},remove:function(a,b,d,f){if(!(a.nodeType===3||a.nodeType===8)){var e,j=0,i,o,k,n,r,u,z=c.data(a), +C=z&&z.events;if(z&&C){if(b&&b.type){d=b.handler;b=b.type}if(!b||typeof b==="string"&&b.charAt(0)==="."){b=b||"";for(e in C)c.event.remove(a,e+b)}else{for(b=b.split(" ");e=b[j++];){n=e;i=e.indexOf(".")<0;o=[];if(!i){o=e.split(".");e=o.shift();k=new RegExp("(^|\\.)"+c.map(o.slice(0).sort(),db).join("\\.(?:.*\\.)?")+"(\\.|$)")}if(r=C[e])if(d){n=c.event.special[e]||{};for(B=f||0;B=0){a.type= +e=e.slice(0,-1);a.exclusive=true}if(!d){a.stopPropagation();c.event.global[e]&&c.each(c.cache,function(){this.events&&this.events[e]&&c.event.trigger(a,b,this.handle.elem)})}if(!d||d.nodeType===3||d.nodeType===8)return 
w;a.result=w;a.target=d;b=c.makeArray(b);b.unshift(a)}a.currentTarget=d;(f=c.data(d,"handle"))&&f.apply(d,b);f=d.parentNode||d.ownerDocument;try{if(!(d&&d.nodeName&&c.noData[d.nodeName.toLowerCase()]))if(d["on"+e]&&d["on"+e].apply(d,b)===false)a.result=false}catch(j){}if(!a.isPropagationStopped()&& +f)c.event.trigger(a,b,f,true);else if(!a.isDefaultPrevented()){f=a.target;var i,o=c.nodeName(f,"a")&&e==="click",k=c.event.special[e]||{};if((!k._default||k._default.call(d,a)===false)&&!o&&!(f&&f.nodeName&&c.noData[f.nodeName.toLowerCase()])){try{if(f[e]){if(i=f["on"+e])f["on"+e]=null;c.event.triggered=true;f[e]()}}catch(n){}if(i)f["on"+e]=i;c.event.triggered=false}}},handle:function(a){var b,d,f,e;a=arguments[0]=c.event.fix(a||A.event);a.currentTarget=this;b=a.type.indexOf(".")<0&&!a.exclusive; +if(!b){d=a.type.split(".");a.type=d.shift();f=new RegExp("(^|\\.)"+d.slice(0).sort().join("\\.(?:.*\\.)?")+"(\\.|$)")}e=c.data(this,"events");d=e[a.type];if(e&&d){d=d.slice(0);e=0;for(var j=d.length;e-1?c.map(a.options,function(f){return f.selected}).join("-"):"";else if(a.nodeName.toLowerCase()==="select")d=a.selectedIndex;return d},fa=function(a,b){var d=a.target,f,e;if(!(!da.test(d.nodeName)||d.readOnly)){f=c.data(d,"_change_data");e=Fa(d);if(a.type!=="focusout"||d.type!=="radio")c.data(d,"_change_data", +e);if(!(f===w||e===f))if(f!=null||e){a.type="change";return c.event.trigger(a,b,d)}}};c.event.special.change={filters:{focusout:fa,click:function(a){var b=a.target,d=b.type;if(d==="radio"||d==="checkbox"||b.nodeName.toLowerCase()==="select")return fa.call(this,a)},keydown:function(a){var b=a.target,d=b.type;if(a.keyCode===13&&b.nodeName.toLowerCase()!=="textarea"||a.keyCode===32&&(d==="checkbox"||d==="radio")||d==="select-multiple")return fa.call(this,a)},beforeactivate:function(a){a=a.target;c.data(a, +"_change_data",Fa(a))}},setup:function(){if(this.type==="file")return false;for(var a in ea)c.event.add(this,a+".specialChange",ea[a]);return 
da.test(this.nodeName)},teardown:function(){c.event.remove(this,".specialChange");return da.test(this.nodeName)}};ea=c.event.special.change.filters}s.addEventListener&&c.each({focus:"focusin",blur:"focusout"},function(a,b){function d(f){f=c.event.fix(f);f.type=b;return c.event.handle.call(this,f)}c.event.special[b]={setup:function(){this.addEventListener(a, +d,true)},teardown:function(){this.removeEventListener(a,d,true)}}});c.each(["bind","one"],function(a,b){c.fn[b]=function(d,f,e){if(typeof d==="object"){for(var j in d)this[b](j,f,d[j],e);return this}if(c.isFunction(f)){e=f;f=w}var i=b==="one"?c.proxy(e,function(k){c(this).unbind(k,i);return e.apply(this,arguments)}):e;if(d==="unload"&&b!=="one")this.one(d,f,e);else{j=0;for(var o=this.length;j0){y=t;break}}t=t[g]}m[q]=y}}}var f=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^[\]]*\]|['"][^'"]*['"]|[^[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g, +e=0,j=Object.prototype.toString,i=false,o=true;[0,0].sort(function(){o=false;return 0});var k=function(g,h,l,m){l=l||[];var q=h=h||s;if(h.nodeType!==1&&h.nodeType!==9)return[];if(!g||typeof g!=="string")return l;for(var p=[],v,t,y,S,H=true,M=x(h),I=g;(f.exec(""),v=f.exec(I))!==null;){I=v[3];p.push(v[1]);if(v[2]){S=v[3];break}}if(p.length>1&&r.exec(g))if(p.length===2&&n.relative[p[0]])t=ga(p[0]+p[1],h);else for(t=n.relative[p[0]]?[h]:k(p.shift(),h);p.length;){g=p.shift();if(n.relative[g])g+=p.shift(); +t=ga(g,t)}else{if(!m&&p.length>1&&h.nodeType===9&&!M&&n.match.ID.test(p[0])&&!n.match.ID.test(p[p.length-1])){v=k.find(p.shift(),h,M);h=v.expr?k.filter(v.expr,v.set)[0]:v.set[0]}if(h){v=m?{expr:p.pop(),set:z(m)}:k.find(p.pop(),p.length===1&&(p[0]==="~"||p[0]==="+")&&h.parentNode?h.parentNode:h,M);t=v.expr?k.filter(v.expr,v.set):v.set;if(p.length>0)y=z(t);else H=false;for(;p.length;){var D=p.pop();v=D;if(n.relative[D])v=p.pop();else D="";if(v==null)v=h;n.relative[D](y,v,M)}}else y=[]}y||(y=t);y||k.error(D|| +g);if(j.call(y)==="[object 
Array]")if(H)if(h&&h.nodeType===1)for(g=0;y[g]!=null;g++){if(y[g]&&(y[g]===true||y[g].nodeType===1&&E(h,y[g])))l.push(t[g])}else for(g=0;y[g]!=null;g++)y[g]&&y[g].nodeType===1&&l.push(t[g]);else l.push.apply(l,y);else z(y,l);if(S){k(S,q,l,m);k.uniqueSort(l)}return l};k.uniqueSort=function(g){if(B){i=o;g.sort(B);if(i)for(var h=1;h":function(g,h){var l=typeof h==="string";if(l&&!/\W/.test(h)){h=h.toLowerCase();for(var m=0,q=g.length;m=0))l||m.push(v);else if(l)h[p]=false;return false},ID:function(g){return g[1].replace(/\\/g,"")},TAG:function(g){return g[1].toLowerCase()}, +CHILD:function(g){if(g[1]==="nth"){var h=/(-?)(\d*)n((?:\+|-)?\d*)/.exec(g[2]==="even"&&"2n"||g[2]==="odd"&&"2n+1"||!/\D/.test(g[2])&&"0n+"+g[2]||g[2]);g[2]=h[1]+(h[2]||1)-0;g[3]=h[3]-0}g[0]=e++;return g},ATTR:function(g,h,l,m,q,p){h=g[1].replace(/\\/g,"");if(!p&&n.attrMap[h])g[1]=n.attrMap[h];if(g[2]==="~=")g[4]=" "+g[4]+" ";return g},PSEUDO:function(g,h,l,m,q){if(g[1]==="not")if((f.exec(g[3])||"").length>1||/^\w/.test(g[3]))g[3]=k(g[3],null,null,h);else{g=k.filter(g[3],h,l,true^q);l||m.push.apply(m, +g);return false}else if(n.match.POS.test(g[0])||n.match.CHILD.test(g[0]))return true;return g},POS:function(g){g.unshift(true);return g}},filters:{enabled:function(g){return g.disabled===false&&g.type!=="hidden"},disabled:function(g){return g.disabled===true},checked:function(g){return g.checked===true},selected:function(g){return g.selected===true},parent:function(g){return!!g.firstChild},empty:function(g){return!g.firstChild},has:function(g,h,l){return!!k(l[3],g).length},header:function(g){return/h\d/i.test(g.nodeName)}, 
+text:function(g){return"text"===g.type},radio:function(g){return"radio"===g.type},checkbox:function(g){return"checkbox"===g.type},file:function(g){return"file"===g.type},password:function(g){return"password"===g.type},submit:function(g){return"submit"===g.type},image:function(g){return"image"===g.type},reset:function(g){return"reset"===g.type},button:function(g){return"button"===g.type||g.nodeName.toLowerCase()==="button"},input:function(g){return/input|select|textarea|button/i.test(g.nodeName)}}, +setFilters:{first:function(g,h){return h===0},last:function(g,h,l,m){return h===m.length-1},even:function(g,h){return h%2===0},odd:function(g,h){return h%2===1},lt:function(g,h,l){return hl[3]-0},nth:function(g,h,l){return l[3]-0===h},eq:function(g,h,l){return l[3]-0===h}},filter:{PSEUDO:function(g,h,l,m){var q=h[1],p=n.filters[q];if(p)return p(g,l,h,m);else if(q==="contains")return(g.textContent||g.innerText||a([g])||"").indexOf(h[3])>=0;else if(q==="not"){h= +h[3];l=0;for(m=h.length;l=0}},ID:function(g,h){return g.nodeType===1&&g.getAttribute("id")===h},TAG:function(g,h){return h==="*"&&g.nodeType===1||g.nodeName.toLowerCase()===h},CLASS:function(g,h){return(" "+(g.className||g.getAttribute("class"))+" ").indexOf(h)>-1},ATTR:function(g,h){var l=h[1];g=n.attrHandle[l]?n.attrHandle[l](g):g[l]!=null?g[l]:g.getAttribute(l);l=g+"";var m=h[2];h=h[4];return g==null?m==="!=":m=== +"="?l===h:m==="*="?l.indexOf(h)>=0:m==="~="?(" "+l+" ").indexOf(h)>=0:!h?l&&g!==false:m==="!="?l!==h:m==="^="?l.indexOf(h)===0:m==="$="?l.substr(l.length-h.length)===h:m==="|="?l===h||l.substr(0,h.length+1)===h+"-":false},POS:function(g,h,l,m){var q=n.setFilters[h[2]];if(q)return q(g,l,h,m)}}},r=n.match.POS;for(var u in n.match){n.match[u]=new RegExp(n.match[u].source+/(?![^\[]*\])(?![^\(]*\))/.source);n.leftMatch[u]=new RegExp(/(^(?:.|\r|\n)*?)/.source+n.match[u].source.replace(/\\(\d+)/g,function(g, +h){return"\\"+(h-0+1)}))}var 
z=function(g,h){g=Array.prototype.slice.call(g,0);if(h){h.push.apply(h,g);return h}return g};try{Array.prototype.slice.call(s.documentElement.childNodes,0)}catch(C){z=function(g,h){h=h||[];if(j.call(g)==="[object Array]")Array.prototype.push.apply(h,g);else if(typeof g.length==="number")for(var l=0,m=g.length;l";var l=s.documentElement;l.insertBefore(g,l.firstChild);if(s.getElementById(h)){n.find.ID=function(m,q,p){if(typeof q.getElementById!=="undefined"&&!p)return(q=q.getElementById(m[1]))?q.id===m[1]||typeof q.getAttributeNode!=="undefined"&& +q.getAttributeNode("id").nodeValue===m[1]?[q]:w:[]};n.filter.ID=function(m,q){var p=typeof m.getAttributeNode!=="undefined"&&m.getAttributeNode("id");return m.nodeType===1&&p&&p.nodeValue===q}}l.removeChild(g);l=g=null})();(function(){var g=s.createElement("div");g.appendChild(s.createComment(""));if(g.getElementsByTagName("*").length>0)n.find.TAG=function(h,l){l=l.getElementsByTagName(h[1]);if(h[1]==="*"){h=[];for(var m=0;l[m];m++)l[m].nodeType===1&&h.push(l[m]);l=h}return l};g.innerHTML=""; +if(g.firstChild&&typeof g.firstChild.getAttribute!=="undefined"&&g.firstChild.getAttribute("href")!=="#")n.attrHandle.href=function(h){return h.getAttribute("href",2)};g=null})();s.querySelectorAll&&function(){var g=k,h=s.createElement("div");h.innerHTML="

";if(!(h.querySelectorAll&&h.querySelectorAll(".TEST").length===0)){k=function(m,q,p,v){q=q||s;if(!v&&q.nodeType===9&&!x(q))try{return z(q.querySelectorAll(m),p)}catch(t){}return g(m,q,p,v)};for(var l in g)k[l]=g[l];h=null}}(); +(function(){var g=s.createElement("div");g.innerHTML="
";if(!(!g.getElementsByClassName||g.getElementsByClassName("e").length===0)){g.lastChild.className="e";if(g.getElementsByClassName("e").length!==1){n.order.splice(1,0,"CLASS");n.find.CLASS=function(h,l,m){if(typeof l.getElementsByClassName!=="undefined"&&!m)return l.getElementsByClassName(h[1])};g=null}}})();var E=s.compareDocumentPosition?function(g,h){return!!(g.compareDocumentPosition(h)&16)}: +function(g,h){return g!==h&&(g.contains?g.contains(h):true)},x=function(g){return(g=(g?g.ownerDocument||g:0).documentElement)?g.nodeName!=="HTML":false},ga=function(g,h){var l=[],m="",q;for(h=h.nodeType?[h]:h;q=n.match.PSEUDO.exec(g);){m+=q[0];g=g.replace(n.match.PSEUDO,"")}g=n.relative[g]?g+"*":g;q=0;for(var p=h.length;q=0===d})};c.fn.extend({find:function(a){for(var b=this.pushStack("","find",a),d=0,f=0,e=this.length;f0)for(var j=d;j0},closest:function(a,b){if(c.isArray(a)){var d=[],f=this[0],e,j= +{},i;if(f&&a.length){e=0;for(var o=a.length;e-1:c(f).is(e)){d.push({selector:i,elem:f});delete j[i]}}f=f.parentNode}}return d}var k=c.expr.match.POS.test(a)?c(a,b||this.context):null;return this.map(function(n,r){for(;r&&r.ownerDocument&&r!==b;){if(k?k.index(r)>-1:c(r).is(a))return r;r=r.parentNode}return null})},index:function(a){if(!a||typeof a=== +"string")return c.inArray(this[0],a?c(a):this.parent().children());return c.inArray(a.jquery?a[0]:a,this)},add:function(a,b){a=typeof a==="string"?c(a,b||this.context):c.makeArray(a);b=c.merge(this.get(),a);return this.pushStack(qa(a[0])||qa(b[0])?b:c.unique(b))},andSelf:function(){return this.add(this.prevObject)}});c.each({parent:function(a){return(a=a.parentNode)&&a.nodeType!==11?a:null},parents:function(a){return c.dir(a,"parentNode")},parentsUntil:function(a,b,d){return c.dir(a,"parentNode", +d)},next:function(a){return c.nth(a,2,"nextSibling")},prev:function(a){return c.nth(a,2,"previousSibling")},nextAll:function(a){return c.dir(a,"nextSibling")},prevAll:function(a){return 
c.dir(a,"previousSibling")},nextUntil:function(a,b,d){return c.dir(a,"nextSibling",d)},prevUntil:function(a,b,d){return c.dir(a,"previousSibling",d)},siblings:function(a){return c.sibling(a.parentNode.firstChild,a)},children:function(a){return c.sibling(a.firstChild)},contents:function(a){return c.nodeName(a,"iframe")? +a.contentDocument||a.contentWindow.document:c.makeArray(a.childNodes)}},function(a,b){c.fn[a]=function(d,f){var e=c.map(this,b,d);eb.test(a)||(f=d);if(f&&typeof f==="string")e=c.filter(f,e);e=this.length>1?c.unique(e):e;if((this.length>1||gb.test(f))&&fb.test(a))e=e.reverse();return this.pushStack(e,a,R.call(arguments).join(","))}});c.extend({filter:function(a,b,d){if(d)a=":not("+a+")";return c.find.matches(a,b)},dir:function(a,b,d){var f=[];for(a=a[b];a&&a.nodeType!==9&&(d===w||a.nodeType!==1||!c(a).is(d));){a.nodeType=== +1&&f.push(a);a=a[b]}return f},nth:function(a,b,d){b=b||1;for(var f=0;a;a=a[d])if(a.nodeType===1&&++f===b)break;return a},sibling:function(a,b){for(var d=[];a;a=a.nextSibling)a.nodeType===1&&a!==b&&d.push(a);return d}});var Ja=/ jQuery\d+="(?:\d+|null)"/g,V=/^\s+/,Ka=/(<([\w:]+)[^>]*?)\/>/g,hb=/^(?:area|br|col|embed|hr|img|input|link|meta|param)$/i,La=/<([\w:]+)/,ib=/"},F={option:[1,""],legend:[1,"
","
"],thead:[1,"","
"],tr:[2,"","
"],td:[3,"","
"],col:[2,"","
"],area:[1,"",""],_default:[0,"",""]};F.optgroup=F.option;F.tbody=F.tfoot=F.colgroup=F.caption=F.thead;F.th=F.td;if(!c.support.htmlSerialize)F._default=[1,"div
","
"];c.fn.extend({text:function(a){if(c.isFunction(a))return this.each(function(b){var d= +c(this);d.text(a.call(this,b,d.text()))});if(typeof a!=="object"&&a!==w)return this.empty().append((this[0]&&this[0].ownerDocument||s).createTextNode(a));return c.text(this)},wrapAll:function(a){if(c.isFunction(a))return this.each(function(d){c(this).wrapAll(a.call(this,d))});if(this[0]){var b=c(a,this[0].ownerDocument).eq(0).clone(true);this[0].parentNode&&b.insertBefore(this[0]);b.map(function(){for(var d=this;d.firstChild&&d.firstChild.nodeType===1;)d=d.firstChild;return d}).append(this)}return this}, +wrapInner:function(a){if(c.isFunction(a))return this.each(function(b){c(this).wrapInner(a.call(this,b))});return this.each(function(){var b=c(this),d=b.contents();d.length?d.wrapAll(a):b.append(a)})},wrap:function(a){return this.each(function(){c(this).wrapAll(a)})},unwrap:function(){return this.parent().each(function(){c.nodeName(this,"body")||c(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,true,function(a){this.nodeType===1&&this.appendChild(a)})}, +prepend:function(){return this.domManip(arguments,true,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,false,function(b){this.parentNode.insertBefore(b,this)});else if(arguments.length){var a=c(arguments[0]);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,false,function(b){this.parentNode.insertBefore(b, +this.nextSibling)});else if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,c(arguments[0]).toArray());return a}},remove:function(a,b){for(var d=0,f;(f=this[d])!=null;d++)if(!a||c.filter(a,[f]).length){if(!b&&f.nodeType===1){c.cleanData(f.getElementsByTagName("*"));c.cleanData([f])}f.parentNode&&f.parentNode.removeChild(f)}return 
this},empty:function(){for(var a=0,b;(b=this[a])!=null;a++)for(b.nodeType===1&&c.cleanData(b.getElementsByTagName("*"));b.firstChild;)b.removeChild(b.firstChild); +return this},clone:function(a){var b=this.map(function(){if(!c.support.noCloneEvent&&!c.isXMLDoc(this)){var d=this.outerHTML,f=this.ownerDocument;if(!d){d=f.createElement("div");d.appendChild(this.cloneNode(true));d=d.innerHTML}return c.clean([d.replace(Ja,"").replace(/=([^="'>\s]+\/)>/g,'="$1">').replace(V,"")],f)[0]}else return this.cloneNode(true)});if(a===true){ra(this,b);ra(this.find("*"),b.find("*"))}return b},html:function(a){if(a===w)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(Ja, +""):null;else if(typeof a==="string"&&!ta.test(a)&&(c.support.leadingWhitespace||!V.test(a))&&!F[(La.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Ka,Ma);try{for(var b=0,d=this.length;b0||e.cacheable||this.length>1?k.cloneNode(true):k)}o.length&&c.each(o,Qa)}return this}});c.fragments={};c.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){c.fn[a]=function(d){var f=[];d=c(d);var e=this.length===1&&this[0].parentNode;if(e&&e.nodeType===11&&e.childNodes.length===1&&d.length===1){d[b](this[0]); +return this}else{e=0;for(var j=d.length;e0?this.clone(true):this).get();c.fn[b].apply(c(d[e]),i);f=f.concat(i)}return this.pushStack(f,a,d.selector)}}});c.extend({clean:function(a,b,d,f){b=b||s;if(typeof b.createElement==="undefined")b=b.ownerDocument||b[0]&&b[0].ownerDocument||s;for(var e=[],j=0,i;(i=a[j])!=null;j++){if(typeof i==="number")i+="";if(i){if(typeof i==="string"&&!jb.test(i))i=b.createTextNode(i);else if(typeof i==="string"){i=i.replace(Ka,Ma);var o=(La.exec(i)||["", 
+""])[1].toLowerCase(),k=F[o]||F._default,n=k[0],r=b.createElement("div");for(r.innerHTML=k[1]+i+k[2];n--;)r=r.lastChild;if(!c.support.tbody){n=ib.test(i);o=o==="table"&&!n?r.firstChild&&r.firstChild.childNodes:k[1]===""&&!n?r.childNodes:[];for(k=o.length-1;k>=0;--k)c.nodeName(o[k],"tbody")&&!o[k].childNodes.length&&o[k].parentNode.removeChild(o[k])}!c.support.leadingWhitespace&&V.test(i)&&r.insertBefore(b.createTextNode(V.exec(i)[0]),r.firstChild);i=r.childNodes}if(i.nodeType)e.push(i);else e= +c.merge(e,i)}}if(d)for(j=0;e[j];j++)if(f&&c.nodeName(e[j],"script")&&(!e[j].type||e[j].type.toLowerCase()==="text/javascript"))f.push(e[j].parentNode?e[j].parentNode.removeChild(e[j]):e[j]);else{e[j].nodeType===1&&e.splice.apply(e,[j+1,0].concat(c.makeArray(e[j].getElementsByTagName("script"))));d.appendChild(e[j])}return e},cleanData:function(a){for(var b,d,f=c.cache,e=c.event.special,j=c.support.deleteExpando,i=0,o;(o=a[i])!=null;i++)if(d=o[c.expando]){b=f[d];if(b.events)for(var k in b.events)e[k]? 
+c.event.remove(o,k):Ca(o,k,b.handle);if(j)delete o[c.expando];else o.removeAttribute&&o.removeAttribute(c.expando);delete f[d]}}});var kb=/z-?index|font-?weight|opacity|zoom|line-?height/i,Na=/alpha\([^)]*\)/,Oa=/opacity=([^)]*)/,ha=/float/i,ia=/-([a-z])/ig,lb=/([A-Z])/g,mb=/^-?\d+(?:px)?$/i,nb=/^-?\d/,ob={position:"absolute",visibility:"hidden",display:"block"},pb=["Left","Right"],qb=["Top","Bottom"],rb=s.defaultView&&s.defaultView.getComputedStyle,Pa=c.support.cssFloat?"cssFloat":"styleFloat",ja= +function(a,b){return b.toUpperCase()};c.fn.css=function(a,b){return X(this,a,b,true,function(d,f,e){if(e===w)return c.curCSS(d,f);if(typeof e==="number"&&!kb.test(f))e+="px";c.style(d,f,e)})};c.extend({style:function(a,b,d){if(!a||a.nodeType===3||a.nodeType===8)return w;if((b==="width"||b==="height")&&parseFloat(d)<0)d=w;var f=a.style||a,e=d!==w;if(!c.support.opacity&&b==="opacity"){if(e){f.zoom=1;b=parseInt(d,10)+""==="NaN"?"":"alpha(opacity="+d*100+")";a=f.filter||c.curCSS(a,"filter")||"";f.filter= +Na.test(a)?a.replace(Na,b):b}return f.filter&&f.filter.indexOf("opacity=")>=0?parseFloat(Oa.exec(f.filter)[1])/100+"":""}if(ha.test(b))b=Pa;b=b.replace(ia,ja);if(e)f[b]=d;return f[b]},css:function(a,b,d,f){if(b==="width"||b==="height"){var e,j=b==="width"?pb:qb;function i(){e=b==="width"?a.offsetWidth:a.offsetHeight;f!=="border"&&c.each(j,function(){f||(e-=parseFloat(c.curCSS(a,"padding"+this,true))||0);if(f==="margin")e+=parseFloat(c.curCSS(a,"margin"+this,true))||0;else e-=parseFloat(c.curCSS(a, +"border"+this+"Width",true))||0})}a.offsetWidth!==0?i():c.swap(a,ob,i);return Math.max(0,Math.round(e))}return c.curCSS(a,b,d)},curCSS:function(a,b,d){var f,e=a.style;if(!c.support.opacity&&b==="opacity"&&a.currentStyle){f=Oa.test(a.currentStyle.filter||"")?parseFloat(RegExp.$1)/100+"":"";return f===""?"1":f}if(ha.test(b))b=Pa;if(!d&&e&&e[b])f=e[b];else if(rb){if(ha.test(b))b="float";b=b.replace(lb,"-$1").toLowerCase();e=a.ownerDocument.defaultView;if(!e)return 
null;if(a=e.getComputedStyle(a,null))f= +a.getPropertyValue(b);if(b==="opacity"&&f==="")f="1"}else if(a.currentStyle){d=b.replace(ia,ja);f=a.currentStyle[b]||a.currentStyle[d];if(!mb.test(f)&&nb.test(f)){b=e.left;var j=a.runtimeStyle.left;a.runtimeStyle.left=a.currentStyle.left;e.left=d==="fontSize"?"1em":f||0;f=e.pixelLeft+"px";e.left=b;a.runtimeStyle.left=j}}return f},swap:function(a,b,d){var f={};for(var e in b){f[e]=a.style[e];a.style[e]=b[e]}d.call(a);for(e in b)a.style[e]=f[e]}});if(c.expr&&c.expr.filters){c.expr.filters.hidden=function(a){var b= +a.offsetWidth,d=a.offsetHeight,f=a.nodeName.toLowerCase()==="tr";return b===0&&d===0&&!f?true:b>0&&d>0&&!f?false:c.curCSS(a,"display")==="none"};c.expr.filters.visible=function(a){return!c.expr.filters.hidden(a)}}var sb=J(),tb=//gi,ub=/select|textarea/i,vb=/color|date|datetime|email|hidden|month|number|password|range|search|tel|text|time|url|week/i,N=/=\?(&|$)/,ka=/\?/,wb=/(\?|&)_=.*?(&|$)/,xb=/^(\w+:)?\/\/([^\/?#]+)/,yb=/%20/g,zb=c.fn.load;c.fn.extend({load:function(a,b,d){if(typeof a!== +"string")return zb.call(this,a);else if(!this.length)return this;var f=a.indexOf(" ");if(f>=0){var e=a.slice(f,a.length);a=a.slice(0,f)}f="GET";if(b)if(c.isFunction(b)){d=b;b=null}else if(typeof b==="object"){b=c.param(b,c.ajaxSettings.traditional);f="POST"}var j=this;c.ajax({url:a,type:f,dataType:"html",data:b,complete:function(i,o){if(o==="success"||o==="notmodified")j.html(e?c("
").append(i.responseText.replace(tb,"")).find(e):i.responseText);d&&j.each(d,[i.responseText,o,i])}});return this}, +serialize:function(){return c.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?c.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||ub.test(this.nodeName)||vb.test(this.type))}).map(function(a,b){a=c(this).val();return a==null?null:c.isArray(a)?c.map(a,function(d){return{name:b.name,value:d}}):{name:b.name,value:a}}).get()}});c.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "), +function(a,b){c.fn[b]=function(d){return this.bind(b,d)}});c.extend({get:function(a,b,d,f){if(c.isFunction(b)){f=f||d;d=b;b=null}return c.ajax({type:"GET",url:a,data:b,success:d,dataType:f})},getScript:function(a,b){return c.get(a,null,b,"script")},getJSON:function(a,b,d){return c.get(a,b,d,"json")},post:function(a,b,d,f){if(c.isFunction(b)){f=f||d;d=b;b={}}return c.ajax({type:"POST",url:a,data:b,success:d,dataType:f})},ajaxSetup:function(a){c.extend(c.ajaxSettings,a)},ajaxSettings:{url:location.href, +global:true,type:"GET",contentType:"application/x-www-form-urlencoded",processData:true,async:true,xhr:A.XMLHttpRequest&&(A.location.protocol!=="file:"||!A.ActiveXObject)?function(){return new A.XMLHttpRequest}:function(){try{return new A.ActiveXObject("Microsoft.XMLHTTP")}catch(a){}},accepts:{xml:"application/xml, text/xml",html:"text/html",script:"text/javascript, application/javascript",json:"application/json, text/javascript",text:"text/plain",_default:"*/*"}},lastModified:{},etag:{},ajax:function(a){function b(){e.success&& +e.success.call(k,o,i,x);e.global&&f("ajaxSuccess",[x,e])}function d(){e.complete&&e.complete.call(k,x,i);e.global&&f("ajaxComplete",[x,e]);e.global&&!--c.active&&c.event.trigger("ajaxStop")}function f(q,p){(e.context?c(e.context):c.event).trigger(q,p)}var 
e=c.extend(true,{},c.ajaxSettings,a),j,i,o,k=a&&a.context||e,n=e.type.toUpperCase();if(e.data&&e.processData&&typeof e.data!=="string")e.data=c.param(e.data,e.traditional);if(e.dataType==="jsonp"){if(n==="GET")N.test(e.url)||(e.url+=(ka.test(e.url)? +"&":"?")+(e.jsonp||"callback")+"=?");else if(!e.data||!N.test(e.data))e.data=(e.data?e.data+"&":"")+(e.jsonp||"callback")+"=?";e.dataType="json"}if(e.dataType==="json"&&(e.data&&N.test(e.data)||N.test(e.url))){j=e.jsonpCallback||"jsonp"+sb++;if(e.data)e.data=(e.data+"").replace(N,"="+j+"$1");e.url=e.url.replace(N,"="+j+"$1");e.dataType="script";A[j]=A[j]||function(q){o=q;b();d();A[j]=w;try{delete A[j]}catch(p){}z&&z.removeChild(C)}}if(e.dataType==="script"&&e.cache===null)e.cache=false;if(e.cache=== +false&&n==="GET"){var r=J(),u=e.url.replace(wb,"$1_="+r+"$2");e.url=u+(u===e.url?(ka.test(e.url)?"&":"?")+"_="+r:"")}if(e.data&&n==="GET")e.url+=(ka.test(e.url)?"&":"?")+e.data;e.global&&!c.active++&&c.event.trigger("ajaxStart");r=(r=xb.exec(e.url))&&(r[1]&&r[1]!==location.protocol||r[2]!==location.host);if(e.dataType==="script"&&n==="GET"&&r){var z=s.getElementsByTagName("head")[0]||s.documentElement,C=s.createElement("script");C.src=e.url;if(e.scriptCharset)C.charset=e.scriptCharset;if(!j){var B= +false;C.onload=C.onreadystatechange=function(){if(!B&&(!this.readyState||this.readyState==="loaded"||this.readyState==="complete")){B=true;b();d();C.onload=C.onreadystatechange=null;z&&C.parentNode&&z.removeChild(C)}}}z.insertBefore(C,z.firstChild);return w}var E=false,x=e.xhr();if(x){e.username?x.open(n,e.url,e.async,e.username,e.password):x.open(n,e.url,e.async);try{if(e.data||a&&a.contentType)x.setRequestHeader("Content-Type",e.contentType);if(e.ifModified){c.lastModified[e.url]&&x.setRequestHeader("If-Modified-Since", 
+c.lastModified[e.url]);c.etag[e.url]&&x.setRequestHeader("If-None-Match",c.etag[e.url])}r||x.setRequestHeader("X-Requested-With","XMLHttpRequest");x.setRequestHeader("Accept",e.dataType&&e.accepts[e.dataType]?e.accepts[e.dataType]+", */*":e.accepts._default)}catch(ga){}if(e.beforeSend&&e.beforeSend.call(k,x,e)===false){e.global&&!--c.active&&c.event.trigger("ajaxStop");x.abort();return false}e.global&&f("ajaxSend",[x,e]);var g=x.onreadystatechange=function(q){if(!x||x.readyState===0||q==="abort"){E|| +d();E=true;if(x)x.onreadystatechange=c.noop}else if(!E&&x&&(x.readyState===4||q==="timeout")){E=true;x.onreadystatechange=c.noop;i=q==="timeout"?"timeout":!c.httpSuccess(x)?"error":e.ifModified&&c.httpNotModified(x,e.url)?"notmodified":"success";var p;if(i==="success")try{o=c.httpData(x,e.dataType,e)}catch(v){i="parsererror";p=v}if(i==="success"||i==="notmodified")j||b();else c.handleError(e,x,i,p);d();q==="timeout"&&x.abort();if(e.async)x=null}};try{var h=x.abort;x.abort=function(){x&&h.call(x); +g("abort")}}catch(l){}e.async&&e.timeout>0&&setTimeout(function(){x&&!E&&g("timeout")},e.timeout);try{x.send(n==="POST"||n==="PUT"||n==="DELETE"?e.data:null)}catch(m){c.handleError(e,x,null,m);d()}e.async||g();return x}},handleError:function(a,b,d,f){if(a.error)a.error.call(a.context||a,b,d,f);if(a.global)(a.context?c(a.context):c.event).trigger("ajaxError",[b,a,f])},active:0,httpSuccess:function(a){try{return!a.status&&location.protocol==="file:"||a.status>=200&&a.status<300||a.status===304||a.status=== +1223||a.status===0}catch(b){}return false},httpNotModified:function(a,b){var d=a.getResponseHeader("Last-Modified"),f=a.getResponseHeader("Etag");if(d)c.lastModified[b]=d;if(f)c.etag[b]=f;return a.status===304||a.status===0},httpData:function(a,b,d){var 
f=a.getResponseHeader("content-type")||"",e=b==="xml"||!b&&f.indexOf("xml")>=0;a=e?a.responseXML:a.responseText;e&&a.documentElement.nodeName==="parsererror"&&c.error("parsererror");if(d&&d.dataFilter)a=d.dataFilter(a,b);if(typeof a==="string")if(b=== +"json"||!b&&f.indexOf("json")>=0)a=c.parseJSON(a);else if(b==="script"||!b&&f.indexOf("javascript")>=0)c.globalEval(a);return a},param:function(a,b){function d(i,o){if(c.isArray(o))c.each(o,function(k,n){b||/\[\]$/.test(i)?f(i,n):d(i+"["+(typeof n==="object"||c.isArray(n)?k:"")+"]",n)});else!b&&o!=null&&typeof o==="object"?c.each(o,function(k,n){d(i+"["+k+"]",n)}):f(i,o)}function f(i,o){o=c.isFunction(o)?o():o;e[e.length]=encodeURIComponent(i)+"="+encodeURIComponent(o)}var e=[];if(b===w)b=c.ajaxSettings.traditional; +if(c.isArray(a)||a.jquery)c.each(a,function(){f(this.name,this.value)});else for(var j in a)d(j,a[j]);return e.join("&").replace(yb,"+")}});var la={},Ab=/toggle|show|hide/,Bb=/^([+-]=)?([\d+-.]+)(.*)$/,W,va=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]];c.fn.extend({show:function(a,b){if(a||a===0)return this.animate(K("show",3),a,b);else{a=0;for(b=this.length;a").appendTo("body");f=e.css("display");if(f==="none")f="block";e.remove();la[d]=f}c.data(this[a],"olddisplay",f)}}a=0;for(b=this.length;a=0;f--)if(d[f].elem===this){b&&d[f](true);d.splice(f,1)}});b||this.dequeue();return this}});c.each({slideDown:K("show",1),slideUp:K("hide",1),slideToggle:K("toggle",1),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"}},function(a,b){c.fn[a]=function(d,f){return this.animate(b,d,f)}});c.extend({speed:function(a,b,d){var f=a&&typeof a==="object"?a:{complete:d||!d&&b||c.isFunction(a)&&a,duration:a,easing:d&&b||b&&!c.isFunction(b)&&b};f.duration=c.fx.off?0:typeof f.duration=== 
+"number"?f.duration:c.fx.speeds[f.duration]||c.fx.speeds._default;f.old=f.complete;f.complete=function(){f.queue!==false&&c(this).dequeue();c.isFunction(f.old)&&f.old.call(this)};return f},easing:{linear:function(a,b,d,f){return d+f*a},swing:function(a,b,d,f){return(-Math.cos(a*Math.PI)/2+0.5)*f+d}},timers:[],fx:function(a,b,d){this.options=b;this.elem=a;this.prop=d;if(!b.orig)b.orig={}}});c.fx.prototype={update:function(){this.options.step&&this.options.step.call(this.elem,this.now,this);(c.fx.step[this.prop]|| +c.fx.step._default)(this);if((this.prop==="height"||this.prop==="width")&&this.elem.style)this.elem.style.display="block"},cur:function(a){if(this.elem[this.prop]!=null&&(!this.elem.style||this.elem.style[this.prop]==null))return this.elem[this.prop];return(a=parseFloat(c.css(this.elem,this.prop,a)))&&a>-10000?a:parseFloat(c.curCSS(this.elem,this.prop))||0},custom:function(a,b,d){function f(j){return e.step(j)}this.startTime=J();this.start=a;this.end=b;this.unit=d||this.unit||"px";this.now=this.start; +this.pos=this.state=0;var e=this;f.elem=this.elem;if(f()&&c.timers.push(f)&&!W)W=setInterval(c.fx.tick,13)},show:function(){this.options.orig[this.prop]=c.style(this.elem,this.prop);this.options.show=true;this.custom(this.prop==="width"||this.prop==="height"?1:0,this.cur());c(this.elem).show()},hide:function(){this.options.orig[this.prop]=c.style(this.elem,this.prop);this.options.hide=true;this.custom(this.cur(),0)},step:function(a){var b=J(),d=true;if(a||b>=this.options.duration+this.startTime){this.now= +this.end;this.pos=this.state=1;this.update();this.options.curAnim[this.prop]=true;for(var f in 
this.options.curAnim)if(this.options.curAnim[f]!==true)d=false;if(d){if(this.options.display!=null){this.elem.style.overflow=this.options.overflow;a=c.data(this.elem,"olddisplay");this.elem.style.display=a?a:this.options.display;if(c.css(this.elem,"display")==="none")this.elem.style.display="block"}this.options.hide&&c(this.elem).hide();if(this.options.hide||this.options.show)for(var e in this.options.curAnim)c.style(this.elem, +e,this.options.orig[e]);this.options.complete.call(this.elem)}return false}else{e=b-this.startTime;this.state=e/this.options.duration;a=this.options.easing||(c.easing.swing?"swing":"linear");this.pos=c.easing[this.options.specialEasing&&this.options.specialEasing[this.prop]||a](this.state,e,0,1,this.options.duration);this.now=this.start+(this.end-this.start)*this.pos;this.update()}return true}};c.extend(c.fx,{tick:function(){for(var a=c.timers,b=0;b
"; +a.insertBefore(b,a.firstChild);d=b.firstChild;f=d.firstChild;e=d.nextSibling.firstChild.firstChild;this.doesNotAddBorder=f.offsetTop!==5;this.doesAddBorderForTableAndCells=e.offsetTop===5;f.style.position="fixed";f.style.top="20px";this.supportsFixedPosition=f.offsetTop===20||f.offsetTop===15;f.style.position=f.style.top="";d.style.overflow="hidden";d.style.position="relative";this.subtractsBorderForOverflowNotVisible=f.offsetTop===-5;this.doesNotIncludeMarginInBodyOffset=a.offsetTop!==j;a.removeChild(b); +c.offset.initialize=c.noop},bodyOffset:function(a){var b=a.offsetTop,d=a.offsetLeft;c.offset.initialize();if(c.offset.doesNotIncludeMarginInBodyOffset){b+=parseFloat(c.curCSS(a,"marginTop",true))||0;d+=parseFloat(c.curCSS(a,"marginLeft",true))||0}return{top:b,left:d}},setOffset:function(a,b,d){if(/static/.test(c.curCSS(a,"position")))a.style.position="relative";var f=c(a),e=f.offset(),j=parseInt(c.curCSS(a,"top",true),10)||0,i=parseInt(c.curCSS(a,"left",true),10)||0;if(c.isFunction(b))b=b.call(a, +d,e);d={top:b.top-e.top+j,left:b.left-e.left+i};"using"in b?b.using.call(a,d):f.css(d)}};c.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),d=this.offset(),f=/^body|html$/i.test(b[0].nodeName)?{top:0,left:0}:b.offset();d.top-=parseFloat(c.curCSS(a,"marginTop",true))||0;d.left-=parseFloat(c.curCSS(a,"marginLeft",true))||0;f.top+=parseFloat(c.curCSS(b[0],"borderTopWidth",true))||0;f.left+=parseFloat(c.curCSS(b[0],"borderLeftWidth",true))||0;return{top:d.top- +f.top,left:d.left-f.left}},offsetParent:function(){return this.map(function(){for(var a=this.offsetParent||s.body;a&&!/^body|html$/i.test(a.nodeName)&&c.css(a,"position")==="static";)a=a.offsetParent;return a})}});c.each(["Left","Top"],function(a,b){var d="scroll"+b;c.fn[d]=function(f){var e=this[0],j;if(!e)return null;if(f!==w)return this.each(function(){if(j=wa(this))j.scrollTo(!a?f:c(j).scrollLeft(),a?f:c(j).scrollTop());else this[d]=f});else 
return(j=wa(e))?"pageXOffset"in j?j[a?"pageYOffset": +"pageXOffset"]:c.support.boxModel&&j.document.documentElement[d]||j.document.body[d]:e[d]}});c.each(["Height","Width"],function(a,b){var d=b.toLowerCase();c.fn["inner"+b]=function(){return this[0]?c.css(this[0],d,false,"padding"):null};c.fn["outer"+b]=function(f){return this[0]?c.css(this[0],d,false,f?"margin":"border"):null};c.fn[d]=function(f){var e=this[0];if(!e)return f==null?null:this;if(c.isFunction(f))return this.each(function(j){var i=c(this);i[d](f.call(this,j,i[d]()))});return"scrollTo"in +e&&e.document?e.document.compatMode==="CSS1Compat"&&e.document.documentElement["client"+b]||e.document.body["client"+b]:e.nodeType===9?Math.max(e.documentElement["client"+b],e.body["scroll"+b],e.documentElement["scroll"+b],e.body["offset"+b],e.documentElement["offset"+b]):f===w?c.css(e,d):this.css(d,typeof f==="string"?f:f+"px")}});A.jQuery=A.$=c})(window); diff --git a/docs/_build/html/_static/minus.png b/docs/_build/html/_static/minus.png new file mode 100644 index 0000000..da1c562 Binary files /dev/null and b/docs/_build/html/_static/minus.png differ diff --git a/docs/_build/html/_static/plus.png b/docs/_build/html/_static/plus.png new file mode 100644 index 0000000..b3cb374 Binary files /dev/null and b/docs/_build/html/_static/plus.png differ diff --git a/docs/_build/html/_static/pygments.css b/docs/_build/html/_static/pygments.css new file mode 100644 index 0000000..1a14f2a --- /dev/null +++ b/docs/_build/html/_static/pygments.css @@ -0,0 +1,62 @@ +.highlight .hll { background-color: #ffffcc } +.highlight { background: #eeffcc; } +.highlight .c { color: #408090; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #007020; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #007020 } /* Comment.Preproc */ 
+.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #303030 } /* Generic.Output */ +.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0040D0 } /* Generic.Traceback */ +.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #007020 } /* Keyword.Pseudo */ +.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #902000 } /* Keyword.Type */ +.highlight .m { color: #208050 } /* Literal.Number */ +.highlight .s { color: #4070a0 } /* Literal.String */ +.highlight .na { color: #4070a0 } /* Name.Attribute */ +.highlight .nb { color: #007020 } /* Name.Builtin */ +.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ +.highlight .no { color: #60add5 } /* Name.Constant */ +.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #007020 } /* Name.Exception */ +.highlight .nf { color: #06287e } /* Name.Function */ +.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #062873; 
font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #bb60d5 } /* Name.Variable */ +.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #208050 } /* Literal.Number.Float */ +.highlight .mh { color: #208050 } /* Literal.Number.Hex */ +.highlight .mi { color: #208050 } /* Literal.Number.Integer */ +.highlight .mo { color: #208050 } /* Literal.Number.Oct */ +.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ +.highlight .sc { color: #4070a0 } /* Literal.String.Char */ +.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4070a0 } /* Literal.String.Double */ +.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ +.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ +.highlight .sx { color: #c65d09 } /* Literal.String.Other */ +.highlight .sr { color: #235388 } /* Literal.String.Regex */ +.highlight .s1 { color: #4070a0 } /* Literal.String.Single */ +.highlight .ss { color: #517918 } /* Literal.String.Symbol */ +.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ +.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */ +.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ +.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/docs/_build/html/_static/searchtools.js b/docs/_build/html/_static/searchtools.js new file mode 100644 index 0000000..663be4c --- /dev/null +++ b/docs/_build/html/_static/searchtools.js @@ -0,0 +1,560 @@ +/* + * searchtools.js_t + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilties for the full-text search. + * + * :copyright: Copyright 2007-2011 by the Sphinx team, see AUTHORS. 
+ * :license: BSD, see LICENSE for details. + * + */ + +/** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words, hlwords is the list of normal, unstemmed + * words. the first one is used to find the occurance, the + * latter for highlighting it. + */ + +jQuery.makeSearchSummary = function(text, keywords, hlwords) { + var textLower = text.toLowerCase(); + var start = 0; + $.each(keywords, function() { + var i = textLower.indexOf(this.toLowerCase()); + if (i > -1) + start = i; + }); + start = Math.max(start - 120, 0); + var excerpt = ((start > 0) ? '...' : '') + + $.trim(text.substr(start, 240)) + + ((start + 240 - text.length) ? '...' : ''); + var rv = $('
').text(excerpt); + $.each(hlwords, function() { + rv = rv.highlightText(this, 'highlighted'); + }); + return rv; +} + + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if 
(re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + + +/** + * Search Module + */ +var Search = { + + _index : null, + _queued_query : null, + _pulse_status : -1, + + init : function() { + var params = $.getQueryParameters(); + if (params.q) { + var query = params.q[0]; + $('input[name="q"]')[0].value = query; + this.performSearch(query); + } + }, + + loadIndex : function(url) { + $.ajax({type: "GET", url: url, data: null, success: null, + dataType: "script", cache: true}); + }, + + setIndex : function(index) { + var q; + this._index = index; + if ((q = this._queued_query) !== null) { + this._queued_query = null; + Search.query(q); + } + }, + + hasIndex : function() { + return this._index !== null; + }, + + deferQuery : function(query) { + this._queued_query = query; + }, + + stopPulse : function() { + this._pulse_status = 0; + }, + + startPulse : function() { + if (this._pulse_status >= 0) + return; + function pulse() { + Search._pulse_status = (Search._pulse_status + 1) % 4; + var dotString = ''; + for (var i = 0; i < Search._pulse_status; i++) + dotString += '.'; + Search.dots.text(dotString); + if (Search._pulse_status > -1) + window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something + */ + performSearch : function(query) { + // create the required interface elements + this.out = $('#search-results'); + 
this.title = $('

' + _('Searching') + '

').appendTo(this.out); + this.dots = $('').appendTo(this.title); + this.status = $('

').appendTo(this.out); + this.output = $('