Collect the directory to move

2012-11-10 18:43:19 -06:00 · 2012-11-10 18:43:19 -06:00 · 4bc9f00d95
commit 4bc9f00d95
parent 23a2d3a446
12 changed files with 1108 additions and 0 deletions
--- a/llvm_cbuilder_tests/init.py
+++ b/llvm_cbuilder_tests/init.py
--- a/llvm_cbuilder_tests/test_atomic_add.py
+++ b/llvm_cbuilder_tests/test_atomic_add.py
@ -0,0 +1,117 @@
+'''
+Based on test_pthread.py, extended to use atomic instructions
+'''
+
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+import sys
+
+# logging.basicConfig(level=logging.DEBUG)
+
+NUM_OF_THREAD = 4
+REPEAT = 10000
+
+def gen_test_worker(mod):
+    '''Emit the `worker` function into `mod` and return it.
+
+    The generated function takes a pointer to int and atomically adds one
+    to the pointee REPEAT times.
+    '''
+    cb = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
+    pval = cb.args[0]
+    # constant 1 typed like the pointee so it can be added through pval
+    one = cb.constant(pval.type.pointee, 1)
+
+    # loop counter of the generated function, initialised to 0
+    ct = cb.var(C.int, 0)
+    limit = cb.constant(C.int, REPEAT)
+    with cb.loop() as loop:
+        with loop.condition() as setcond:
+            setcond( ct < limit )
+
+        with loop.body():
+            # the pointee is updated with an atomic add; ct uses a plain +=
+            cb.atomic_add(pval, one, 'acq_rel')
+            ct += one
+
+    cb.ret()
+    cb.close()
+    return cb.function
+
+def gen_test_pthread(mod):
+    '''Emit the `manager` function into `mod` and return it.
+
+    The generated function issues NUM_OF_THREAD `pthread_create` calls that
+    run `worker` on the address of its int argument, calls `worker` once
+    itself, joins every thread and returns the argument.
+
+    `worker`, `pthread_create` and `pthread_join` are looked up by name.
+    '''
+    cb = CBuilder.new_function(mod, 'manager', C.int, [C.int])
+    arg = cb.args[0]
+
+    worker_func = cb.get_function_named('worker')
+    pthread_create = cb.get_function_named('pthread_create')
+    pthread_join = cb.get_function_named('pthread_join')
+
+
+    NULL = cb.constant_null(C.void_p)
+    # despite its name, this casts a value to void*
+    cast_to_null = lambda x: x.cast(C.void_p)
+
+    # one void* slot per thread, passed by reference to pthread_create
+    # NOTE(review): assumes a thread handle fits in a void* -- confirm
+    threads = cb.array(C.void_p, NUM_OF_THREAD)
+
+    # Python-level loop: one pthread_create call is emitted per thread
+    for tid in range(NUM_OF_THREAD):
+        pthread_create_args = [threads[tid].reference(),
+                               NULL,
+                               worker_func,
+                               arg.reference()]
+        pthread_create(*map(cast_to_null, pthread_create_args))
+
+    # the manager also runs the worker itself
+    worker_func(arg.reference())
+
+    for tid in range(NUM_OF_THREAD):
+        pthread_join_args = threads[tid], NULL
+        pthread_join(*map(cast_to_null, pthread_join_args))
+
+
+    cb.ret(arg)
+    cb.close()
+    return cb.function
+
+class TestAtomicAdd(unittest.TestCase):
+    '''Runs the generated `manager` and checks that no increment is lost.'''
+
+    @unittest.skipIf(sys.platform == 'win32', "test uses pthreads, not supported on Windows")
+    def test_atomic_add(self):
+        '''Build, optimize and run the module; the result must equal `gold`.'''
+        mod = Module.new(__name__)
+        # add pthread functions
+
+        # declared with void* for every parameter; callers cast accordingly
+        mod.add_function(Type.function(C.int,
+                                       [C.void_p, C.void_p, C.void_p, C.void_p]),
+                         'pthread_create')
+
+        mod.add_function(Type.function(C.int,
+                                       [C.void_p, C.void_p]),
+                         'pthread_join')
+
+        lf_test_worker = gen_test_worker(mod)
+        lf_test_pthread = gen_test_pthread(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        # optimize
+        fpm = FunctionPassManager.new(mod)
+        mpm = PassManager.new()
+        pmb = PassManagerBuilder.new()
+        pmb.vectorize = True
+        pmb.opt_level = 3
+        pmb.populate(fpm)
+        pmb.populate(mpm)
+
+        fpm.run(lf_test_worker)
+        fpm.run(lf_test_pthread)
+        mpm.run(mod)
+        logging.debug(mod)
+        # verify again after the passes have rewritten the module
+        mod.verify()
+
+        # run
+        exe = CExecutor(mod)
+        # NOTE(review): the result is discarded; presumably this forces
+        # `worker` to be compiled before `manager` runs -- confirm
+        exe.engine.get_pointer_to_function(mod.get_function_named('worker'))
+        func = exe.get_ctype_function(lf_test_pthread, 'int, int')
+
+        inarg = 1234
+        # NUM_OF_THREAD threads plus the manager's own call each add REPEAT
+        gold = inarg + (NUM_OF_THREAD + 1) * REPEAT
+
+        for _ in range(1000): # run many many times to catch race condition
+            self.assertEqual(func(inarg), gold, "Unexpected race condition")
+
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_atomic_cmpxchg.py
+++ b/llvm_cbuilder_tests/test_atomic_cmpxchg.py
@ -0,0 +1,124 @@
+'''
+Based on test_pthread.py, extended to use atomic instructions
+'''
+
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+import sys
+
+# logging.basicConfig(level=logging.DEBUG)
+
+NUM_OF_THREAD = 4
+REPEAT = 10000
+
+def gen_test_worker(mod):
+    '''Emit the `worker` function into `mod` and return it.
+
+    The generated function takes a pointer to int and increments the pointee
+    REPEAT times using an atomic load followed by a compare-and-exchange,
+    retrying whenever the exchange does not report the loaded value.
+    '''
+    cb = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
+    pval = cb.args[0]
+    one = cb.constant(pval.type.pointee, 1)
+
+    # ct only advances when an exchange is judged successful (see below)
+    ct = cb.var(C.int, 0)
+    limit = cb.constant(C.int, REPEAT)
+    with cb.loop() as loop:
+        with loop.condition() as setcond:
+            setcond( ct < limit )
+
+        with loop.body():
+            oldval = pval.atomic_load('acquire')
+            updated = oldval + one
+            castmp = pval.atomic_cmpxchg(oldval, updated, 'release')
+
+            # success is detected by comparing the value returned by the
+            # exchange with the value loaded above; otherwise loop again
+            with cb.ifelse( castmp == oldval ) as ifelse:
+                with ifelse.then():
+                    ct += one
+
+    cb.ret()
+    cb.close()
+    return cb.function
+
+def gen_test_pthread(mod):
+    '''Emit the `manager` function into `mod` and return it.
+
+    The generated function issues NUM_OF_THREAD `pthread_create` calls that
+    run `worker` on the address of its int argument, calls `worker` once
+    itself, joins every thread and returns the argument.
+
+    `worker`, `pthread_create` and `pthread_join` are looked up by name.
+    '''
+    cb = CBuilder.new_function(mod, 'manager', C.int, [C.int])
+    arg = cb.args[0]
+
+    worker_func = cb.get_function_named('worker')
+    pthread_create = cb.get_function_named('pthread_create')
+    pthread_join = cb.get_function_named('pthread_join')
+
+
+    NULL = cb.constant_null(C.void_p)
+    # despite its name, this casts a value to void*
+    cast_to_null = lambda x: x.cast(C.void_p)
+
+    # one void* slot per thread, passed by reference to pthread_create
+    # NOTE(review): assumes a thread handle fits in a void* -- confirm
+    threads = cb.array(C.void_p, NUM_OF_THREAD)
+
+    # Python-level loop: one pthread_create call is emitted per thread
+    for tid in range(NUM_OF_THREAD):
+        pthread_create_args = [threads[tid].reference(),
+                               NULL,
+                               worker_func,
+                               arg.reference()]
+        pthread_create(*map(cast_to_null, pthread_create_args))
+
+    # the manager also runs the worker itself
+    worker_func(arg.reference())
+
+    for tid in range(NUM_OF_THREAD):
+        pthread_join_args = threads[tid], NULL
+        pthread_join(*map(cast_to_null, pthread_join_args))
+
+
+    cb.ret(arg)
+    cb.close()
+    return cb.function
+
+class TestAtomicCmpXchg(unittest.TestCase):
+    '''Runs the generated `manager` and checks that no increment is lost.'''
+
+    @unittest.skipIf(sys.platform == 'win32', "test uses pthreads, not supported on Windows")
+    def test_atomic_cmpxchg(self):
+        '''Build, optimize and run the module; the result must equal `gold`.'''
+        mod = Module.new(__name__)
+        # add pthread functions
+
+        # declared with void* for every parameter; callers cast accordingly
+        mod.add_function(Type.function(C.int,
+                                       [C.void_p, C.void_p, C.void_p, C.void_p]),
+                         'pthread_create')
+
+        mod.add_function(Type.function(C.int,
+                                       [C.void_p, C.void_p]),
+                         'pthread_join')
+
+        lf_test_worker = gen_test_worker(mod)
+        lf_test_pthread = gen_test_pthread(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        # optimize
+        fpm = FunctionPassManager.new(mod)
+        mpm = PassManager.new()
+        pmb = PassManagerBuilder.new()
+        pmb.vectorize = True
+        pmb.opt_level = 3
+        pmb.populate(fpm)
+        pmb.populate(mpm)
+
+        fpm.run(lf_test_worker)
+        fpm.run(lf_test_pthread)
+        mpm.run(mod)
+        logging.debug(mod)
+        # verify again after the passes have rewritten the module
+        mod.verify()
+
+        # run
+        exe = CExecutor(mod)
+        # NOTE(review): the result is discarded; presumably this forces
+        # `worker` to be compiled before `manager` runs -- confirm
+        exe.engine.get_pointer_to_function(mod.get_function_named('worker'))
+        func = exe.get_ctype_function(lf_test_pthread, 'int, int')
+
+        inarg = 1234
+        # NUM_OF_THREAD threads plus the manager's own call each add REPEAT
+        gold = inarg + (NUM_OF_THREAD + 1) * REPEAT
+
+        for _ in range(1000): # run many many times to catch race condition
+            res = func(inarg)
+            self.assertEqual(res, gold,
+                             "Unexpected race condition: res = %d" % res)
+
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_cstr_collide.py
+++ b/llvm_cbuilder_tests/test_cstr_collide.py
@ -0,0 +1,17 @@
+from llvm.core import *
+from llvm_cbuilder import *
+from llvm_cbuilder import shortnames as C
+
+import unittest
+
+class TestCstrCollide(unittest.TestCase):
+    '''Checks that requesting the same constant string twice is consistent.'''
+
+    def test_same_string(self):
+        '''Two `constant_string("hello")` calls must yield equal `.value`s.'''
+        mod = Module.new(__name__)
+        cb = CBuilder.new_function(mod, 'test_cstr_collide', C.void, [])
+
+        a = cb.constant_string("hello")
+        b = cb.constant_string("hello")
+        # only the builder-side values are compared; nothing is executed
+        self.assertEqual(a.value, b.value)
+
+if __name__ == '__main__':
+    unittest.main()
--- a/llvm_cbuilder_tests/test_isprime.py
+++ b/llvm_cbuilder_tests/test_isprime.py
@ -0,0 +1,125 @@
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+
+def is_prime(x):
+    '''Pure-Python reference for the generated primality functions.
+
+    Everything up to and including 2 is reported as prime (the generated
+    code does the same); other even numbers are rejected, and the rest is
+    trial-divided by every integer from 2 up to the integer square root.
+    '''
+    if x <= 2:
+        return True
+    upper = int(1 + x**0.5)
+    return x % 2 != 0 and all(x % y != 0 for y in range(2, upper))
+
+def gen_is_prime(mod):
+    '''Add an `isprime` function (int -> int) to `mod` and return it.
+
+    The generated code returns 1 for arguments <= 2, 0 for other even
+    arguments, and otherwise trial-divides by 3, 5, 7, ... while the
+    divisor is below the argument.
+    '''
+    functype = Type.function(C.int, [C.int])
+    func = mod.add_function(functype, 'isprime')
+
+    cb = CBuilder(func)
+
+    arg = cb.args[0]
+
+    two = cb.constant(C.int, 2)
+    # the int constants 1 and 0 double as the boolean results
+    true = one = cb.constant(C.int, 1)
+    false = zero = cb.constant(C.int, 0)
+
+    with cb.ifelse( arg <= two ) as ifelse:
+        with ifelse.then():
+            cb.ret(true)
+
+    with cb.ifelse( (arg % two) == zero ) as ifelse:
+        with ifelse.then():
+            cb.ret(false)
+
+    # even arguments were rejected above, so only odd divisors are tried
+    idx = cb.var(C.int, 3, name='idx')
+    with cb.loop() as loop:
+        with loop.condition() as setcond:
+            setcond( idx < arg )
+
+        with loop.body():
+            with cb.ifelse( (arg % idx) == zero ) as ifelse:
+                with ifelse.then():
+                    cb.ret(false)
+            # increment
+            idx += two
+
+    cb.ret(true)
+    cb.close()
+    return func
+
+
+def gen_is_prime_fast(mod):
+    '''Add an `isprime_fast` function (int -> int) to `mod` and return it.
+
+    Same structure as `gen_is_prime`, except that the trial-division loop
+    stops at 1 + sqrt(arg) instead of at arg.
+    '''
+    functype = Type.function(C.int, [C.int])
+    func = mod.add_function(functype, 'isprime_fast')
+
+    cb = CBuilder(func)
+
+    arg = cb.args[0]
+
+    two = cb.constant(C.int, 2)
+    # the int constants 1 and 0 double as the boolean results
+    true = one = cb.constant(C.int, 1)
+    false = zero = cb.constant(C.int, 0)
+
+    with cb.ifelse( arg <= two ) as ifelse:
+        with ifelse.then():
+            cb.ret(true)
+
+    with cb.ifelse( (arg % two) == zero ) as ifelse:
+        with ifelse.then():
+            cb.ret(false)
+
+    idx = cb.var(C.int, 3, name='idx')
+
+    # square-root intrinsic specialised for C.float
+    sqrt = cb.get_intrinsic(INTR_SQRT, [C.float])
+
+    # int -> float -> sqrt -> int, plus one; computed once before the loop
+    looplimit = one + sqrt(arg.cast(C.float)).cast(C.int)
+
+
+    with cb.loop() as loop:
+        with loop.condition() as setcond:
+            setcond( idx < looplimit )
+
+        with loop.body():
+            with cb.ifelse( (arg % idx) == zero ) as ifelse:
+                with ifelse.then():
+                    cb.ret(false)
+            # increment
+            idx += two
+
+
+    cb.ret(true)
+    cb.close()
+    return func
+
+class TestIsPrime(unittest.TestCase):
+    '''Compares both generated primality functions with `is_prime`.'''
+
+    def test_isprime(self):
+        '''`isprime` must agree with `is_prime` for every x in [2, 1000).'''
+        mod = Module.new(__name__)
+        lf_isprime = gen_is_prime(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        # NOTE(review): the LLVM function returns C.int but is wrapped with
+        # a 'bool' result here -- confirm this is intended
+        func = exe.get_ctype_function(lf_isprime, 'bool, int')
+        for x in range(2, 1000):
+            msg = "Failed at x = %d" % x
+            self.assertEqual(func(x), is_prime(x), msg)
+
+    def test_isprime_fast(self):
+        '''`isprime_fast` must agree with `is_prime` for every x in [2, 1000).'''
+        mod = Module.new(__name__)
+        lf_isprime = gen_is_prime_fast(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        func = exe.get_ctype_function(lf_isprime, 'bool, int')
+        for x in range(2, 1000):
+            msg = "Failed at x = %d" % x
+            self.assertEqual(func(x), is_prime(x), msg)
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_loopcontrol.py
+++ b/llvm_cbuilder_tests/test_loopcontrol.py
@ -0,0 +1,135 @@
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+
+def loopbreak(d):
+    '''Pure-Python reference for the generated `loopbreak` function.
+
+    For each of 100 outer steps, accumulate outer + inner until the running
+    total exceeds 50 (or 100 inner steps have run), then subtract `d`.
+    '''
+    total = 0
+    for outer in range(100):
+        inner = 0
+        while inner < 100:
+            total += outer + inner
+            if total > 50:
+                break
+            inner += 1
+        total -= d
+    return total
+
+def gen_loopbreak(mod):
+    '''Add a `loopbreak` function (int -> int) to `mod` and return it.
+
+    Mirrors the Python `loopbreak` above: two nested counted loops where
+    the inner loop is left early once z exceeds 50.
+    '''
+    functype = Type.function(C.int, [C.int])
+    func = mod.add_function(functype, 'loopbreak')
+
+    cb = CBuilder(func)
+
+    d = cb.args[0]
+    # created without an initial value; assigned below before use
+    x = cb.var(C.int)
+    y = cb.var(C.int)
+    z = cb.var(C.int)
+
+    one = cb.constant(C.int, 1)
+    zero = cb.constant(C.int, 0)
+    limit = cb.constant(C.int, 100)
+    fifty = cb.constant(C.int, 50)
+
+    z.assign(zero)
+    x.assign(zero)
+    with cb.loop() as outer:
+        with outer.condition() as setcond:
+            setcond( x < limit )
+
+        with outer.body():
+            # restart the inner counter on every outer iteration
+            y.assign(zero)
+            with cb.loop() as inner:
+                with inner.condition() as setcond:
+                    setcond( y < limit )
+
+                with inner.body():
+                    z += x + y
+                    with cb.ifelse( z > fifty ) as ifelse:
+                        with ifelse.then():
+                            inner.break_loop()
+                    y += one
+            # emitted after the inner loop, so it also runs after a break
+            z -= d
+            x += one
+
+    cb.ret(z)
+    cb.close()
+    return func
+
+def loopcontinue(d):
+    '''Pure-Python reference for the generated `loopcontinue` function.
+
+    Accumulates outer + inner over a 100 x 100 grid; `d` is added after a
+    step only while the running total has not exceeded 50.
+    '''
+    total = 0
+    for outer in range(100):
+        for inner in range(100):
+            total += outer + inner
+            if total <= 50:
+                total += d
+    return total
+
+def gen_loopcontinue(mod):
+    '''Add a `loopcontinue` function (int -> int) to `mod` and return it.
+
+    Mirrors the Python `loopcontinue` above: two nested counted loops where
+    the rest of the inner body is skipped once z exceeds 50.
+    '''
+    functype = Type.function(C.int, [C.int])
+    func = mod.add_function(functype, 'loopcontinue')
+
+    cb = CBuilder(func)
+
+    d = cb.args[0]
+    # created without an initial value; assigned below before use
+    x = cb.var(C.int)
+    y = cb.var(C.int)
+    z = cb.var(C.int)
+
+    one = cb.constant(C.int, 1)
+    zero = cb.constant(C.int, 0)
+    limit = cb.constant(C.int, 100)
+    fifty = cb.constant(C.int, 50)
+
+    z.assign(zero)
+    x.assign(zero)
+    with cb.loop() as outer:
+        with outer.condition() as setcond:
+            setcond( x < limit )
+
+        with outer.body():
+            # restart the inner counter on every outer iteration
+            y.assign(zero)
+            with cb.loop() as inner:
+                with inner.condition() as setcond:
+                    setcond( y < limit )
+
+                with inner.body():
+                    z += x + y
+                    # the counter is advanced before the continue check, so
+                    # it is incremented on the skipping path as well
+                    y += one
+                    with cb.ifelse( z > fifty ) as ifelse:
+                        with ifelse.then():
+                            inner.continue_loop()
+                    z += d
+            x += one
+
+    cb.ret(z)
+    cb.close()
+    return func
+
+class TestLoopControl(unittest.TestCase):
+    '''Compares the generated break/continue loops with their Python models.'''
+
+    def test_loopbreak(self):
+        '''Generated `loopbreak` must match `loopbreak(x)` for x in [0, 100).'''
+        mod = Module.new(__name__)
+        lfunc = gen_loopbreak(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        func = exe.get_ctype_function(lfunc, 'int, int')
+        for x in range(100):
+            self.assertEqual(func(x), loopbreak(x))
+
+    def test_loopcontinue(self):
+        '''Generated `loopcontinue` must match `loopcontinue(x)` for x in [0, 100).'''
+        mod = Module.new(__name__)
+        lfunc = gen_loopcontinue(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        func = exe.get_ctype_function(lfunc, 'int, int')
+        for x in range(100):
+            self.assertEqual(func(x), loopcontinue(x))
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_nestedloops.py
+++ b/llvm_cbuilder_tests/test_nestedloops.py
@ -0,0 +1,128 @@
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+
+def nestedloop1(d):
+    '''Pure-Python reference for the generated `nestedloop1` function.
+
+    Sums x * d + int(y / d) over every (x, y) pair of a 100 x 100 grid.
+    '''
+    return sum(x * d + int(y / d)
+               for x in range(100)
+               for y in range(100))
+
+def gen_nestedloop1(mod):
+    '''Add a `nestedloop1` function (int -> int) to `mod` and return it.
+
+    Mirrors the Python `nestedloop1` above: z accumulates x * d + y / d
+    over two nested loops that each count from 0 up to 100.
+    '''
+    functype = Type.function(C.int, [C.int])
+    func = mod.add_function(functype, 'nestedloop1')
+
+    cb = CBuilder(func)
+
+    d = cb.args[0]
+    # created without an initial value; assigned below before use
+    x = cb.var(C.int)
+    y = cb.var(C.int)
+    z = cb.var(C.int)
+
+    one = cb.constant(C.int, 1)
+    zero = cb.constant(C.int, 0)
+    limit = cb.constant(C.int, 100)
+
+    z.assign(zero)
+    x.assign(zero)
+    with cb.loop() as outer:
+        with outer.condition() as setcond:
+            setcond( x < limit )
+
+        with outer.body():
+            # restart the inner counter on every outer iteration
+            y.assign(zero)
+            with cb.loop() as inner:
+                with inner.condition() as setcond:
+                    setcond( y < limit )
+
+                with inner.body():
+                    # both operands of `/` are C.int values here
+                    z += x * d + y / d
+                    y += one
+            x += one
+
+    cb.ret(z)
+    cb.close()
+    return func
+
+
+def nestedloop2(d):
+    '''Pure-Python reference for the generated `nestedloop2` function.
+
+    For every (x, y) pair with both in 1..99, divides the larger by the
+    smaller (y by x on ties), truncates, scales by `d` and accumulates.
+    '''
+    total = 0
+    for x in range(1, 100):
+        for y in range(1, 100):
+            ratio = x / y if x > y else y / x
+            total += int(ratio) * d
+    return total
+
+def gen_nestedloop2(mod):
+    '''Add a `nestedloop2` function (int -> int) to `mod` and return it.
+
+    Mirrors the Python `nestedloop2` above: both counters start at 1 and
+    an if/else inside the inner loop picks which quotient is accumulated.
+    '''
+    functype = Type.function(C.int, [C.int])
+    func = mod.add_function(functype, 'nestedloop2')
+
+    cb = CBuilder(func)
+
+    d = cb.args[0]
+    # created without an initial value; assigned below before use
+    x = cb.var(C.int)
+    y = cb.var(C.int)
+    z = cb.var(C.int)
+
+    one = cb.constant(C.int, 1)
+    zero = cb.constant(C.int, 0)
+    limit = cb.constant(C.int, 100)
+
+    z.assign(zero)
+    # counters start at one, so neither divisor below is ever zero
+    x.assign(one)
+    with cb.loop() as outer:
+        with outer.condition() as setcond:
+            setcond( x < limit )
+
+        with outer.body():
+            y.assign(one)
+            with cb.loop() as inner:
+                with inner.condition() as setcond:
+                    setcond( y < limit )
+
+                with inner.body():
+                    with cb.ifelse(x > y) as ifelse:
+                        with ifelse.then():
+                            z += x / y * d
+                        with ifelse.otherwise():
+                            z += y / x * d
+                    y += one
+            x += one
+
+    cb.ret(z)
+    cb.close()
+    return func
+
+
+class TestNestedLoop(unittest.TestCase):
+    '''Compares the generated nested loops with their Python models.'''
+
+    def test_nestedloop1(self):
+        '''Generated `nestedloop1` must match the model for x in [1, 100).'''
+        mod = Module.new(__name__)
+        lfunc = gen_nestedloop1(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        func = exe.get_ctype_function(lfunc, 'int, int')
+        # starts at 1: the argument is used as a divisor
+        for x in range(1, 100):
+            self.assertEqual(func(x), int(nestedloop1(x)))
+
+    def test_nestedloop2(self):
+        '''Generated `nestedloop2` must match the model for x in [1, 100).'''
+        mod = Module.new(__name__)
+        lfunc = gen_nestedloop2(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        func = exe.get_ctype_function(lfunc, 'int, int')
+        for x in range(1, 100):
+            self.assertEqual(func(x), int(nestedloop2(x)))
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_print.py
+++ b/llvm_cbuilder_tests/test_print.py
@ -0,0 +1,68 @@
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import sys, unittest, logging
+from subprocess import Popen, PIPE
+
+def gen_debugprint(mod):
+    '''Add a `debugprint` function (no arguments, void) to `mod` and return it.
+
+    The generated code makes one `cb.printf` call with a format string and
+    one `cb.debug` call, both fed the same three constants.
+    '''
+    functype = Type.function(C.void, [])
+    func = mod.add_function(functype, 'debugprint')
+
+    cb = CBuilder(func)
+    fmt = cb.constant_string("Show %d %.3f %.3e\n")
+
+    an_int = cb.constant(C.int, 123)
+    # note: declared as C.double even though the name says float
+    a_float = cb.constant(C.double, 1.234)
+    a_double = cb.constant(C.double, 1e-31)
+    cb.printf(fmt, an_int, a_float, a_double)
+
+    cb.debug('an_int =', an_int, 'a_float =', a_float, 'a_double =', a_double)
+
+    cb.ret()
+    cb.close()
+    return func
+
+def main_debugprint():
+    '''Build the `debugprint` module and call the generated function once.
+
+    Invoked when this file is run with "-child"; its output is what
+    TestPrint.test_debugprint captures and checks.
+    '''
+    # generate code
+    mod = Module.new(__name__)
+    lfunc = gen_debugprint(mod)
+    logging.debug(mod)
+    mod.verify()
+    # run
+    exe = CExecutor(mod)
+    func = exe.get_ctype_function(lfunc, 'void')
+    func()
+
+class TestPrint(unittest.TestCase):
+    '''Checks the text printed by the generated `debugprint` function.'''
+
+    def test_debugprint(self):
+        '''Run this file with "-child" in a subprocess and compare its stdout.'''
+        p = Popen([sys.executable, __file__, "-child"], stdout=PIPE)
+        # communicate() drains the pipe while waiting for the child, so the
+        # child can never block on a full pipe, and it closes stdout for us.
+        out, _ = p.communicate()
+
+        # The pipe yields bytes on Python 3, so decode to text before
+        # comparing.  On Python 2 this produces unicode, which still compares
+        # equal to the ASCII literals below.
+        lines = out.decode('utf-8').splitlines(False)
+        self.assertTrue(lines,
+                        "child produced no output (exit code %r)" % p.returncode)
+
+        # Try to account for variations in the system printf
+        if lines[0].find('e-031') >= 0:
+            expect = [
+                'Show 123 1.234 1.000e-031',
+                'an_int = 123 a_float = 1.234000e+000 a_double = 1.000000e-031',
+                ]
+        else:
+            expect = [
+                'Show 123 1.234 1.000e-31',
+                'an_int = 123 a_float = 1.234000e+00 a_double = 1.000000e-31',
+                ]
+        self.assertEqual(expect, lines)
+
+if __name__ == '__main__':
+    # "-child" is the re-entry point used by TestPrint: only run the generated
+    # function so that the parent process can capture what it prints.  Every
+    # other invocation, with or without extra arguments such as -v, is handed
+    # to unittest.  Testing the argument list directly (rather than catching
+    # IndexError) also keeps an IndexError raised inside main_debugprint from
+    # being mistaken for "no argument given".
+    if sys.argv[1:2] == ['-child']:
+        main_debugprint()
+    else:
+        unittest.main()
+
+
--- a/llvm_cbuilder_tests/test_pthread.py
+++ b/llvm_cbuilder_tests/test_pthread.py
@ -0,0 +1,92 @@
+from llvm.core import *
+from llvm.passes import *
+from llvm.ee import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+import sys
+
+# logging.basicConfig(level=logging.DEBUG)
+
+NUM_OF_THREAD = 4
+
+def gen_test_worker(mod):
+    '''Emit the `worker` function into `mod`.
+
+    The generated function takes a pointer to int and increments the pointee
+    once with a plain load followed by a plain store (no atomic operation).
+    Nothing is returned; callers look the function up by name.
+    '''
+    cb = CBuilder.new_function(mod, 'worker', C.void, [C.pointer(C.int)])
+    pval = cb.args[0]
+    val = pval.load()
+    one = cb.constant(val.type, 1)
+    # separate load and store: concurrent callers may overwrite each other
+    pval.store(val + one)
+    cb.ret()
+    cb.close()
+
+def gen_test_pthread(mod):
+    '''Emit the `manager` function into `mod` and return it.
+
+    The generated function issues NUM_OF_THREAD `pthread_create` calls that
+    run `worker` on the address of its int argument, calls `worker` once
+    itself, joins every thread and returns the argument.
+
+    `worker`, `pthread_create` and `pthread_join` are looked up by name.
+    '''
+    cb = CBuilder.new_function(mod, 'manager', C.int, [C.int])
+    arg = cb.args[0]
+
+    worker_func = cb.get_function_named('worker')
+    pthread_create = cb.get_function_named('pthread_create')
+    pthread_join = cb.get_function_named('pthread_join')
+
+
+    NULL = cb.constant_null(C.void_p)
+    # despite its name, this casts a value to void*
+    cast_to_null = lambda x: x.cast(C.void_p)
+
+    # one void* slot per thread, passed by reference to pthread_create
+    # NOTE(review): assumes a thread handle fits in a void* -- confirm
+    threads = cb.array(C.void_p, NUM_OF_THREAD)
+
+    # Python-level loop: one pthread_create call is emitted per thread
+    for tid in range(NUM_OF_THREAD):
+        pthread_create_args = [threads[tid].reference(),
+                               NULL,
+                               worker_func,
+                               arg.reference()]
+        pthread_create(*map(cast_to_null, pthread_create_args))
+
+    # the manager also runs the worker itself
+    worker_func(arg.reference())
+
+    for tid in range(NUM_OF_THREAD):
+        pthread_join_args = threads[tid], NULL
+        pthread_join(*map(cast_to_null, pthread_join_args))
+
+    cb.ret(arg)
+    cb.close()
+    return cb.function
+
+class TestPThread(unittest.TestCase):
+    '''Runs the non-atomic worker from several threads.'''
+
+    @unittest.skipIf(sys.platform == 'win32', "pthreads not supported on Windows")
+    def test_pthread(self):
+        '''The result may fall short of `gold` but must never exceed it.'''
+        mod = Module.new(__name__)
+        # add pthread functions
+
+        # declared with void* for every parameter; callers cast accordingly
+        mod.add_function(Type.function(C.int,
+                                       [C.void_p, C.void_p, C.void_p, C.void_p]),
+                         'pthread_create')
+
+        mod.add_function(Type.function(C.int,
+                                       [C.void_p, C.void_p]),
+                         'pthread_join')
+
+        gen_test_worker(mod)
+        lf_test_pthread = gen_test_pthread(mod)
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        # NOTE(review): the result is discarded; presumably this forces
+        # `worker` to be compiled before `manager` runs -- confirm
+        exe.engine.get_pointer_to_function(mod.get_function_named('worker'))
+        func = exe.get_ctype_function(lf_test_pthread, 'int, int')
+
+        inarg = 1234
+        # one increment per thread plus one from the manager itself
+        gold = inarg + NUM_OF_THREAD + 1
+        self.assertLessEqual(func(inarg), gold)
+        # Cannot determine the exact return value due to untamed race condition
+
+        # informational only: mismatches are counted and logged, not asserted
+        count_race = 0
+        for _ in range(2**12):
+            if func(inarg) != gold:
+                count_race += 1
+
+        if count_race > 0:
+            logging.info("Race condition occured %d times.", count_race)
+            logging.info("Race condition is expected.")
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_struct.py
+++ b/llvm_cbuilder_tests/test_struct.py
@ -0,0 +1,52 @@
+from llvm.core import *
+from llvm_cbuilder import *
+import llvm_cbuilder.shortnames as C
+import unittest, ctypes
+
+class Vector2D(CStruct):
+    '''CBuilder-side struct with two C.float fields, `x` and `y`.'''
+    # field order matches Vector2DCtype below
+    _fields_ = [
+        ('x', C.float),
+        ('y', C.float),
+    ]
+
+class Vector2DCtype(ctypes.Structure):
+    '''ctypes counterpart of Vector2D, used to pass data from Python.'''
+    # same field names, order and element type (float) as Vector2D
+    _fields_ = [
+        ('x', ctypes.c_float),
+        ('y', ctypes.c_float),
+    ]
+
+def gen_vector2d_dist(mod):
+    '''Add a `vector2d_dist` function to `mod` and return it.
+
+    The generated function takes a pointer to a Vector2D and returns
+    x * x + y * y as a float (no square root is taken).
+    '''
+    functype = Type.function(C.float, [C.pointer(Vector2D.llvm_type())])
+    func = mod.add_function(functype, 'vector2d_dist')
+
+    cb = CBuilder(func)
+    # load the struct through the pointer argument into a builder variable
+    vec = cb.var(Vector2D, cb.args[0].load())
+    dist = vec.x * vec.x + vec.y * vec.y
+
+    cb.ret(dist)
+    cb.close()
+    return func
+
+
+class TestStruct(unittest.TestCase):
+    '''Checks the generated struct accessor code against plain Python.'''
+
+    def test_vector2d_dist(self):
+        '''Compare `vector2d_dist` with x*x + y*y for 100 random vectors.'''
+        # prepare module
+        mod = Module.new('mod')
+        lfunc = gen_vector2d_dist(mod)
+        mod.verify()
+        # run
+        exe = CExecutor(mod)
+        func = exe.get_ctype_function(lfunc, ctypes.c_float, ctypes.POINTER(Vector2DCtype))
+
+        from random import random
+        pydist = lambda x, y: x * x + y * y
+        for _ in range(100):
+            x, y = random(), random()
+            vec = Vector2DCtype(x=x, y=y)
+            ans = func(ctypes.pointer(vec))
+            gold = pydist(x, y)
+
+            # relative error; the generated code works in float while the
+            # reference uses Python floats, so exact equality is not expected
+            self.assertLess(abs(ans-gold)/gold, 1e-6)
+
+if __name__ == '__main__':
+    unittest.main()
--- a/llvm_cbuilder_tests/test_translate.py
+++ b/llvm_cbuilder_tests/test_translate.py
@ -0,0 +1,94 @@
+
+from llvm.core import Module
+from llvm_cbuilder import *
+from llvm_cbuilder.translator import translate
+import llvm_cbuilder.shortnames as C
+import unittest, logging
+
+#logging.basicConfig(level=logging.DEBUG)
+
+class FooIf(CDefinition):
+    '''Definition of `foo_if(x, y)`: returns x - y when x > y, else y - x.'''
+    _name_ = 'foo_if'
+    _retty_ = C.int
+    _argtys_ = [('x', C.int),
+                ('y', C.int),]
+
+    def body(self, x, y):
+        # the nested function is never called directly; it is handed to the
+        # `translate` decorator, which consumes its body
+        @translate
+        def _():
+            if x > y:
+                return x - y
+            else:
+                return y - x
+
+
+class FooWhile(CDefinition):
+    '''Definition of `foo_while(x)`, exercising a translated while loop.
+
+    Starting from y = x, x is decremented until it is no longer positive
+    and each decremented value is added to y, which is then returned.
+    '''
+    _name_ = 'foo_while'
+    _retty_ = C.int
+    _argtys_ = [('x', C.int)]
+
+    def body(self, x):
+        # accumulator starts as a copy of the argument
+        y = self.var_copy(x)
+
+        # the nested function is never called directly; it is handed to the
+        # `translate` decorator, which consumes its body
+        @translate
+        def _():
+            while x > 0:
+                x -= 1
+                y += x
+            return y
+
+class FooForRange(CDefinition):
+    '''Definition of `foo_for_range(x)`, exercising a translated for loop.
+
+    Accumulates every i of range(x + 1) into y and returns y.
+    '''
+    _name_ = 'foo_for_range'
+    _retty_ = C.int
+    _argtys_ = [('x', C.int)]
+
+    def body(self, x):
+        # accumulator of the same type as the argument, starting at 0
+        y = self.var(x.type, 0)
+
+        # the nested function is never called directly; it is handed to the
+        # `translate` decorator, which consumes its body
+        @translate
+        def _():
+            for i in range(x + 1):
+                y += i
+            return y
+
+
+class TestTranslate(unittest.TestCase):
+    '''Builds each translated definition and checks its results.
+
+    The generated module is reported through `logging.debug`, so normal test
+    runs stay quiet and the dump can be enabled via the logging
+    configuration.
+    '''
+
+    def test_if(self):
+        '''`foo_if` must return the absolute difference of its arguments.'''
+        mod = Module.new(__name__)
+        lfoo = FooIf()(mod)
+
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        foo = exe.get_ctype_function(lfoo, 'int, int')
+        self.assertEqual(foo(10, 20), 20 - 10)
+        self.assertEqual(foo(23, 17), 23 - 17)
+
+    def test_whileloop(self):
+        '''`foo_while(n)` must equal sum(range(n + 1)).'''
+        mod = Module.new(__name__)
+        lfoo = FooWhile()(mod)
+
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        foo = exe.get_ctype_function(lfoo, 'int')
+        self.assertEqual(foo(10), sum(range(10+1)))
+        self.assertEqual(foo(1324), sum(range(1324+1)))
+
+    def test_forloop(self):
+        '''`foo_for_range(n)` must equal sum(range(n + 1)).'''
+        mod = Module.new(__name__)
+        lfoo = FooForRange()(mod)
+
+        logging.debug(mod)
+        mod.verify()
+
+        exe = CExecutor(mod)
+        foo = exe.get_ctype_function(lfoo, 'int')
+        self.assertEqual(foo(10), sum(range(10+1)))
+        self.assertEqual(foo(1324), sum(range(1324+1)))
+
+if __name__ == '__main__':
+    unittest.main()
+
--- a/llvm_cbuilder_tests/test_vectorarith.py
+++ b/llvm_cbuilder_tests/test_vectorarith.py
@ -0,0 +1,156 @@
+from llvm_cbuilder import *
+from llvm_cbuilder import shortnames as C
+from llvm_cbuilder.translator import translate
+from ctypes import *
+from llvm.core import *
+from llvm.passes import *
+import numpy as np
+import unittest
+import logging
+floatv4 = C.vector(C.float, 4)
+
+class VectorArith(CDefinition):
+    '''Definition of `vector_arith(a, b, c)`: returns a * b + c.
+
+    Arguments and result all use the module-level `floatv4` vector type.
+    '''
+    _name_ = 'vector_arith'
+    _argtys_ = [('a', floatv4),
+                ('b', floatv4),
+                ('c', floatv4),]
+    _retty_ = floatv4
+
+    def body(self, a, b, c):
+        '''
+        Arguments
+        ---------
+        a, b, c -- must be vectors
+        '''
+        @translate
+        def _(): # write like python in here
+            return a * b + c
+
+class VectorArithDriver1(CDefinition):
+    '''Definition of `vector_arith_driver_1(A, B, C, D, n)`.
+
+    Steps through the float arrays four elements at a time and stores
+    `vector_arith` of the A, B and C chunks into D.
+    '''
+    _name_ = 'vector_arith_driver_1'
+    _argtys_ = [('A', C.pointer(C.float)),
+                ('B', C.pointer(C.float)),
+                ('C', C.pointer(C.float)),
+                ('D', C.pointer(C.float)),
+                ('n', C.int),]
+
+    def body(self, Aary, Bary, Cary, Dary, n):
+        '''
+        This version uses vector load to fetch array elements as vectors.
+
+        '''
+        vecarith = self.depends(VectorArith())
+        # loop step: number of elements in one floatv4
+        elem_per_vec = self.constant(C.int, floatv4.count)
+        # NOTE(review): n is presumably expected to be a multiple of the
+        # vector width; no remainder handling is visible here -- confirm
+        with self.for_range(0, n, elem_per_vec) as (loop, i):
+            # Aary[i:] offset the array at i
+            a = Aary[i:].vector_load(4, align=1)  # unaligned vector load
+            b = Bary[i:].vector_load(4, align=1)
+            c = Cary[i:].vector_load(4, align=1)
+            r = vecarith(a, b, c)
+            Dary[i:].vector_store(r, align=1)
+            #    self.debug(r[0], r[1], r[2], r[3])
+        self.ret()
+
+
+class VectorArithDriver2(CDefinition):
+    '''Definition of `vector_arith_driver_2(A, B, C, D, n)`.
+
+    Same computation as the first driver, but the input vectors are filled
+    one element at a time before `vector_arith` is called.
+    '''
+    _name_ = 'vector_arith_driver_2'
+    _argtys_ = [('A', C.pointer(C.float)),
+                ('B', C.pointer(C.float)),
+                ('C', C.pointer(C.float)),
+                ('D', C.pointer(C.float)),
+                ('n', C.int),]
+
+    def body(self, Aary, Bary, Cary, Dary, n):
+        '''
+        This version loads element of vector individually.
+        This style generates scalar ld/st instead of vector ld/st.
+        '''
+        vecarith = self.depends(VectorArith())
+        # vector variables that are refilled on every outer iteration
+        a = self.var(floatv4)
+        b = self.var(floatv4)
+        c = self.var(floatv4)
+        elem_per_vec = self.constant(C.int, floatv4.count)
+        with self.for_range(0, n, elem_per_vec) as (outer, i):
+            # gather one vector's worth of scalars from each input array
+            with self.for_range(elem_per_vec) as (inner, j):
+                a[j] = Aary[i + j]
+                b[j] = Bary[i + j]
+                c[j] = Cary[i + j]
+            r = vecarith(a, b, c)
+            # the result is still written back with a single vector store
+            Dary[i:].vector_store(r, align=1)
+            #    self.debug(r[0], r[1], r[2], r[3])
+        self.ret()
+
+
+
+def aligned_zeros(shape, boundary=16, dtype=float, order='C'):
+    '''
+    Return a zero-filled array of `shape` and `dtype` whose data address is
+    a multiple of `boundary` bytes.
+
+    A byte buffer with `boundary` spare bytes is allocated, and the result
+    is a view into it that starts at the first suitably aligned byte.
+    '''
+    elem_type = np.dtype(dtype)
+    nbytes = np.prod(shape) * elem_type.itemsize
+    raw = np.zeros(nbytes + boundary, dtype=np.uint8)
+    base = raw.__array_interface__['data'][0]
+    # bytes to skip so that the first element lands on the boundary
+    start = -base % boundary
+    window = raw[start:start + nbytes]
+    return window.view(dtype=elem_type).reshape(shape, order=order)
+
+class TestVectorArith(unittest.TestCase):
+    '''Runs both vector-arithmetic drivers against a NumPy reference.'''
+
+    def test_vector_arith_1(self):
+        self.run_and_test_udt(VectorArithDriver1(), 16) # aligned for SSE
+        self.run_and_test_udt(VectorArithDriver1(), 20) # misaligned for SSE
+
+    def test_vector_arith_2(self):
+        self.run_and_test_udt(VectorArithDriver2(), 16) # aligned for SSE
+        self.run_and_test_udt(VectorArithDriver2(), 20) # misaligned for SSE
+
+    def run_and_test_udt(self, udt, align):
+        '''Build `udt`, optimize and run it, and check D == A * B + C.
+
+        udt   -- a driver definition; calling it with a module yields the
+                 LLVM function to execute
+        align -- byte boundary passed to aligned_zeros for all four arrays
+        '''
+        module = Module.new('mod.test.vectoriarith')
+
+        ldriver = udt(module)
+
+        pm = PassManager.new()
+        pmb = PassManagerBuilder.new()
+        # `opt_level` is the attribute the atomic tests in this package set
+        # on the builder; use the same spelling here so the requested level
+        # is applied the same way.
+        pmb.opt_level = 3
+        pmb.vectorize = True
+        pmb.populate(pm)
+        pm.run(module)
+
+        # the assembly dump goes through logging so normal runs stay quiet
+        logging.debug(module.to_native_assembly())
+
+        exe = CExecutor(module)
+
+        float_p = POINTER(c_float)
+
+        driver = exe.get_ctype_function(ldriver,
+                                        None,
+                                        float_p, float_p, float_p,
+                                        float_p,
+                                        c_int)
+
+        # prepare for execution
+
+        # a multiple of the vector width used by the drivers
+        n = 4*10
+
+        Aary = aligned_zeros(n, boundary=align, dtype=np.float32)
+        Bary = aligned_zeros(n, boundary=align, dtype=np.float32)
+        Cary = aligned_zeros(n, boundary=align, dtype=np.float32)
+        Dary = aligned_zeros(n, boundary=align, dtype=np.float32)
+
+        Aary[:] = range(n)
+        Bary[:] = range(n, 2 * n)
+        Cary[:] = range(2 * n, 3 * n)
+
+        # reference result computed by NumPy in float32
+        golden = Aary * Bary + Cary
+
+        getptr = lambda ary: ary.ctypes.data_as(float_p)
+
+        driver(getptr(Aary), getptr(Bary), getptr(Cary), getptr(Dary), n)
+
+        for x, y in zip(golden, Dary):
+            self.assertEqual(x, y)
+
+
+if __name__ == '__main__':
+    unittest.main()
+
+