diff --git a/notebook/parallel_vectorize.ipynb b/notebook/parallel_vectorize.ipynb new file mode 100644 index 0000000..b0bdaff --- /dev/null +++ b/notebook/parallel_vectorize.ipynb @@ -0,0 +1,160 @@ +{ + "metadata": { + "name": "parallel_vectorize" + }, + "nbformat": 2, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Parallel Vectorize", + "------------------", + "", + "The `parallel_vectorize.py` module contains a set of llvmpy code generators", + "for creating multithreaded _ufunc_. ", + "It depends on the new `numpy.fromfunc` for turning arbitrary function pointers into _ufunc_.", + "", + "From LLVM Function", + "------------------", + "", + "The `parallel_vectorize_from_func` function generates multithreaded _ufunc_ from LLVM functions.", + "", + "First, we will implement a workload function:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from llvm_cbuilder import *", + "from llvm_cbuilder import shortnames as C", + "from llvm.core import *", + "", + "# Implement a workload", + "class Square(CDefinition):", + " _name_ = 'square'", + " _retty_ = C.double # 1 output: double", + " _argtys_ = [('x', C.double)] # 1 input: double", + " ", + " def body(self, x):", + " self.ret(x * x)", + "", + "m = Module.new('my_module')", + "llvm_square = Square()(m) # Generate a llvm function", + "print(llvm_square) " + ], + "language": "python", + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "", + "define double @square(double %x) {", + "decl:", + " %x1 = alloca double", + " br label %body", + "", + "body: ; preds = %decl", + " store double %x, double* %x1", + " %0 = load double* %x1", + " %1 = load double* %x1", + " %2 = fmul double %0, %1", + " ret double %2", + "}", + "" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "source": [ + "Then, we will generate a _ufunc_ from `llvm_square`:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + 
"from llvm.ee import *", + "engine = EngineBuilder.new(m).create() # Generate JIT engine", + "", + "from parallel_vectorize import parallel_vectorize_from_func", + "ufunc_square = parallel_vectorize_from_func(llvm_square, engine) # Generate UFunc" + ], + "language": "python", + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "source": [ + "We are ready to use `ufunc_square` as a regular _ufunc_." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np", + "A = np.arange(10., dtype=np.double)", + "ufunc_square(A)" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 3, + "text": [ + "array([ 0., 1., 4., 9., 16., 25., 36., 49., 64., 81.])" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "source": [ + "Here's another example that uses three inputs:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "class SumOfThree(CDefinition):", + " _name_ = 'sum.of.three'", + " _retty_ = C.int", + " _argtys_ = [('x', C.int),", + " ('y', C.int),", + " ('z', C.int)]", + " def body(self, x, y, z):", + " self.ret( x + y + z )", + "", + "llvm_sum3 = SumOfThree()(m)", + "ufunc_sum3 = parallel_vectorize_from_func(llvm_sum3, engine)", + "A = np.arange(10, dtype=np.int32)", + "B = A * 10", + "C = A * 100", + "ufunc_sum3(A, B, C)" + ], + "language": "python", + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 4, + "text": [ + "array([ 0, 111, 222, 333, 444, 555, 666, 777, 888, 999], dtype=int32)" + ] + } + ], + "prompt_number": 4 + } + ] + } + ] +} \ No newline at end of file diff --git a/parallel_vectorize.py b/parallel_vectorize.py index 210dea8..a3fc743 100644 --- a/parallel_vectorize.py +++ b/parallel_vectorize.py @@ -416,10 +416,14 @@ def parallel_vectorize_from_func(lfunc, engine=None): from python. (This needs Jay's numpy.fromfunc). 
Otherwise, return the specialized ufunc as a llvm.core.Function ''' + import multiprocessing + NUM_CPU = multiprocessing.cpu_count() + fntype = lfunc.type.pointee - def_spuf = SpecializedParallelUFunc(ParallelUFuncPlatform(num_thread=2), - UFuncCoreGeneric(fntype), - CFuncRef(lfunc)) + def_spuf = SpecializedParallelUFunc( + ParallelUFuncPlatform(num_thread=NUM_CPU), + UFuncCoreGeneric(fntype), + CFuncRef(lfunc)) spuf = def_spuf(lfunc.module) if engine is None: return spuf @@ -432,10 +436,10 @@ def parallel_vectorize_from_func(lfunc, engine=None): # TODO refactor typemap = { - 'i8' : np.uint8, - 'i16' : np.uint16, - 'i32' : np.uint32, - 'i64' : np.uint64, + 'i8' : np.int8, + 'i16' : np.int16, + 'i32' : np.int32, + 'i64' : np.int64, 'float' : np.float32, 'double' : np.float64, }