diff --git a/parallel_vectorize.py b/parallel_vectorize.py index bef04eb..b5a03f5 100644 --- a/parallel_vectorize.py +++ b/parallel_vectorize.py @@ -95,7 +95,7 @@ class ParallelUFunc(CDefinition): which should be implemented in subclass or mixin. ''' - _name_ = 'parallel_ufunc_%(ThreadCount)d' + #_name_ = 'parallel_ufunc_%(ThreadCount)d' _argtys_ = [ ('func', C.void_p), ('worker', C.void_p), @@ -105,8 +105,18 @@ class ParallelUFunc(CDefinition): ('data', C.void_p), ] - def body(self, func, worker, args, dimensions, steps, data, ThreadCount=1): + @classmethod + def specialize(cls, num_thread): + name = 'parallel_ufunc_%d' % num_thread + newcls = type(name, (cls,), { + '_name_' : name, + 'ThreadCount': num_thread, + }) + return newcls + + def body(self, func, worker, args, dimensions, steps, data): # Setup variables + ThreadCount = self.ThreadCount common = self.var(ContextCommon, name='common') workqueues = self.array(WorkQueue, ThreadCount, name='workqueues') contexts = self.array(Context, ThreadCount, name='contexts') @@ -318,9 +328,7 @@ class UFuncCore(CDefinition): ''' raise NotImplementedError - class SpecializedParallelUFunc(CDefinition): - _name_ = 'specialized_parallel_ufunc_%(ThreadCount)d_%(FuncName)s' _argtys_ = [ ('args', C.pointer(C.char_p)), ('dimensions', C.pointer(C.intp)), @@ -328,15 +336,27 @@ class SpecializedParallelUFunc(CDefinition): ('data', C.void_p), ] - def body(self, args, dimensions, steps, data, - PUFuncDef, CoreDef, Func, FuncName, ThreadCount=1): - pufunc = self.depends(PUFuncDef, ThreadCount=ThreadCount) - core = self.depends(CoreDef) - func = self.depends(Func) + def body(self, args, dimensions, steps, data,): + pufunc = self.depends(self.PUFuncDef) + core = self.depends(self.CoreDef) + func = self.depends(self.FuncDef) to_void_p = lambda x: x.cast(C.void_p) pufunc(to_void_p(func), to_void_p(core), args, dimensions, steps, data) self.ret() + @classmethod + def specialize(cls, pufunc_def, core_def, func_def): + name = 'specialized_%s_%s_%s'% (pufunc_def._name_, + core_def._name_, + func_def._name_) + newcls = type(name, (cls,), { + '_name_' : name, + 'PUFuncDef': pufunc_def, + 'CoreDef' : core_def, + 'FuncDef' : func_def, + }) + return newcls + class PThreadAPI(CExternal): pthread_t = C.void_p diff --git a/test_parallel_vectorize.py b/test_parallel_vectorize.py index ed2010d..ab28363 100644 --- a/test_parallel_vectorize.py +++ b/test_parallel_vectorize.py @@ -14,6 +14,7 @@ class UFuncCore_D_D(UFuncCore): ''' Specialize UFuncCore for double input, double output. ''' + _name_ = UFuncCore._name_ + '_d_d' def _do_work(self, common, item, tid): ufunc_type = Type.function(C.double, [C.double]) ufunc_ptr = CFunc(self, common.func.cast(C.pointer(ufunc_type)).value) @@ -47,12 +48,10 @@ class Tester(CDefinition): ArgCount = 2 WorkCount = 10000 - sppufunc = self.depends(SpecializedParallelUFunc, - PUFuncDef = ParallelUFuncPosix, - CoreDef = UFuncCore_D_D, - Func = Work_D_D, - FuncName = Work_D_D._name_, - ThreadCount = 2) + PUfuncDef = ParallelUFuncPosix.specialize(num_thread=2) + SPUF = SpecializedParallelUFunc.specialize( + PUfuncDef, UFuncCore_D_D, Work_D_D) + sppufunc = self.depends(SPUF) # real work NULL = self.constant_null(C.void_p) diff --git a/test_parallel_vectorize_numpy.py b/test_parallel_vectorize_numpy.py index dc3557f..7b1ca51 100644 --- a/test_parallel_vectorize_numpy.py +++ b/test_parallel_vectorize_numpy.py @@ -4,12 +4,11 @@ import numpy as np def main(): module = Module.new(__name__) - sppufunc = SpecializedParallelUFunc.define(module, - PUFuncDef = ParallelUFuncPosix, - CoreDef = UFuncCore_D_D, - Func = Work_D_D, - FuncName = Work_D_D._name_, - ThreadCount = 2) + PUfuncDef = ParallelUFuncPosix.specialize(num_thread=2) + SPUF = SpecializedParallelUFunc.specialize( + PUfuncDef, UFuncCore_D_D, Work_D_D) + sppufunc = SPUF.define(module) + module.verify() mpm = PassManager.new()