diff --git a/llvm_cbuilder/builder.py b/llvm_cbuilder/builder.py index 3fce585..88516df 100644 --- a/llvm_cbuilder/builder.py +++ b/llvm_cbuilder/builder.py @@ -434,7 +434,8 @@ class CDefinition(CBuilder): @classmethod def define(cls, module, **kws): functype = lc.Type.function(cls._retty_, [v for k, v in cls._argtys_]) - func = module.get_or_insert_function(functype, name=cls._name_) + name = cls._name_ % kws + func = module.get_or_insert_function(functype, name=name) if not func.is_declaration: # already defined? raise NameError(func) diff --git a/parallel_vectorize.py b/parallel_vectorize.py index 42aea26..2d9421f 100644 --- a/parallel_vectorize.py +++ b/parallel_vectorize.py @@ -95,7 +95,7 @@ class ParallelUFunc(CDefinition): which should be implemented in subclass or mixin. ''' - _name_ = 'parallel_ufunc' + _name_ = 'parallel_ufunc_%(ThreadCount)d' _argtys_ = [ ('func', C.void_p), ('worker', C.void_p), @@ -317,7 +317,9 @@ class UFuncCore(CDefinition): ''' raise NotImplementedError -class _SpecializedParallelUFunc(ParallelUFunc, ParallelUFuncPosixMixin): + +class SpecializedParallelUFunc(CDefinition): + _name_ = 'specialized_parallel_ufunc_%(ThreadCount)d_%(FuncName)s' _argtys_ = [ ('args', C.pointer(C.char_p)), ('dimensions', C.pointer(C.intp)), @@ -325,8 +327,14 @@ class _SpecializedParallelUFunc(ParallelUFunc, ParallelUFuncPosixMixin): ('data', C.void_p), ] - def body(self, args, dimensions, steps, data, ThreadCount=1): - pass + def body(self, args, dimensions, steps, data, + PUFuncDef, CoreDef, Func, FuncName, ThreadCount=1): + pufunc = self.depends(PUFuncDef, ThreadCount=ThreadCount) + core = self.depends(CoreDef) + func = self.depends(Func) + to_void_p = lambda x: x.cast(C.void_p) + pufunc(to_void_p(func), to_void_p(core), args, dimensions, steps, data) + self.ret() class PThreadAPI(CExternal): pthread_t = C.void_p diff --git a/test_parallel_vectorize.py b/test_parallel_vectorize.py index 4fde372..e16d206 100644 --- a/test_parallel_vectorize.py +++ b/test_parallel_vectorize.py @@ -47,10 +47,17 @@ class Tester(CDefinition): ArgCount = 2 WorkCount = 10000 - parallel_ufunc = self.depends(ParallelUFuncPosix, - ThreadCount=ThreadCount) - ufunc_core = self.depends(UFuncCore_D_D) - worker = self.depends(Work_D_D) +# parallel_ufunc = self.depends(ParallelUFuncPosix, +# ThreadCount=ThreadCount) +# ufunc_core = self.depends(UFuncCore_D_D) +# worker = self.depends(Work_D_D) + + sppufunc = self.depends(SpecializedParallelUFunc, + PUFuncDef = ParallelUFuncPosix, + CoreDef = UFuncCore_D_D, + Func = Work_D_D, + FuncName = Work_D_D._name_, + ThreadCount = 2) # real work NULL = self.constant_null(C.void_p) @@ -83,8 +90,7 @@ class Tester(CDefinition): i += self.constant(C.intp, 1) # call parallel ufunc - parallel_ufunc(worker.cast(C.void_p), ufunc_core.cast(C.void_p), args, - dims, steps, NULL) + sppufunc(args, dims, steps, NULL) # check error outbase = args_double[-1]