diff --git a/pygpu/elemwise.py b/pygpu/elemwise.py index cb8dfec..523db00 100644 --- a/pygpu/elemwise.py +++ b/pygpu/elemwise.py @@ -271,10 +271,10 @@ def __init__(self, context, arguments, operation, preamble="", self.preamble = preamble self.contig_src = contiguous_kernel.render(preamble=self.preamble, - name="elemk", + name="elem_contig", arguments=self.arguments, expression=self.operation) - self.contig_k = gpuarray.GpuKernel(self.contig_src, "elemk", + self.contig_k = gpuarray.GpuKernel(self.contig_src, "elem_contig", context=self.context, cluda=True, **self.flags) self._speckey = None @@ -308,15 +308,16 @@ def prepare_args_contig(self, args, n, offsets): kernel_args.insert(0, numpy.asarray(n, dtype='uint32')) return kernel_args - def render_basic(self, nd): - return basic_kernel.render(preamble=self.preamble, name="elemk", + def render_basic(self, nd, name="elemk"): + return basic_kernel.render(preamble=self.preamble, name=name, nd=nd, arguments=self.arguments, expression=self.expression) @lfu_cache() def _make_basic(self, nd): - src = self.render_basic(nd) - return gpuarray.GpuKernel(src, "elemk", context=self.context, + name = "elem_" + str(nd) + src = self.render_basic(nd, name=name) + return gpuarray.GpuKernel(src, name, context=self.context, cluda=True, **self.flags) def prepare_args_basic(self, args, n, dims, strs, offsets): diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3cf5b4..985918a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -112,6 +112,7 @@ SET(headers compyte/kernel.h compyte/types.h compyte/util.h + compyte/pygpua_cuda.h ) INSTALL(FILES ${headers} DESTINATION include/compyte) diff --git a/src/compyte/numpy_compat.h b/src/compyte/numpy_compat.h new file mode 100644 index 0000000..167a347 --- /dev/null +++ b/src/compyte/numpy_compat.h @@ -0,0 +1,56 @@
+/*
+ * Use of this file is not recommended: code that relies on it will not
+ * work on OpenCL.
+ *
+ * It exists only to allow faster conversion of existing code to this
+ * new library.
+ *
+ * Every function is `static inline` so that this header can be included
+ * from more than one translation unit without duplicate-symbol link
+ * errors.  None of them checks `arr` for NULL or `n` for range.
+ */
+#ifndef COMPYTE_NUMPY_COMPAT_H
+#define COMPYTE_NUMPY_COMPAT_H
+
+#include "extension.h"
+
+/* Return the number of dimensions of `arr` (arr->ga.nd). */
+static inline int PyGpuArray_NDIM(PyGpuArrayObject *arr) {
+  return arr->ga.nd;
+}
+
+/* Return the array's dimensions table (arr->ga.dimensions); not a copy. */
+static inline size_t *PyGpuArray_DIMS(PyGpuArrayObject *arr) {
+  return arr->ga.dimensions;
+}
+
+/* Return the array's strides table (arr->ga.strides); not a copy. */
+static inline ssize_t *PyGpuArray_STRIDES(PyGpuArrayObject *arr) {
+  return arr->ga.strides;
+}
+
+/* Return the extent of dimension `n`. */
+static inline size_t PyGpuArray_DIM(PyGpuArrayObject *arr, int n) {
+  return arr->ga.dimensions[n];
+}
+
+/* Return the stride of dimension `n`. */
+static inline ssize_t PyGpuArray_STRIDE(PyGpuArrayObject *arr, int n) {
+  return arr->ga.strides[n];
+}
+
+/* TODO: PyGpuArray_ITEMSIZE(PyGpuArrayObject *arr) is not provided yet. */
+
+/*
+ * Return the product of all dimensions, i.e. the element count
+ * (1 when nd is 0).  The multiplication is not checked for overflow.
+ */
+static inline size_t PyGpuArray_SIZE(PyGpuArrayObject *arr) {
+  size_t size = 1;
+  for (int i = 0; i < arr->ga.nd; i++) {
+    size *= arr->ga.dimensions[i];
+  }
+  return size;
+}
+
+#endif /* COMPYTE_NUMPY_COMPAT_H */