From 1d2b76e30e03e182cd937fe97425d1cd03ff6409 Mon Sep 17 00:00:00 2001 From: Frederic Date: Tue, 12 Nov 2013 20:04:19 -0500 Subject: [PATCH 1/4] Use different kernel name for elemwise. --- pygpu/elemwise.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pygpu/elemwise.py b/pygpu/elemwise.py index cb8dfec..523db00 100644 --- a/pygpu/elemwise.py +++ b/pygpu/elemwise.py @@ -271,10 +271,10 @@ def __init__(self, context, arguments, operation, preamble="", self.preamble = preamble self.contig_src = contiguous_kernel.render(preamble=self.preamble, - name="elemk", + name="elem_contig", arguments=self.arguments, expression=self.operation) - self.contig_k = gpuarray.GpuKernel(self.contig_src, "elemk", + self.contig_k = gpuarray.GpuKernel(self.contig_src, "elem_contig", context=self.context, cluda=True, **self.flags) self._speckey = None @@ -308,15 +308,16 @@ def prepare_args_contig(self, args, n, offsets): kernel_args.insert(0, numpy.asarray(n, dtype='uint32')) return kernel_args - def render_basic(self, nd): - return basic_kernel.render(preamble=self.preamble, name="elemk", + def render_basic(self, nd, name="elemk"): + return basic_kernel.render(preamble=self.preamble, name=name, nd=nd, arguments=self.arguments, expression=self.expression) @lfu_cache() def _make_basic(self, nd): - src = self.render_basic(nd) - return gpuarray.GpuKernel(src, "elemk", context=self.context, + name = "elem_" + str(nd) + src = self.render_basic(nd, name=name) + return gpuarray.GpuKernel(src, name, context=self.context, cluda=True, **self.flags) def prepare_args_basic(self, args, n, dims, strs, offsets): From a02ad8f0e5842b064b3799bc70ef69a679080064 Mon Sep 17 00:00:00 2001 From: Frederic Date: Tue, 12 Nov 2013 20:41:20 -0500 Subject: [PATCH 2/4] Add numpy c-api macro/fct to help make the conversion faster. 
--- src/CMakeLists.txt | 1 + src/compyte/pygpua_cuda.h | 40 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100644 src/compyte/pygpua_cuda.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3cf5b4..985918a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -112,6 +112,7 @@ SET(headers compyte/kernel.h compyte/types.h compyte/util.h + compyte/pygpua_cuda.h ) INSTALL(FILES ${headers} DESTINATION include/compyte) diff --git a/src/compyte/pygpua_cuda.h b/src/compyte/pygpua_cuda.h new file mode 100644 index 0000000..34d3920 --- /dev/null +++ b/src/compyte/pygpua_cuda.h @@ -0,0 +1,40 @@ +/* + * This file isn't recommended. Using it makes your code unable to work on OpenCL. + * + * But it allows faster conversion of existing code to this new library + */ +#include "extension.h" +#ifndef COMPYTE_BUFFER_CUDA_H +#define COMPYTE_BUFFER_CUDA_H +CUdeviceptr (*cuda_get_ptr)(gpudata *g) = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr"); + +int PyGpuArray_NDIM(PyGpuArrayObject *arr) { + return arr->ga.nd; +} +size_t *PyGpuArray_DIMS(PyGpuArrayObject *arr) { + return arr->ga.dimensions; +} +//void *PyGpuArray_DATA(PyGpuArrayObject *arr) +char *PyGpuArray_BYTES(PyGpuArrayObject *arr){ + return ((char*) cuda_get_ptr(arr->ga.data)) + arr->ga.offset; +} + +ssize_t *PyGpuArray_STRIDES(PyGpuArrayObject* arr) { + return arr->ga.strides; +} +size_t PyGpuArray_DIM(PyGpuArrayObject* arr, int n) { + return arr->ga.dimensions[n]; +} +ssize_t PyGpuArray_STRIDE(PyGpuArrayObject* arr, int n) { + return arr->ga.strides[n]; +} +//int PyGpuArray_ITEMSIZE(PyGpuArrayObject* arr) +size_t PyGpuArray_SIZE(PyGpuArrayObject* arr) { + size_t size = 1; + for(int i=0; i< arr->ga.nd; i++) { + size *= arr->ga.dimensions[i]; + } + return size; +} + +#endif From d5922c494535bdd71a7340217a0022da0f326785 Mon Sep 17 00:00:00 2001 From: Frederic Date: Wed, 13 Nov 2013 14:54:58 -0500 Subject: [PATCH 3/4] remove some fct that we don't want to 
provide from this interface. Also Arnaud raised that in C, we can't call an fct as an initializer. --- src/compyte/pygpua_cuda.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/compyte/pygpua_cuda.h b/src/compyte/pygpua_cuda.h index 34d3920..167a347 100644 --- a/src/compyte/pygpua_cuda.h +++ b/src/compyte/pygpua_cuda.h @@ -6,7 +6,6 @@ #include "extension.h" #ifndef COMPYTE_BUFFER_CUDA_H #define COMPYTE_BUFFER_CUDA_H -CUdeviceptr (*cuda_get_ptr)(gpudata *g) = (CUdeviceptr (*)(gpudata *g))compyte_get_extension("cuda_get_ptr"); int PyGpuArray_NDIM(PyGpuArrayObject *arr) { return arr->ga.nd; @@ -14,10 +13,6 @@ int PyGpuArray_NDIM(PyGpuArrayObject *arr) { size_t *PyGpuArray_DIMS(PyGpuArrayObject *arr) { return arr->ga.dimensions; } -//void *PyGpuArray_DATA(PyGpuArrayObject *arr) -char *PyGpuArray_BYTES(PyGpuArrayObject *arr){ - return ((char*) cuda_get_ptr(arr->ga.data)) + arr->ga.offset; -} ssize_t *PyGpuArray_STRIDES(PyGpuArrayObject* arr) { return arr->ga.strides; From 74281847f58981a2a70a591c595788b81dffd76e Mon Sep 17 00:00:00 2001 From: Frederic Date: Wed, 13 Nov 2013 14:56:47 -0500 Subject: [PATCH 4/4] rename header. --- src/compyte/{pygpua_cuda.h => numpy_compat.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/compyte/{pygpua_cuda.h => numpy_compat.h} (100%) diff --git a/src/compyte/pygpua_cuda.h b/src/compyte/numpy_compat.h similarity index 100% rename from src/compyte/pygpua_cuda.h rename to src/compyte/numpy_compat.h