diff --git a/scikits/cuda/cusparse.py b/scikits/cuda/cusparse.py
index bcc3cbe..0a13c4e 100644
--- a/scikits/cuda/cusparse.py
+++ b/scikits/cuda/cusparse.py
@@ -5,6 +5,18 @@
 import pycuda.gpuarray as gpuarray
 import pycuda.driver as drv
 
+try:
+    import scipy.sparse
+    from scipy.sparse.sputils import isscalarlike
+    has_scipy = True
+except ImportError:
+    has_scipy = False
+
+    # copies of isdense/isscalarlike from scipy.sparse.sputils
+    def isdense(x):
+        return isinstance(x, np.ndarray)
+
+    def isscalarlike(x):
+        """Is x either a scalar, an array scalar, or a 0-dim array?"""
+        return np.isscalar(x) or (isdense(x) and x.ndim == 0)
+
 toolkit_version = drv.get_version()
 
 if toolkit_version < (3, 2, 0):
@@ -895,3 +907,468 @@ def csrgemm(handle, m, n, k, descrA, csrValA, csrRowPtrA, csrColIndA,
                          csrColIndA, descrB, nnzB, csrValB, csrRowPtrB,
                          csrColIndB, descrC, csrValC, csrRowPtrC, csrColIndC)
     return (descrC, csrValC, csrRowPtrC, csrColIndC)
+
+
+class CSR(object):
+    """ cuSPARSE CSR (compressed sparse row) matrix object """
+    def __init__(self, handle, descr, csrVal, csrRowPtr, csrColInd, shape):
+
+        if csrRowPtr.size != (shape[0] + 1):
+            raise ValueError("size of RowPtr inconsistent with shape")
+        if csrVal.size != csrColInd.size:
+            raise ValueError("size of csrVal and csrColInd inconsistent")
+        if csrColInd.dtype != np.int32:
+            raise ValueError("csrColInd must be a 32-bit integer array")
+        if csrRowPtr.dtype != np.int32:
+            raise ValueError("csrRowPtr must be a 32-bit integer array")
+
+        # if input arrays are on the host, transfer them to the GPU
+        if isinstance(csrVal, np.ndarray):
+            csrVal = gpuarray.to_gpu(csrVal)
+        if isinstance(csrRowPtr, np.ndarray):
+            csrRowPtr = gpuarray.to_gpu(csrRowPtr)
+        if isinstance(csrColInd, np.ndarray):
+            csrColInd = gpuarray.to_gpu(csrColInd)
+
+        self.handle = handle
+        self.descr = descr
+        self.Val = csrVal
+        self.RowPtr = csrRowPtr
+        self.ColInd = csrColInd
+        self.dtype = csrVal.dtype
+        self.shape = shape
+
+        # also mirror scipy.sparse.csr_matrix property names for convenience
+        self.data = csrVal
+        self.indices = csrColInd
+        self.indptr = csrRowPtr
+
+        # cached descriptor properties
+        self.__matrix_type = None
+        self.__index_base = None
+        self.__diag_type = None
+        self.__fill_mode = None
+
+    # alternative constructor from a dense ndarray, gpuarray or any
+    # scipy.sparse matrix
+    @classmethod
+    def to_CSR(cls, A, handle):
+        """ convert a dense numpy or gpuarray matrix, or any scipy.sparse
+        matrix format, to cuSPARSE CSR.
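+
+        A minimal usage sketch (illustrative only; it assumes an
+        initialized CUDA context and a handle from cusparseCreate()):
+
+        >>> h = cusparseCreate()
+        >>> A = np.eye(4, dtype=np.float32)
+        >>> A_CSR = CSR.to_CSR(A, h)   # dense ndarray -> cuSPARSE CSR
+        >>> A_CSR.nnz
+        4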
+ """ + if has_scipy and isinstance(A, scipy.sparse.spmatrix): + """Convert scipy.sparse CSR, COO, BSR, etc to cuSPARSE CSR""" + # converting BSR, COO, etc to CSR + if A.dtype.char not in ['f', 'd', 'F', 'D']: + raise ValueError("unsupported numpy dtype {}".format(A.dtype)) + + if not isinstance(A, scipy.sparse.csr_matrix): + A = A.tocsr() + + # avoid .astype() calls if possible for speed + if A.indptr.dtype != np.int32: + csrRowPtr = gpuarray.to_gpu(A.indptr.astype(np.int32)) + else: + csrRowPtr = gpuarray.to_gpu(A.indptr) + + if A.indices.dtype != np.int32: + csrColInd = gpuarray.to_gpu(A.indices.astype(np.int32)) + else: + csrColInd = gpuarray.to_gpu(A.indices) + + csrVal = gpuarray.to_gpu(A.data) + descr = cusparseCreateMatDescr() + cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL) + cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO) + else: + """Take dense numpy array or pyCUDA gpuarray and convert to CSR """ + if not isinstance(A, pycuda.gpuarray.GPUArray): + A = np.asfortranarray(np.atleast_2d(A)) + A = gpuarray.to_gpu(A) + else: + # dense matrix must be column-major + if not A.flags.f_contiguous: + # TODO :an change to Fortran ordering be done directly on + # the gpuarray without going back to numpy? + A = A.get() + A = np.asfortranarray(A) + A = gpuarray.to_gpu(A) + + (handle, descr, csrVal, csrRowPtr, csrColInd) = dense2csr(A, + handle) + return cls(handle, descr, csrVal, csrRowPtr, csrColInd, A.shape) + + @property + def matrix_type(self): + """ matrix type """ + if self.__matrix_type is None: + return cusparseGetMatType(self.descr) + else: + return self.__matrix_type + + @matrix_type.setter + def matrix_type(self, matrix_type): + """ matrix type """ + self.__matrix_type = cusparseSetMatType(self.descr, matrix_type) + + @property + def index_base(self): + """ matrix index base """ + if self.__index_base is None: + return cusparseGetMatIndexBase(self.descr) + else: + return self.__index_base + + @index_base.setter + def index_base(self, index_base): + """ matrix index base """ + self.__index_base = cusparseSetMatIndexBase(self.descr, index_base) + + @property + def diag_type(self): + """ matrix diag type """ + if self.__diag_type is None: + return cusparseGetMatDiagType(self.descr) + else: + return self.__diag_type + + @diag_type.setter + def diag_type(self, diag_type): + """matrix diag type """ + self.__diag_type = cusparseSetMatDiagType(self.descr, diag_type) + + @property + def fill_mode(self): + """matrix fill mode """ + if self.__fill_mode is None: + return cusparseGetMatFillMode(self.descr) + else: + return self.__fill_mode + + @fill_mode.setter + def fill_mode(self, fill_mode): + """matrix fill mode """ + self.__fill_mode = cusparseSetMatFillMode(self.descr, fill_mode) + + @property + def nnz(self): + """ number of non-zeros """ + return self.Val.size + + # mirror the function name from scipy + def getnnz(self): + """ return number of non-zeros""" + return self.nnz + + def tocsr_scipy(self): + """ return as scipy csr_matrix in host memory """ + from scipy.sparse import csr_matrix + return csr_matrix((self.data.get(), + self.indices.get(), + self.indptr.get()), + shape=self.shape) + + def todense(self, lda=None, to_cpu=False, handle=None, stream=None, + autosync=True): + """ return dense gpuarray if to_cpu=False, numpy ndarray if to_cpu=True + """ + m, n = self.shape + if lda is None: + lda = m + else: + assert lda >= m + if handle is None: + handle = self.handle + if stream is not None: + cusparseSetStream(handle, stream.handle) + A = csr2dense(self.handle, m, 
+    @property
+    def matrix_type(self):
+        """ matrix type """
+        if self.__matrix_type is None:
+            return cusparseGetMatType(self.descr)
+        else:
+            return self.__matrix_type
+
+    @matrix_type.setter
+    def matrix_type(self, matrix_type):
+        """ matrix type """
+        cusparseSetMatType(self.descr, matrix_type)
+        self.__matrix_type = matrix_type
+
+    @property
+    def index_base(self):
+        """ matrix index base """
+        if self.__index_base is None:
+            return cusparseGetMatIndexBase(self.descr)
+        else:
+            return self.__index_base
+
+    @index_base.setter
+    def index_base(self, index_base):
+        """ matrix index base """
+        cusparseSetMatIndexBase(self.descr, index_base)
+        self.__index_base = index_base
+
+    @property
+    def diag_type(self):
+        """ matrix diag type """
+        if self.__diag_type is None:
+            return cusparseGetMatDiagType(self.descr)
+        else:
+            return self.__diag_type
+
+    @diag_type.setter
+    def diag_type(self, diag_type):
+        """ matrix diag type """
+        cusparseSetMatDiagType(self.descr, diag_type)
+        self.__diag_type = diag_type
+
+    @property
+    def fill_mode(self):
+        """ matrix fill mode """
+        if self.__fill_mode is None:
+            return cusparseGetMatFillMode(self.descr)
+        else:
+            return self.__fill_mode
+
+    @fill_mode.setter
+    def fill_mode(self, fill_mode):
+        """ matrix fill mode """
+        cusparseSetMatFillMode(self.descr, fill_mode)
+        self.__fill_mode = fill_mode
+
+    @property
+    def nnz(self):
+        """ number of non-zeros """
+        return self.Val.size
+
+    # mirror the method name from scipy
+    def getnnz(self):
+        """ return the number of non-zeros """
+        return self.nnz
+
+    def tocsr_scipy(self):
+        """ return a copy as a scipy.sparse.csr_matrix in host memory """
+        from scipy.sparse import csr_matrix
+        return csr_matrix((self.data.get(),
+                           self.indices.get(),
+                           self.indptr.get()),
+                          shape=self.shape)
+
+    def todense(self, lda=None, to_cpu=False, handle=None, stream=None,
+                autosync=True):
+        """ return a dense gpuarray if to_cpu=False, a numpy ndarray if
+        to_cpu=True
+        """
+        m, n = self.shape
+        if lda is None:
+            lda = m
+        else:
+            assert lda >= m
+        if handle is None:
+            handle = self.handle
+        if stream is not None:
+            cusparseSetStream(handle, stream.handle)
+        A = csr2dense(handle, m, n, self.descr, self.Val, self.RowPtr,
+                      self.ColInd, lda=lda)
+        if autosync:
+            drv.Context.synchronize()
+        if to_cpu:
+            return A.get()
+        else:
+            return A
+
+    def mv(self, x, transA=CUSPARSE_OPERATION_NON_TRANSPOSE, alpha=1.0,
+           beta=0.0, y=None, check_inputs=True, to_cpu=False,
+           autosync=True, handle=None, stream=None):
+        """ multiplication by a dense vector x:  y = alpha*transA(A)*x + beta*y
+        """
+        m, n = self.shape
+
+        # try moving a list or numpy array to the GPU
+        if not isinstance(x, gpuarray.GPUArray):
+            x = np.atleast_1d(x)
+            x = gpuarray.to_gpu(x)
+        if handle is None:
+            handle = self.handle
+        if stream is not None:
+            cusparseSetStream(handle, stream.handle)
+        y = csrmv(handle, self.descr, self.Val, self.RowPtr, self.ColInd, m, n,
+                  x, transA=transA, alpha=alpha, beta=beta, y=y,
+                  check_inputs=check_inputs)
+        if autosync:
+            drv.Context.synchronize()
+        if to_cpu:
+            return y.get()
+        else:
+            return y
+
+    def mm(self, B, transA=CUSPARSE_OPERATION_NON_TRANSPOSE, alpha=1.0,
+           beta=0.0, C=None, ldb=None, ldc=None, check_inputs=True,
+           to_cpu=False, autosync=True, handle=None, stream=None):
+        """ multiplication by a dense matrix B:  C = alpha*transA(A)*B + beta*C
+        """
+        m, k = self.shape
+        # try moving a list or numpy array to the GPU (column-major)
+        if not isinstance(B, gpuarray.GPUArray):
+            B = np.asfortranarray(np.atleast_2d(B))
+            B = gpuarray.to_gpu(B)
+        n = B.shape[1]
+        if handle is None:
+            handle = self.handle
+        if stream is not None:
+            cusparseSetStream(handle, stream.handle)
+        C = csrmm(handle=handle, m=m, n=n, k=k, descrA=self.descr,
+                  csrValA=self.Val, csrRowPtrA=self.RowPtr,
+                  csrColIndA=self.ColInd, B=B, C=C, transA=transA,
+                  alpha=alpha, beta=beta, ldb=ldb, ldc=ldc,
+                  check_inputs=check_inputs)
+        if autosync:
+            drv.Context.synchronize()
+        if to_cpu:
+            return C.get()
+        else:
+            return C
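+
+    # Usage sketch for the dense multiplication helpers (illustrative;
+    # assumes a live CUDA context and an existing CSR instance A_CSR):
+    #     y = A_CSR.mv(x)               # y = A * x for a dense vector x
+    #     C = A_CSR.mm(B, alpha=2.0)    # C = 2 * A * B for a dense matrix B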
+
+    def mm2(self, B, transA=CUSPARSE_OPERATION_NON_TRANSPOSE,
+            transB=CUSPARSE_OPERATION_NON_TRANSPOSE, alpha=1.0, beta=0.0,
+            C=None, ldb=None, ldc=None, check_inputs=True, to_cpu=False,
+            autosync=True, handle=None, stream=None):
+        """ multiplication by a dense matrix B:
+        C = alpha*transA(A)*transB(B) + beta*C  (csrmm2, CUDA >= 5.5)
+        """
+        if toolkit_version < (5, 5, 0):
+            raise ImportError("mm2 not implemented prior to CUDA v5.5")
+        m, k = self.shape
+        # try moving a list or numpy array to the GPU (column-major)
+        if not isinstance(B, gpuarray.GPUArray):
+            B = np.asfortranarray(np.atleast_2d(B))
+            B = gpuarray.to_gpu(B)
+        if transB == CUSPARSE_OPERATION_NON_TRANSPOSE:
+            n = B.shape[1]
+        else:
+            n = B.shape[0]
+        if handle is None:
+            handle = self.handle
+        if stream is not None:
+            cusparseSetStream(handle, stream.handle)
+        C = csrmm2(handle=handle, m=m, n=n, k=k, descrA=self.descr,
+                   csrValA=self.Val, csrRowPtrA=self.RowPtr,
+                   csrColIndA=self.ColInd, B=B, C=C, transA=transA,
+                   transB=transB, alpha=alpha, beta=beta, ldb=ldb, ldc=ldc,
+                   check_inputs=check_inputs)
+        if autosync:
+            drv.Context.synchronize()
+        if to_cpu:
+            return C.get()
+        else:
+            return C
+
+    def geam(self, B, alpha=1.0, beta=1.0, check_inputs=True, autosync=True,
+             handle=None, stream=None):
+        """ addition of a sparse matrix B:  C = alpha*A + beta*B """
+        if toolkit_version < (5, 0, 0):
+            raise ImportError("geam not implemented prior to CUDA v5.0")
+        m, n = self.shape
+        if handle is None:
+            handle = self.handle
+        if not isinstance(B, CSR):
+            # try converting B to cuSPARSE CSR
+            B = CSR.to_CSR(B, handle=handle)
+        if self.shape != B.shape:
+            raise ValueError("Incompatible shapes")
+        if stream is not None:
+            cusparseSetStream(handle, stream.handle)
+        descrC, ValC, RowPtrC, ColIndC = csrgeam(
+            handle=handle, m=m, n=n, descrA=self.descr, csrValA=self.Val,
+            csrRowPtrA=self.RowPtr, csrColIndA=self.ColInd, descrB=B.descr,
+            csrValB=B.Val, csrRowPtrB=B.RowPtr, csrColIndB=B.ColInd,
+            alpha=alpha, beta=beta, nnzA=self.nnz, nnzB=B.nnz,
+            check_inputs=check_inputs)
+        if autosync:
+            drv.Context.synchronize()
+        return CSR(handle, descr=descrC, csrVal=ValC, csrRowPtr=RowPtrC,
+                   csrColInd=ColIndC, shape=self.shape)
+
+    def gemm(self, B, transA=CUSPARSE_OPERATION_NON_TRANSPOSE,
+             transB=CUSPARSE_OPERATION_NON_TRANSPOSE, check_inputs=True,
+             autosync=True, handle=None, stream=None):
+        """ multiplication by a sparse matrix B:  C = transA(A) * transB(B) """
+        if toolkit_version < (5, 0, 0):
+            raise ImportError("gemm not implemented prior to CUDA v5.0")
+
+        if transA == CUSPARSE_OPERATION_NON_TRANSPOSE:
+            m, k = self.shape
+        else:
+            k, m = self.shape
+        if handle is None:
+            handle = self.handle
+        if not isinstance(B, CSR):
+            # try converting B to cuSPARSE CSR
+            B = CSR.to_CSR(B, handle=handle)
+        if transB == CUSPARSE_OPERATION_NON_TRANSPOSE:
+            n = B.shape[1]
+        else:
+            n = B.shape[0]
+        if stream is not None:
+            cusparseSetStream(handle, stream.handle)
+        descrC, ValC, RowPtrC, ColIndC = csrgemm(
+            handle=handle, m=m, n=n, k=k, descrA=self.descr,
+            csrValA=self.Val, csrRowPtrA=self.RowPtr, csrColIndA=self.ColInd,
+            descrB=B.descr, csrValB=B.Val, csrRowPtrB=B.RowPtr,
+            csrColIndB=B.ColInd, nnzA=self.nnz, nnzB=B.nnz, transA=transA,
+            transB=transB, check_inputs=check_inputs)
+        if autosync:
+            drv.Context.synchronize()
+        return CSR(handle, descr=descrC, csrVal=ValC, csrRowPtr=RowPtrC,
+                   csrColInd=ColIndC, shape=(m, n))
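+
+    # Usage sketch for the sparse-sparse helpers (illustrative; B_CSR is
+    # assumed to be another CSR instance of compatible shape):
+    #     C = A_CSR.geam(B_CSR, alpha=1.0, beta=2.0)   # C = A + 2*B
+    #     C = A_CSR.gemm(B_CSR)                        # C = A * B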
+
+    """
+    start of: subset of methods in scipy.sparse.compressed._cs_matrix
+    """
+    @property
+    def A(self):
+        "Dense (gpuarray) representation of the matrix."
+        return self.todense()
+
+    @property
+    def T(self):
+        "The transpose operator."
+        return self.transpose()
+
+    @property
+    def H(self):
+        "The adjoint (conjugate transpose) operator."
+        return self.getH()
+
+    @property
+    def real(self):
+        "The real part of the matrix entries."
+        return self._real()
+
+    @property
+    def imag(self):
+        "The imaginary part of the matrix entries."
+        return self._imag()
+
+    @property
+    def size(self):
+        "The number of stored values (nnz)."
+        return self.getnnz()
+
+    def transpose(self):
+        m, n = self.shape
+        # use csr2csc to perform the transpose:  the CSC arrays of A are the
+        # CSR arrays of A.T, so the result has shape (n, m)
+        cscVal, cscColPtr, cscRowInd = csr2csc(
+            self.handle, m, n, self.Val, self.RowPtr, self.ColInd, self.nnz)
+        drv.Context.synchronize()
+        return CSR(self.handle, copyMatDescr(self.descr), cscVal, cscColPtr,
+                   cscRowInd, (n, m))
+
+    def getH(self):
+        return self.transpose().conj()
+
+    def conjugate(self):
+        return self.conj()
+
+    # implement _with_data similar to scipy.sparse.data._data_matrix
+    def _with_data(self, data, copy=True):
+        """Return a matrix with the same sparsity structure as self,
+        but with different data.  By default the structure arrays
+        (i.e. .indptr and .indices) are copied.
+        """
+        if copy:
+            return self.__class__(self.handle, copyMatDescr(self.descr), data,
+                                  self.RowPtr.copy(), self.ColInd.copy(),
+                                  self.shape)
+        else:
+            return self.__class__(self.handle, self.descr, data,
+                                  self.RowPtr, self.ColInd,
+                                  self.shape)
+    """
+    end of: subset of methods in scipy.sparse.compressed._cs_matrix
+    """
+
+    """
+    start of: subset of methods in scipy.sparse.data._data_matrix
+    """
+    def conj(self):
+        return self._with_data(self.data.conj())
+
+    def _real(self):
+        return self._with_data(self.data.real)
+
+    def _imag(self):
+        return self._with_data(self.data.imag)
+
+    def __abs__(self):
+        return self._with_data(abs(self.data))
+
+    def __neg__(self):
+        return self._with_data(-self.data)
+
+    def __imul__(self, other):  # self *= other
+        if isscalarlike(other):
+            self.data *= other
+            return self
+        else:
+            return NotImplemented
+
+    def __itruediv__(self, other):  # self /= other
+        if isscalarlike(other):
+            recip = 1.0 / other
+            self.data *= recip
+            return self
+        else:
+            return NotImplemented
+
+    # Python 2 dispatches /= to __idiv__ unless true division is enabled
+    __idiv__ = __itruediv__
+
+    def astype(self, t):
+        return self._with_data(self.data.astype(t))
+
+    def copy(self):
+        return self._with_data(self.data.copy(), copy=True)
+
+    def _mul_scalar(self, other):
+        return self._with_data(self.data * other)
+    """
+    end of: subset of methods in scipy.sparse.data._data_matrix
+    """
+
+    def __del__(self):
+        """ clean up the descriptor upon object deletion """
+        cusparseDestroyMatDescr(self.descr)
+        # don't destroy the handle as other objects may be using it
+
+    def __repr__(self):
+        rstr = "CSR matrix:\n"
+        rstr += "\tshape = {}\n".format(self.shape)
+        rstr += "\tdtype = {}\n".format(self.dtype)
+        rstr += "\tMatrixType = {}\n".format(self.matrix_type)
+        rstr += "\tIndexBase = {}\n".format(self.index_base)
+        rstr += "\tDiagType = {}\n".format(self.diag_type)
+        rstr += "\tFillMode = {}\n".format(self.fill_mode)
+        rstr += "\tcontext = {}\n\n".format(self.handle)
+        rstr += "\tnnz = {}\n".format(self.nnz)
+        rstr += "\tRowPtr = {}\n".format(self.RowPtr)
+        rstr += "\tVal = {}\n".format(self.Val)
+        return rstr
diff --git a/tests/test_cusparse.py b/tests/test_cusparse.py
index 57dc513..c698862 100644
--- a/tests/test_cusparse.py
+++ b/tests/test_cusparse.py
@@ -708,3 +708,365 @@ def test_csrgemm():
     cusparseDestroy(handle)
     cusparseDestroyMatDescr(descrA)
     cusparseDestroyMatDescr(descrB)
+
+
+def test_CSR_construction():
+    n = 64
+    h = cusparseCreate()
+    try:
+        for dtype in cusparse_dtypes:
+            A = 2*np.eye(n, dtype=dtype)
+
+            A_scipy_csr = scipy.sparse.csr_matrix(A)
+
+            # generate a CSR matrix from a dense gpuarray
+            A_CSR = CSR.to_CSR(gpuarray.to_gpu(A), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a CSR matrix from a dense numpy array
+            A_CSR = CSR.to_CSR(A, h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a CSR matrix from a list of lists
+            A_CSR = CSR.to_CSR(A.tolist(), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a cuSPARSE CSR matrix from a scipy.sparse CSR matrix
+            A_CSR = CSR.to_CSR(A_scipy_csr, h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a cuSPARSE CSR matrix from a scipy.sparse BSR matrix
+            A_CSR = CSR.to_CSR(scipy.sparse.bsr_matrix(A), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a cuSPARSE CSR matrix from a scipy.sparse COO matrix
+            A_CSR = CSR.to_CSR(scipy.sparse.coo_matrix(A), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a cuSPARSE CSR matrix from a scipy.sparse CSC matrix
+            A_CSR = CSR.to_CSR(scipy.sparse.csc_matrix(A), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a cuSPARSE CSR matrix from a scipy.sparse DIA matrix
+            A_CSR = CSR.to_CSR(scipy.sparse.dia_matrix(A), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+
+            # generate a cuSPARSE CSR matrix from a scipy.sparse DOK matrix
+            A_CSR = CSR.to_CSR(scipy.sparse.dok_matrix(A), h)
+            assert_equal(A_CSR.Val.get(), A_scipy_csr.data)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_properties():
+    n = 64
+    h = cusparseCreate()
+    try:
+        A = 2*np.eye(n)
+
+        # generate a CSR matrix from a dense numpy array
+        A_CSR = CSR.to_CSR(A, h)
+
+        # test shape
+        assert A_CSR.shape == A.shape
+
+        # test the matrix_type property
+        assert A_CSR.matrix_type == CUSPARSE_MATRIX_TYPE_GENERAL
+        A_CSR.matrix_type = CUSPARSE_MATRIX_TYPE_SYMMETRIC
+        assert A_CSR.matrix_type == CUSPARSE_MATRIX_TYPE_SYMMETRIC
+        assert cusparseGetMatType(A_CSR.descr) == CUSPARSE_MATRIX_TYPE_SYMMETRIC
+
+        # test the index_base property
+        assert A_CSR.index_base == CUSPARSE_INDEX_BASE_ZERO
+        A_CSR.index_base = CUSPARSE_INDEX_BASE_ONE
+        assert A_CSR.index_base == CUSPARSE_INDEX_BASE_ONE
+        assert cusparseGetMatIndexBase(A_CSR.descr) == CUSPARSE_INDEX_BASE_ONE
+
+        # test the diag_type property
+        assert A_CSR.diag_type == CUSPARSE_DIAG_TYPE_NON_UNIT
+        A_CSR.diag_type = CUSPARSE_DIAG_TYPE_UNIT
+        assert A_CSR.diag_type == CUSPARSE_DIAG_TYPE_UNIT
+        assert cusparseGetMatDiagType(A_CSR.descr) == CUSPARSE_DIAG_TYPE_UNIT
+
+        # test the fill_mode property
+        assert A_CSR.fill_mode == CUSPARSE_FILL_MODE_LOWER
+        A_CSR.fill_mode = CUSPARSE_FILL_MODE_UPPER
+        assert A_CSR.fill_mode == CUSPARSE_FILL_MODE_UPPER
+        assert cusparseGetMatFillMode(A_CSR.descr) == CUSPARSE_FILL_MODE_UPPER
+
+        # verify that setting a value outside the valid range raises
+        def set_mat_type(A, t):
+            A.matrix_type = t
+        assert_raises(CUSPARSE_STATUS_INVALID_VALUE, set_mat_type, A_CSR, 100)
+
+        # test nnz
+        assert A_CSR.nnz == n
+        assert A_CSR.getnnz() == n
+
+        # verify that nnz can't be set
+        def set_nnz(A, nnz):
+            A.nnz = nnz
+        assert_raises(AttributeError, set_nnz, A_CSR, 5)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_properties2():
+    n = 64
+    h = cusparseCreate()
+    try:
+        for dtype in cusparse_complex_dtypes:
+            A = 2*np.eye(n).astype(dtype)
+            # add a couple of complex values
+            A[0, 1] = 1j
+            A[1, 0] = -1j
+            A_CSR = CSR.to_CSR(A, h)
+            orig_data = A_CSR.data.get()
+            orig_indices = A_CSR.indices.get()
+            orig_indptr = A_CSR.indptr.get()
+
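+            # the checks below exercise the scipy.sparse-style arithmetic
+            # helpers (__abs__, __neg__, __imul__, __itruediv__, conj, real,
+            # imag) against the corresponding numpy operations on host copies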
+            assert_equal(abs(A_CSR).data.get(), abs(orig_data))
+            assert_equal((-A_CSR).data.get(), -orig_data)
+
+            A_CSR *= 2
+            assert_almost_equal(A_CSR.data.get(), 2*orig_data)
+
+            # in-place scalar division (__idiv__ is aliased to __itruediv__,
+            # so this also works under Python 2's classic division)
+            A_CSR /= 2
+            assert_almost_equal(A_CSR.data.get(), orig_data)
+
+            assert_equal(A_CSR.conj().data.get(), np.conj(orig_data))
+            assert_equal(A_CSR.real.data.get(), orig_data.real)
+            assert_equal(A_CSR.imag.data.get(), orig_data.imag)
+            assert_equal(A_CSR.A.get(), A)
+            assert_equal(A_CSR.T.A.get(), A.T)
+
+            # make sure the above didn't modify the original data
+            assert_equal(A_CSR.data.get(), orig_data)
+            assert_equal(A_CSR.indices.get(), orig_indices)
+            assert_equal(A_CSR.indptr.get(), orig_indptr)
+
+            assert_equal(A_CSR.H.A.get(), A.T.conj())
+            assert_equal(A_CSR.size, A_CSR.nnz)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_todense():
+    n = 64
+    h = cusparseCreate()
+    try:
+        dtype = np.float32
+        A = 2*np.eye(n)
+
+        # generate a CSR matrix from a dense numpy array
+        A_CSR = CSR.to_CSR(A.astype(dtype), h)
+
+        # convert the cuSPARSE CSR matrix back to a dense matrix
+        A_dense = A_CSR.todense(to_cpu=False)
+        assert type(A_dense) == gpuarray.GPUArray
+        assert_equal(A_dense.get(), A)
+
+        A_dense = A_CSR.todense(to_cpu=True)
+        assert type(A_dense) == np.ndarray
+        assert_equal(A_dense, A)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_tocsr_scipy():
+    n = 64
+    h = cusparseCreate()
+    try:
+        dtype = np.float32
+        A = 2*np.eye(n)
+
+        # generate a CSR matrix from a dense numpy array
+        A_CSR = CSR.to_CSR(A.astype(dtype), h)
+
+        # convert the cuSPARSE CSR matrix back to a scipy.sparse CSR matrix
+        csr_scipy = A_CSR.tocsr_scipy()
+        assert_equal(csr_scipy.indptr, scipy.sparse.csr_matrix(A).indptr)
+        assert_equal(csr_scipy.indices, scipy.sparse.csr_matrix(A).indices)
+        assert_equal(csr_scipy.data, scipy.sparse.csr_matrix(A).data)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_mv():
+    n = 64
+    h = cusparseCreate()
+    try:
+        A = 2*np.eye(n)
+        x_cpu = np.ones((n, ), dtype=np.float32)
+        for dtype in cusparse_dtypes:
+            # generate a CSR matrix from a dense numpy array
+            A_CSR = CSR.to_CSR(A.astype(dtype), h)
+            x = gpuarray.to_gpu(x_cpu.astype(dtype))
+
+            # test the default operation
+            y = A_CSR.mv(x)
+            assert_almost_equal(y.get(), A.diagonal())
+            # the transpose gives the same result for this matrix
+            y = A_CSR.mv(x, transA=CUSPARSE_OPERATION_TRANSPOSE)
+            assert_almost_equal(y.get(), A.diagonal())
+
+            alpha = 5.0
+            y = A_CSR.mv(x, alpha=alpha)
+            assert_almost_equal(y.get(), alpha*A.diagonal())
+
+            # repeat with non-zero beta, initializing with the previous y
+            beta = 2.5
+            y = A_CSR.mv(x, alpha=alpha, beta=beta, y=y)
+            assert_almost_equal(y.get(), (alpha+alpha*beta)*A.diagonal())
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_mm():
+    m = 64
+    n = 10
+    h = cusparseCreate()
+    try:
+        A = 2*np.eye(m)
+        B_cpu = np.ones((m, n), dtype=np.float32, order='F')
+        for dtype in cusparse_dtypes:
+            A = A.astype(dtype)
+
+            # generate a CSR matrix from a dense numpy array
+            A_CSR = CSR.to_CSR(A, h)
+            B = gpuarray.to_gpu(B_cpu.astype(dtype))
+
+            # test the default operation
+            C = A_CSR.mm(B)
+            assert_almost_equal(C.get(), np.dot(A, B_cpu))
+            # the transpose gives the same result for this matrix
+            C = A_CSR.mm(B_cpu.astype(dtype),
+                         transA=CUSPARSE_OPERATION_TRANSPOSE)
+            assert_almost_equal(C.get(), np.dot(A.T, B_cpu))
+
+            alpha = 5.0
+            C = A_CSR.mm(B, alpha=alpha)
+            assert_almost_equal(C.get(), alpha*np.dot(A, B_cpu))
+
+            # repeat with non-zero beta, initializing with the previous C
+            beta = 2.5
+            C = A_CSR.mm(B, alpha=alpha, beta=beta, C=C)
+            assert_almost_equal(C.get(), (alpha+alpha*beta)*np.dot(A, B_cpu))
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_mm2():
+    if toolkit_version < (5, 5, 0):
+        # skip for old CUDA versions
+        return
+
+    A_cpu = np.asarray([[1, 0, 0], [0, 1, 0], [1, 0, 1], [0, 0, 3]])
+    n = 5
+    alpha = 2.0
+    h = cusparseCreate()
+    try:
+        for transB in trans_list[:-1]:
+            for transA in trans_list:
+                for dtype in cusparse_dtypes:
+                    m, k = A_cpu.shape
+                    if transA == CUSPARSE_OPERATION_NON_TRANSPOSE:
+                        if transB == CUSPARSE_OPERATION_NON_TRANSPOSE:
+                            B_cpu = np.ones((k, n), dtype=dtype, order='F')
+                            opB = B_cpu
+                        else:
+                            B_cpu = np.ones((n, k), dtype=dtype, order='F')
+                            opB = B_cpu.T
+                        opA = A_cpu
+                    else:
+                        if transB == CUSPARSE_OPERATION_NON_TRANSPOSE:
+                            B_cpu = np.ones((m, n), dtype=dtype, order='F')
+                            opB = B_cpu
+                        else:
+                            # cuSPARSE doesn't implement this case
+                            continue
+                        if transA == CUSPARSE_OPERATION_TRANSPOSE:
+                            opA = A_cpu.T
+                        else:
+                            opA = np.conj(A_cpu).T
+
+                    expected_result = alpha * np.dot(opA, opB)
+                    B = gpuarray.to_gpu(B_cpu)
+
+                    A_CSR = CSR.to_CSR(A_cpu.astype(dtype), h)
+
+                    # test multiplication without passing in C
+                    beta = 0.0
+                    C = A_CSR.mm2(B, transA=transA, transB=transB,
+                                  alpha=alpha, beta=beta, to_cpu=False)
+                    assert_almost_equal(C.get(), expected_result)
+
+                    # repeat, passing in the previous C with beta = 1.0
+                    beta = 1.0
+                    C = A_CSR.mm2(B, transA=transA, transB=transB,
+                                  alpha=alpha, beta=beta, C=C, to_cpu=True)
+                    assert_almost_equal(C, (1.0+beta)*expected_result)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_geam():
+    if toolkit_version < (5, 0, 0):
+        # skip for old CUDA versions
+        return
+
+    A_cpu = np.asarray([[1, 0, 0], [0, 1, 0], [1, 0, 1], [0, 0, 3]])
+    B_cpu = np.asarray([[0, 1, 0], [0, 0, 1], [0, 0, 0], [0, 0, 0]])
+
+    h = cusparseCreate()
+    alpha = 0.3
+    beta = 5.0
+
+    try:
+        for dtype in cusparse_dtypes:
+            A_CSR = CSR.to_CSR(A_cpu.astype(dtype), h)
+            B_CSR = CSR.to_CSR(B_cpu.astype(dtype), h)
+            C_CSR = A_CSR.geam(B_CSR, alpha=alpha, beta=beta)
+
+            # compute on the CPU with numpy and compare
+            C_cpu = alpha*A_cpu + beta*B_cpu
+            assert_almost_equal(C_CSR.todense(to_cpu=True), C_cpu)
+    finally:
+        cusparseDestroy(h)
+
+
+def test_CSR_gemm():
+    if toolkit_version < (5, 0, 0):
+        # skip for old CUDA versions
+        return
+
+    A_cpu = np.asarray([[1, 0, 0], [0, 1, 0], [1, 0, 1], [0, 0, 3]])
+
+    h = cusparseCreate()
+    B_cpu = A_cpu.T
+    try:
+        for transA in trans_list:
+            transB = transA
+
+            if transA == CUSPARSE_OPERATION_NON_TRANSPOSE:
+                opA = A_cpu
+                opB = B_cpu
+            else:
+                opA = A_cpu.T
+                opB = B_cpu.T
+
+            for dtype in cusparse_dtypes:
+                A_CSR = CSR.to_CSR(A_cpu.astype(dtype), h)
+                B_CSR = CSR.to_CSR(B_cpu.astype(dtype), h)
+                C_CSR = A_CSR.gemm(B_CSR, transA=transA, transB=transB)
+
+                # compute on the CPU with numpy and compare
+                C_cpu = np.dot(opA, opB)
+                assert_almost_equal(C_CSR.todense(to_cpu=True), C_cpu)
+    finally:
+        cusparseDestroy(h)
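+
+
+# Not a test: a minimal end-to-end sketch of the CSR object API.  It assumes
+# an initialized CUDA context; the matrix content is illustrative only.
+def _example_CSR_usage():
+    h = cusparseCreate()
+    try:
+        A = scipy.sparse.rand(5, 5, density=0.5, format='csr',
+                              dtype=np.float32)
+        A_CSR = CSR.to_CSR(A, h)        # scipy.sparse CSR -> cuSPARSE CSR
+        x = np.ones(5, dtype=np.float32)
+        y = A_CSR.mv(x, to_cpu=True)    # computes y = A*x on the GPU
+        assert np.allclose(y, A.dot(x))
+    finally:
+        cusparseDestroy(h)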