From 83c9200a43b1211ce08adcf56ccfbda600916fa0 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 21 May 2024 17:24:45 -0600 Subject: [PATCH] Sparse - sptrsv: final fixes --- sparse/src/KokkosSparse_sptrsv.hpp | 4 +-- ...okkosSparse_sptrsv_solve_tpl_spec_decl.hpp | 12 +++++---- ...osSparse_sptrsv_symbolic_tpl_spec_decl.hpp | 25 +++++++++---------- sparse/unit_test/Test_Sparse_sptrsv.hpp | 21 ++++++++-------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/sparse/src/KokkosSparse_sptrsv.hpp b/sparse/src/KokkosSparse_sptrsv.hpp index b987ad6372..38f6a1528e 100644 --- a/sparse/src/KokkosSparse_sptrsv.hpp +++ b/sparse/src/KokkosSparse_sptrsv.hpp @@ -239,8 +239,8 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, Entries_Internal entries_i = entries; Values_Internal values_i = values; - typedef typename KernelHandle::SPTRSVHandleType sptrsvHandleType; - sptrsvHandleType *sh = handle->get_sptrsv_handle(); + // typedef typename KernelHandle::SPTRSVHandleType sptrsvHandleType; + // sptrsvHandleType *sh = handle->get_sptrsv_handle(); // auto nrows = sh->get_nrows(); std::string label = "KokkosSparse::sptrsv[TPL_CUSPARSE," diff --git a/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp index 24487d1f3e..12d08f75ff 100644 --- a/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp @@ -40,6 +40,8 @@ void sptrsv_solve_cusparse(ExecutionSpace& space, using size_type = typename KernelHandle::size_type; using scalar_type = typename KernelHandle::scalar_t; + const idx_type nrows = sptrsv_handle->get_nrows(); + #if (CUDA_VERSION >= 11030) using memory_space = typename KernelHandle::memory_space; @@ -49,7 +51,7 @@ void sptrsv_solve_cusparse(ExecutionSpace& space, // cusparseDnVecDescr_t vecBDescr, vecXDescr; - const idx_type nrows = sptrsv_handle->get_nrows(); + // const idx_type nrows = sptrsv_handle->get_nrows(); typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); @@ -80,7 +82,7 @@ void sptrsv_solve_cusparse(ExecutionSpace& space, cusparseStatus_t status; - const idx_type nrows = sptrsv_handle->get_nrows(); + // const idx_type nrows = sptrsv_handle->get_nrows(); typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); @@ -108,7 +110,7 @@ void sptrsv_solve_cusparse(ExecutionSpace& space, if (CUSPARSE_STATUS_SUCCESS != status) std::cout << "solve status error name " << (status) << std::endl; - } else if (std::is_same::value) { + } else if constexpr (std::is_same::value) { if (h->pBuffer == nullptr) { std::cout << " pBuffer invalid" << std::endl; } @@ -121,7 +123,7 @@ void sptrsv_solve_cusparse(ExecutionSpace& space, if (CUSPARSE_STATUS_SUCCESS != status) std::cout << "solve status error name " << (status) << std::endl; - } else if (std::is_same >::value) { + } else if constexpr (std::is_same_v >) { cuDoubleComplex cualpha; cualpha.x = 1.0; cualpha.y = 0.0; @@ -132,7 +134,7 @@ void sptrsv_solve_cusparse(ExecutionSpace& space, if (CUSPARSE_STATUS_SUCCESS != status) std::cout << "solve status error name " << (status) << std::endl; - } else if (std::is_same >::value) { + } else if constexpr (std::is_same_v >) { cuComplex cualpha; cualpha.x = 1.0; cualpha.y = 0.0; diff --git a/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp index 9ecc07fdf3..f6f3860850 100644 --- a/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp @@ -34,9 +34,9 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle ain_nonzero_index_view_type entries, ain_values_scalar_view_type values, const bool trans) { using idx_type = typename KernelHandle::nnz_lno_t; - using size_type = typename KernelHandle::size_type; using scalar_type = typename KernelHandle::scalar_t; - using memory_space = typename KernelHandle::memory_space; + + const idx_type nrows = sptrsv_handle->get_nrows(); #if (CUDA_VERSION >= 11030) using nnz_scalar_view_t = typename KernelHandle::nnz_scalar_view_t; @@ -45,7 +45,6 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle const bool is_lower = sptrsv_handle->is_lower_tri(); sptrsv_handle->create_cuSPARSE_Handle(trans, is_lower); - const idx_type nrows = sptrsv_handle->get_nrows(); typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); @@ -122,11 +121,11 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle int nnz = entries.extent_int(0); int pBufferSize; - const scalar_type *vals = values.data(); + // const scalar_type *vals = values.data(); if constexpr (std::is_same::value) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, - values.data(), row_map.data(), entries.data(), + const_cast(values.data()), row_map.data(), entries.data(), h->info, &pBufferSize)); // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. @@ -140,9 +139,9 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_analysis( h->handle, h->transpose, nrows, nnz, h->descr, values.data(), row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); - } else if (std::is_same::value) { + } else if constexpr (std::is_same::value) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, - values.data(), row_map.data(), entries.data(), h->info, + const_cast(values.data()), row_map.data(), entries.data(), h->info, &pBufferSize)); // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. @@ -155,9 +154,9 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_analysis(h->handle, h->transpose, nrows, nnz, h->descr, values.data(), row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); - } else if (std::is_same >::value) { + } else if constexpr (std::is_same >::value) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, - reinterpret_cast(values.data()), row_map.data(), + reinterpret_cast(const_cast*>(values.data())), row_map.data(), entries.data(), h->info, &pBufferSize)); // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. @@ -169,11 +168,11 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle << cudaGetErrorString(my_error) << std::endl; KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_analysis(h->handle, h->transpose, nrows, nnz, - h->descr, reinterpret_cast(values.data()), + h->descr, reinterpret_cast(const_cast*>(values.data())), row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); - } else if (std::is_same >::value) { + } else if constexpr (std::is_same >::value) { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, - reinterpret_cast(values.data()), + reinterpret_cast(const_cast*>(values.data())), row_map.data(), entries.data(), h->info, &pBufferSize)); // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. @@ -186,7 +185,7 @@ void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_analysis( h->handle, h->transpose, nrows, nnz, h->descr, - reinterpret_cast(values.data()), + reinterpret_cast(const_cast*>(values.data())), row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); } #endif // CUDA_VERSION >= 11030 diff --git a/sparse/unit_test/Test_Sparse_sptrsv.hpp b/sparse/unit_test/Test_Sparse_sptrsv.hpp index 900ef1faca..da9f7fd3da 100644 --- a/sparse/unit_test/Test_Sparse_sptrsv.hpp +++ b/sparse/unit_test/Test_Sparse_sptrsv.hpp @@ -233,9 +233,9 @@ struct SptrsvTest { } #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (std::is_same::value && - std::is_same::value && - std::is_same::value) { + if (std::is_same_v && + std::is_same_v && + std::is_same_v) { { Kokkos::deep_copy(lhs, ZERO); KernelHandle kh; @@ -480,9 +480,9 @@ struct SptrsvTest { } #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (std::is_same::value && - std::is_same::value && - std::is_same::value) { + if (std::is_same_v && + std::is_same_v && + std::is_same_v) { Kokkos::deep_copy(lhs, ZERO); KernelHandle kh; bool is_lower_tri = true; @@ -657,7 +657,7 @@ struct SptrsvTest { // not enough resource to partition bool run_streams_test = true; #ifdef KOKKOS_ENABLE_OPENMP - if (std::is_same::value) { + if (std::is_same_v) { int exec_concurrency = execution_space().concurrency(); if (exec_concurrency < nstreams) { run_streams_test = false; @@ -840,7 +840,6 @@ template ; TestStruct::run_test_sptrsv(); - std::cout << "Done with test_sptrsv" << std::endl; } template ::value && - std::is_same::value) { + if (std::is_same_v && + std::is_same_v && + std::is_same_v) { TestStruct::run_test_sptrsv_streams(2, 1); TestStruct::run_test_sptrsv_streams(2, 2); TestStruct::run_test_sptrsv_streams(2, 3); TestStruct::run_test_sptrsv_streams(2, 4); } #endif - std::cout << "Done with test_sptrsv_streams" << std::endl; } #define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \