diff --git a/sparse/impl/KokkosSparse_crs_to_bsr_impl.hpp b/sparse/impl/KokkosSparse_crs_to_bsr_impl.hpp index 7f1ff2171e..68c06fa5dd 100644 --- a/sparse/impl/KokkosSparse_crs_to_bsr_impl.hpp +++ b/sparse/impl/KokkosSparse_crs_to_bsr_impl.hpp @@ -99,6 +99,7 @@ template Bsr blocked_crs_to_bsr(const Crs &crs, size_t blockSize) { using bsr_value_type = typename Bsr::value_type; using bsr_ordinal_type = typename Bsr::ordinal_type; + using csr_size_type = typename Crs::non_const_size_type; // copy matrix data to host auto hRowMap = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), @@ -119,7 +120,7 @@ Bsr blocked_crs_to_bsr(const Crs &crs, size_t blockSize) { for (bsr_ordinal_type row = 0; row < bsr_ordinal_type(hRowMap.size()) - 1; ++row) { - for (size_t ci = hRowMap(row); ci < hRowMap(row + 1); ++ci) { + for (csr_size_type ci = hRowMap(row); ci < hRowMap(row + 1); ++ci) { bsr_ordinal_type col = hColInds(ci); bsr_value_type val = hVals(ci); diff --git a/sparse/impl/KokkosSparse_sptrsv_symbolic_spec.hpp b/sparse/impl/KokkosSparse_sptrsv_symbolic_spec.hpp index 5b9304356d..26ca206c8e 100644 --- a/sparse/impl/KokkosSparse_sptrsv_symbolic_spec.hpp +++ b/sparse/impl/KokkosSparse_sptrsv_symbolic_spec.hpp @@ -68,27 +68,29 @@ namespace Impl { /// \brief Implementation of KokkosSparse::sptrsv_symbolic template ::value, + KernelHandle, RowMapType, EntriesType, ValuesType>::value, bool eti_spec_avail = sptrsv_symbolic_eti_spec_avail< KernelHandle, RowMapType, EntriesType>::value> struct SPTRSV_SYMBOLIC { static void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, const RowMapType row_map, - const EntriesType entries); + const EntriesType entries, + const ValuesType values); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of sptrsv_symbolic // Unification layer template -struct SPTRSV_SYMBOLIC +struct SPTRSV_SYMBOLIC { static void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, const RowMapType row_map, - const EntriesType entries) { + const EntriesType entries, + const ValuesType /*values*/) { auto sptrsv_handle = handle->get_sptrsv_handle(); auto nrows = row_map.extent(0) - 1; sptrsv_handle->new_init_handle(nrows); @@ -129,6 +131,10 @@ struct SPTRSV_SYMBOLIC, \ Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ false, true>; #define KOKKOSSPARSE_SPTRSV_SYMBOLIC_ETI_SPEC_INST( \ @@ -147,6 +153,10 @@ struct SPTRSV_SYMBOLIC, \ Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR_TYPE *, LAYOUT_TYPE, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ false, true>; #include diff --git a/sparse/src/KokkosSparse_sptrsv.hpp b/sparse/src/KokkosSparse_sptrsv.hpp index 1fef3e9f1b..b987ad6372 100644 --- a/sparse/src/KokkosSparse_sptrsv.hpp +++ b/sparse/src/KokkosSparse_sptrsv.hpp @@ -31,7 +31,7 @@ #include "KokkosSparse_sptrsv_symbolic_spec.hpp" #include "KokkosSparse_sptrsv_solve_spec.hpp" -#include "KokkosSparse_sptrsv_cuSPARSE_impl.hpp" +//#include "KokkosSparse_sptrsv_cuSPARSE_impl.hpp" namespace KokkosSparse { namespace Experimental { @@ -71,45 +71,58 @@ void sptrsv_symbolic(const ExecutionSpace &space, KernelHandle *handle, "sptrsv_symbolic: A entry type must match KernelHandle entry type (aka " "nnz_lno_t, and const doesn't matter)"); - typedef typename KernelHandle::const_size_type c_size_t; - typedef typename KernelHandle::const_nnz_lno_t c_lno_t; - typedef typename KernelHandle::const_nnz_scalar_t c_scalar_t; + using c_size_t = typename KernelHandle::const_size_type; + using c_lno_t = typename KernelHandle::const_nnz_lno_t; + using c_scalar_t = typename KernelHandle::const_nnz_scalar_t; - typedef typename KernelHandle::HandleExecSpace c_exec_t; - typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; - typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; + using c_exec_t = typename KernelHandle::HandleExecSpace; + using c_temp_t = typename KernelHandle::HandleTempMemorySpace; + using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> - const_handle_type; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle< + c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; const_handle_type tmp_handle(*handle); - typedef Kokkos::View< + using scalar_nnz_view_t_ = Kokkos::View::array_layout, + typename lno_row_view_t_::device_type>; + + using RowMap_Internal = Kokkos::View< typename lno_row_view_t_::const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout< lno_row_view_t_>::array_layout, typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > - RowMap_Internal; + Kokkos::MemoryTraits >; - typedef Kokkos::View< + using Entries_Internal = Kokkos::View< typename lno_nnz_view_t_::const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout< lno_nnz_view_t_>::array_layout, typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > - Entries_Internal; + Kokkos::MemoryTraits >; + + using Values_Internal = Kokkos::View< + typename scalar_nnz_view_t_::const_value_type *, + typename KokkosKernels::Impl::GetUnifiedLayout< + scalar_nnz_view_t_>::array_layout, + typename scalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; #ifdef KK_TRISOLVE_TIMERS Kokkos::Timer timer_sptrsv; #endif - RowMap_Internal rowmap_i = rowmap; + RowMap_Internal rowmap_i = rowmap; Entries_Internal entries_i = entries; + Values_Internal values_i; + // Since the values were not provided + // the TPLs are not going to be used + // hard coding false for tpl_spec_avail KokkosSparse::Impl::SPTRSV_SYMBOLIC< ExecutionSpace, const_handle_type, RowMap_Internal, - Entries_Internal>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, - entries_i); + Entries_Internal, Values_Internal, false>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, + entries_i, values_i); #ifdef KK_TRISOLVE_TIMERS std::cout << " > sptrsv_symbolic time = " << timer_sptrsv.seconds() @@ -181,19 +194,39 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, "sptrsv_symbolic: A scalar type must match KernelHandle entry " "type (aka nnz_lno_t, and const doesn't matter)"); - typedef typename KernelHandle::const_size_type c_size_t; - typedef typename KernelHandle::const_nnz_lno_t c_lno_t; - typedef typename KernelHandle::const_nnz_scalar_t c_scalar_t; + using c_size_t = typename KernelHandle::const_size_type; + using c_lno_t = typename KernelHandle::const_nnz_lno_t; + using c_scalar_t = typename KernelHandle::const_nnz_scalar_t; - typedef typename KernelHandle::HandleExecSpace c_exec_t; - typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; - typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; + using c_exec_t = typename KernelHandle::HandleExecSpace; + using c_temp_t = typename KernelHandle::HandleTempMemorySpace; + using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> - const_handle_type; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle< + c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; const_handle_type tmp_handle(*handle); + using RowMap_Internal = Kokkos::View< + typename lno_row_view_t_::const_value_type *, + typename KokkosKernels::Impl::GetUnifiedLayout< + lno_row_view_t_>::array_layout, + typename lno_row_view_t_::device_type, + Kokkos::MemoryTraits >; + + using Entries_Internal = Kokkos::View< + typename lno_nnz_view_t_::const_value_type *, + typename KokkosKernels::Impl::GetUnifiedLayout< + lno_nnz_view_t_>::array_layout, + typename lno_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; + + using Values_Internal = Kokkos::View< + typename scalar_nnz_view_t_::const_value_type *, + typename KokkosKernels::Impl::GetUnifiedLayout< + scalar_nnz_view_t_>::array_layout, + typename scalar_nnz_view_t_::device_type, + Kokkos::MemoryTraits >; + #ifdef KK_TRISOLVE_TIMERS Kokkos::Timer timer_sptrsv; #endif @@ -202,52 +235,56 @@ void sptrsv_symbolic(ExecutionSpace &space, KernelHandle *handle, KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE if constexpr (std::is_same_v) { - using RowMap_Internal = Kokkos::View< - typename lno_row_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_row_view_t_>::array_layout, - typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits >; - - using Entries_Internal = Kokkos::View< - typename lno_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - lno_nnz_view_t_>::array_layout, - typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits >; - - using Values_Internal = Kokkos::View< - typename scalar_nnz_view_t_::const_value_type *, - typename KokkosKernels::Impl::GetUnifiedLayout< - scalar_nnz_view_t_>::array_layout, - typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits >; - RowMap_Internal rowmap_i = rowmap; Entries_Internal entries_i = entries; Values_Internal values_i = values; typedef typename KernelHandle::SPTRSVHandleType sptrsvHandleType; sptrsvHandleType *sh = handle->get_sptrsv_handle(); - auto nrows = sh->get_nrows(); - - KokkosSparse::Impl::sptrsvcuSPARSE_symbolic< - ExecutionSpace, sptrsvHandleType, RowMap_Internal, Entries_Internal, - Values_Internal>(space, sh, nrows, rowmap_i, entries_i, values_i, - false); + // auto nrows = sh->get_nrows(); + + std::string label = "KokkosSparse::sptrsv[TPL_CUSPARSE," + + Kokkos::ArithTraits::name() + "]"; + Kokkos::Profiling::pushRegion(label); + // KokkosSparse::Impl::sptrsvcuSPARSE_symbolic< + // ExecutionSpace, sptrsvHandleType, RowMap_Internal, Entries_Internal, + // Values_Internal>(space, sh, nrows, rowmap_i, entries_i, values_i, + // false); + KokkosSparse::Impl::SPTRSV_SYMBOLIC< + ExecutionSpace, const_handle_type, RowMap_Internal, + Entries_Internal, Values_Internal>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, + entries_i, values_i); + Kokkos::Profiling::popRegion(); } else { - (void)values; - KokkosSparse::Experimental::sptrsv_symbolic(space, handle, rowmap, - entries); + RowMap_Internal rowmap_i = rowmap; + Entries_Internal entries_i = entries; + Values_Internal values_i = values; + + KokkosSparse::Impl::SPTRSV_SYMBOLIC< + ExecutionSpace, const_handle_type, RowMap_Internal, + Entries_Internal, Values_Internal,false>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, + entries_i, values_i); } #else // We better go to the native implementation - (void)values; - KokkosSparse::Experimental::sptrsv_symbolic(space, handle, rowmap, entries); + RowMap_Internal rowmap_i = rowmap; + Entries_Internal entries_i = entries; + Values_Internal values_i = values; + + KokkosSparse::Impl::SPTRSV_SYMBOLIC< + ExecutionSpace, const_handle_type, RowMap_Internal, + Entries_Internal, Values_Internal,false>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, + entries_i, values_i); #endif } else { - (void)values; - KokkosSparse::Experimental::sptrsv_symbolic(space, handle, rowmap, entries); + RowMap_Internal rowmap_i = rowmap; + Entries_Internal entries_i = entries; + Values_Internal values_i = values; + + KokkosSparse::Impl::SPTRSV_SYMBOLIC< + ExecutionSpace, const_handle_type, RowMap_Internal, + Entries_Internal, Values_Internal,false>::sptrsv_symbolic(space, &tmp_handle, rowmap_i, + entries_i, values_i); } #ifdef KK_TRISOLVE_TIMERS std::cout << " + sptrsv_symbolic time = " << timer_sptrsv.seconds() @@ -302,9 +339,9 @@ template , @@ -350,55 +387,49 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, typename scalar_nnz_view_t_::device_type>::value, "sptrsv: rowmap and values have different device types."); - typedef typename KernelHandle::const_size_type c_size_t; - typedef typename KernelHandle::const_nnz_lno_t c_lno_t; - typedef typename KernelHandle::const_nnz_scalar_t c_scalar_t; + using c_size_t = typename KernelHandle::const_size_type; + using c_lno_t = typename KernelHandle::const_nnz_lno_t; + using c_scalar_t = typename KernelHandle::const_nnz_scalar_t; - typedef typename KernelHandle::HandleExecSpace c_exec_t; - typedef typename KernelHandle::HandleTempMemorySpace c_temp_t; - typedef typename KernelHandle::HandlePersistentMemorySpace c_persist_t; + using c_exec_t = typename KernelHandle::HandleExecSpace; + using c_temp_t = typename KernelHandle::HandleTempMemorySpace; + using c_persist_t = typename KernelHandle::HandlePersistentMemorySpace; - typedef typename KokkosKernels::Experimental::KokkosKernelsHandle< - c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t> - const_handle_type; + using const_handle_type = typename KokkosKernels::Experimental::KokkosKernelsHandle< + c_size_t, c_lno_t, c_scalar_t, c_exec_t, c_temp_t, c_persist_t>; const_handle_type tmp_handle(*handle); - typedef Kokkos::View< + using RowMap_Internal = Kokkos::View< typename lno_row_view_t_::const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout< lno_row_view_t_>::array_layout, typename lno_row_view_t_::device_type, - Kokkos::MemoryTraits > - RowMap_Internal; + Kokkos::MemoryTraits >; - typedef Kokkos::View< + using Entries_Internal = Kokkos::View< typename lno_nnz_view_t_::const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout< lno_nnz_view_t_>::array_layout, typename lno_nnz_view_t_::device_type, - Kokkos::MemoryTraits > - Entries_Internal; + Kokkos::MemoryTraits >; - typedef Kokkos::View< + using Values_Internal = Kokkos::View< typename scalar_nnz_view_t_::const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout< scalar_nnz_view_t_>::array_layout, typename scalar_nnz_view_t_::device_type, - Kokkos::MemoryTraits > - Values_Internal; + Kokkos::MemoryTraits >; - typedef Kokkos::View< + using BType_Internal = Kokkos::View< typename BType::const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, typename BType::device_type, - Kokkos::MemoryTraits > - BType_Internal; + Kokkos::MemoryTraits >; - typedef Kokkos::View< + using XType_Internal = Kokkos::View< typename XType::non_const_value_type *, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, - typename XType::device_type, Kokkos::MemoryTraits > - XType_Internal; + typename XType::device_type, Kokkos::MemoryTraits >; RowMap_Internal rowmap_i = rowmap; Entries_Internal entries_i = entries; @@ -412,33 +443,38 @@ void sptrsv_solve(ExecutionSpace &space, KernelHandle *handle, KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE if constexpr (std::is_same_v) { - typedef typename KernelHandle::SPTRSVHandleType sptrsvHandleType; - sptrsvHandleType *sh = handle->get_sptrsv_handle(); - auto nrows = sh->get_nrows(); - - KokkosSparse::Impl::sptrsvcuSPARSE_solve< - ExecutionSpace, sptrsvHandleType, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, XType_Internal>( - space, sh, nrows, rowmap_i, entries_i, values_i, b_i, x_i, false); + // typedef typename KernelHandle::SPTRSVHandleType sptrsvHandleType; + // sptrsvHandleType *sh = handle->get_sptrsv_handle(); + // auto nrows = sh->get_nrows(); + + // KokkosSparse::Impl::sptrsvcuSPARSE_solve< + // ExecutionSpace, sptrsvHandleType, RowMap_Internal, Entries_Internal, + // Values_Internal, BType_Internal, XType_Internal>( + // space, sh, nrows, rowmap_i, entries_i, values_i, b_i, x_i, false); + KokkosSparse::Impl::SPTRSV_SOLVE< + ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, + Values_Internal, BType_Internal, + XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, + values_i, b_i, x_i); } else { KokkosSparse::Impl::SPTRSV_SOLVE< ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, + XType_Internal,false>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, values_i, b_i, x_i); } #else KokkosSparse::Impl::SPTRSV_SOLVE< ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, + XType_Internal,false>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, values_i, b_i, x_i); #endif } else { KokkosSparse::Impl::SPTRSV_SOLVE< ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, + XType_Internal, false>::sptrsv_solve(space, &tmp_handle, rowmap_i, entries_i, values_i, b_i, x_i); } @@ -783,16 +819,22 @@ void sptrsv_solve_streams(const std::vector &execspace_v, KokkosSparse::Experimental::SPTRSVAlgorithm::SPTRSV_CUSPARSE) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE // NOTE: assume all streams use the same SPTRSV_CUSPARSE algo. - KokkosSparse::Impl::sptrsvcuSPARSE_solve_streams< + // KokkosSparse::Impl::sptrsvcuSPARSE_solve_streams< + // ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, + // Values_Internal, BType_Internal, XType_Internal>( + // execspace_v, handle_i_v, rowmap_i_v, entries_i_v, values_i_v, b_i_v, + // x_i_v, false); + KokkosSparse::Impl::SPTRSV_SOLVE< ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, - Values_Internal, BType_Internal, XType_Internal>( - execspace_v, handle_i_v, rowmap_i_v, entries_i_v, values_i_v, b_i_v, - x_i_v, false); + Values_Internal, BType_Internal, + XType_Internal>::sptrsv_solve_streams(execspace_v, handle_i_v, + rowmap_i_v, entries_i_v, + values_i_v, b_i_v, x_i_v); #else KokkosSparse::Impl::SPTRSV_SOLVE< ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve_streams(execspace_v, handle_i_v, + XType_Internal, false>::sptrsv_solve_streams(execspace_v, handle_i_v, rowmap_i_v, entries_i_v, values_i_v, b_i_v, x_i_v); #endif @@ -800,7 +842,7 @@ void sptrsv_solve_streams(const std::vector &execspace_v, KokkosSparse::Impl::SPTRSV_SOLVE< ExecutionSpace, const_handle_type, RowMap_Internal, Entries_Internal, Values_Internal, BType_Internal, - XType_Internal>::sptrsv_solve_streams(execspace_v, handle_i_v, + XType_Internal, false>::sptrsv_solve_streams(execspace_v, handle_i_v, rowmap_i_v, entries_i_v, values_i_v, b_i_v, x_i_v); } diff --git a/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp b/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp index 1a22146d01..c5df6f8a8f 100644 --- a/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp +++ b/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_avail.hpp @@ -26,6 +26,49 @@ struct sptrsv_solve_tpl_spec_avail { enum : bool { value = false }; }; +// cuSPARSE +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + +#define KOKKOSSPARSE_SPTRSV_SOLVE_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, \ + LAYOUT, MEMSPACE) \ + template <> \ + struct sptrsv_solve_tpl_spec_avail , \ + Kokkos::View< \ + const int *, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const int *, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR *, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSSPARSE_SPTRSV_SOLVE_TPL_SPEC_AVAIL_CUSPARSE(float, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_TPL_SPEC_AVAIL_CUSPARSE(double, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) + +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + + } // namespace Impl } // namespace KokkosSparse diff --git a/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp index d626bc54e9..5845021c96 100644 --- a/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_sptrsv_solve_tpl_spec_decl.hpp @@ -17,8 +17,371 @@ #ifndef KOKKOSPARSE_SPTRSV_SOLVE_TPL_SPEC_DECL_HPP_ #define KOKKOSPARSE_SPTRSV_SOLVE_TPL_SPEC_DECL_HPP_ + +// cuSPARSE +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +#include "cusparse.h" +#include "KokkosSparse_Utils_cusparse.hpp" +#include "KokkosKernels_Handle.hpp" + namespace KokkosSparse { -namespace Impl {} +namespace Impl { + +template +void sptrsv_solve_cusparse(ExecutionSpace& space, + KernelHandle *sptrsv_handle, + RowMapType row_map, + EntriesType entries, + ValuesType values, + BType rhs, XType lhs, + const bool /*trans*/) { + using idx_type = typename KernelHandle::nnz_lno_t; + using size_type = typename KernelHandle::size_type; + using scalar_type = typename KernelHandle::scalar_t; + +#if (CUDA_VERSION >= 11030) + using memory_space = typename KernelHandle::memory_space; + + (void)row_map; + (void)entries; + (void)values; + + // cusparseDnVecDescr_t vecBDescr, vecXDescr; + + const idx_type nrows = sptrsv_handle->get_nrows(); + typename KernelHandle::SPTRSVcuSparseHandleType *h = + sptrsv_handle->get_cuSparseHandle(); + + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseSetStream(h->handle, space.cuda_stream())); + + const scalar_type alpha = scalar_type(1.0); + + const cudaDataType cudaValueType = cuda_data_type_from(); + + // Create dense vector B (RHS) + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&(h->vecBDescr), static_cast(nrows), + (void *)rhs.data(), cudaValueType)); + + // Create dense vector X (LHS) + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&(h->vecXDescr), static_cast(nrows), + (void *)lhs.data(), cudaValueType)); + + // Solve + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr, + h->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr)); + + // Destroy dense vector descriptors + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr)); + +#else // CUDA_VERSION < 11030 + + cusparseStatus_t status; + + typename KernelHandle::SPTRSVcuSparseHandleType *h = + sptrsv_handle->get_cuSparseHandle(); + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + + int nnz = entries.extent_int(0); + const int *rm = !std::is_same::value + ? sptrsv_handle->get_int_rowmap_ptr() + : (const int *)row_map.data(); + const int *ent = (const int *)entries.data(); + const scalar_type *vals = values.data(); + const scalar_type *bv = rhs.data(); + scalar_type *xv = lhs.data(); + + if constexpr (std::is_same_v) { + if (h->pBuffer == nullptr) { + std::cout << " pBuffer invalid" << std::endl; + } + + const scalar_type alpha = Kokkos::ArithTraits::one(); + status = cusparseDcsrsv2_solve( + h->handle, h->transpose, nrows, nnz, &alpha, h->descr, (double *)vals, + (int *)rm, (int *)ent, h->info, (double *)bv, (double *)xv, h->policy, + h->pBuffer); + + if (CUSPARSE_STATUS_SUCCESS != status) + std::cout << "solve status error name " << (status) << std::endl; + } else if (std::is_same::value) { + if (h->pBuffer == nullptr) { + std::cout << " pBuffer invalid" << std::endl; + } + + const scalar_type alpha = Kokkos::ArithTraits::one(); + status = cusparseScsrsv2_solve(h->handle, h->transpose, nrows, nnz, + &alpha, h->descr, (float *)vals, (int *)rm, + (int *)ent, h->info, (float *)bv, + (float *)xv, h->policy, h->pBuffer); + + if (CUSPARSE_STATUS_SUCCESS != status) + std::cout << "solve status error name " << (status) << std::endl; + } else if (std::is_same >::value) { + cuDoubleComplex cualpha; + cualpha.x = 1.0; + cualpha.y = 0.0; + status = cusparseZcsrsv2_solve( + h->handle, h->transpose, nrows, nnz, &cualpha, h->descr, + (cuDoubleComplex *)vals, (int *)rm, (int *)ent, h->info, + (cuDoubleComplex *)bv, (cuDoubleComplex *)xv, h->policy, h->pBuffer); + + if (CUSPARSE_STATUS_SUCCESS != status) + std::cout << "solve status error name " << (status) << std::endl; + } else if (std::is_same >::value) { + cuComplex cualpha; + cualpha.x = 1.0; + cualpha.y = 0.0; + status = cusparseCcsrsv2_solve( + h->handle, h->transpose, nrows, nnz, &cualpha, h->descr, + (cuComplex *)vals, (int *)rm, (int *)ent, h->info, (cuComplex *)bv, + (cuComplex *)xv, h->policy, h->pBuffer); + + if (CUSPARSE_STATUS_SUCCESS != status) + std::cout << "solve status error name " << (status) << std::endl; + } else { + throw std::runtime_error("CUSPARSE wrapper error: unsupported type.\n"); + } +#endif +} + +template +void sptrsv_solve_streams_cusparse( + const std::vector &execspace_v, + std::vector &handle_v, + const std::vector &row_map_v, + const std::vector &entries_v, + const std::vector &values_v, + const std::vector &rhs_v, + std::vector &lhs_v, bool /*trans*/ +) { + using idx_type = typename KernelHandle::nnz_lno_t; + using size_type = typename KernelHandle::size_type; + using scalar_type = typename KernelHandle::nnz_scalar_t; + using memory_space = typename KernelHandle::HandlePersistentMemorySpace; + using sptrsvHandleType = typename KernelHandle::SPTRSVHandleType; + using sptrsvCuSparseHandleType = + typename sptrsvHandleType::SPTRSVcuSparseHandleType; + + int nstreams = execspace_v.size(); +#if (CUDA_VERSION >= 11030) + (void)row_map_v; + (void)entries_v; + (void)values_v; + + const bool is_cuda_space = + std::is_same::value || + std::is_same::value || + std::is_same::value; + + const bool is_idx_type_supported = std::is_same::value || + std::is_same::value; + + if constexpr (!is_cuda_space) { + throw std::runtime_error( + "KokkosKernels sptrsvcuSPARSE_solve_streams: MEMORY IS NOT ALLOCATED " + "IN GPU DEVICE for CUSPARSE\n"); + } else if constexpr (!is_idx_type_supported) { + throw std::runtime_error( + "CUSPARSE requires local ordinals to be integer (32 bits or 64 " + "bits).\n"); + } else { + const scalar_type alpha = scalar_type(1.0); + + cudaDataType cudaValueType = cuda_data_type_from(); + + std::vector h_v(nstreams); + + for (int i = 0; i < nstreams; i++) { + sptrsvHandleType *sptrsv_handle = handle_v[i].get_sptrsv_handle(); + h_v[i] = sptrsv_handle->get_cuSparseHandle(); + + // Bind cuspare handle to a stream + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream())); + + int64_t nrows = static_cast(sptrsv_handle->get_nrows()); + + // Create dense vector B (RHS) + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec( + &(h_v[i]->vecBDescr), nrows, (void *)rhs_v[i].data(), cudaValueType)); + + // Create dense vector X (LHS) + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec( + &(h_v[i]->vecXDescr), nrows, (void *)lhs_v[i].data(), cudaValueType)); + } + + // Solve + for (int i = 0; i < nstreams; i++) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_solve( + h_v[i]->handle, h_v[i]->transpose, &alpha, h_v[i]->matDescr, + h_v[i]->vecBDescr, h_v[i]->vecXDescr, cudaValueType, + CUSPARSE_SPSV_ALG_DEFAULT, h_v[i]->spsvDescr)); + } + + // Destroy dense vector descriptors + for (int i = 0; i < nstreams; i++) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecBDescr)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecXDescr)); + } + } +#else // CUDA_VERSION < 11030 + const bool is_cuda_space = + std::is_same::value || + std::is_same::value || + std::is_same::value; + + if constexpr (!is_cuda_space) { + throw std::runtime_error( + "KokkosKernels sptrsvcuSPARSE_solve_streams: MEMORY IS NOT ALLOCATED " + "IN GPU DEVICE for CUSPARSE\n"); + } else if constexpr (!std::is_same::value) { + throw std::runtime_error( + "CUSPARSE requires local ordinals to be integer.\n"); + } else { + const scalar_type alpha = scalar_type(1.0); + std::vector sptrsv_handle_v(nstreams); + std::vector h_v(nstreams); + std::vector rm_v(nstreams); + std::vector ent_v(nstreams); + std::vector vals_v(nstreams); + std::vector bv_v(nstreams); + std::vector xv_v(nstreams); + + for (int i = 0; i < nstreams; i++) { + sptrsv_handle_v[i] = handle_v[i].get_sptrsv_handle(); + h_v[i] = sptrsv_handle_v[i]->get_cuSparseHandle(); + + // Bind cuspare handle to a stream + KOKKOS_CUSPARSE_SAFE_CALL( + cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream())); + + if (h_v[i]->pBuffer == nullptr) { + std::cout << " pBuffer invalid on stream " << i << std::endl; + } + rm_v[i] = !std::is_same::value + ? sptrsv_handle_v[i]->get_int_rowmap_ptr() + : reinterpret_cast(row_map_v[i].data()); + ent_v[i] = reinterpret_cast(entries_v[i].data()); + vals_v[i] = values_v[i].data(); + bv_v[i] = rhs_v[i].data(); + xv_v[i] = lhs_v[i].data(); + } + + for (int i = 0; i < nstreams; i++) { + int nnz = entries_v[i].extent_int(0); + int nrows = static_cast(sptrsv_handle_v[i]->get_nrows()); + if (std::is_same::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_solve( + h_v[i]->handle, h_v[i]->transpose, nrows, nnz, + reinterpret_cast(&alpha), h_v[i]->descr, + reinterpret_cast(vals_v[i]), + reinterpret_cast(rm_v[i]), + reinterpret_cast(ent_v[i]), h_v[i]->info, + reinterpret_cast(bv_v[i]), + reinterpret_cast(xv_v[i]), h_v[i]->policy, + h_v[i]->pBuffer)); + } else if (std::is_same::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_solve( + h_v[i]->handle, h_v[i]->transpose, nrows, nnz, + reinterpret_cast(&alpha), h_v[i]->descr, + reinterpret_cast(vals_v[i]), + reinterpret_cast(rm_v[i]), + reinterpret_cast(ent_v[i]), h_v[i]->info, + reinterpret_cast(bv_v[i]), + reinterpret_cast(xv_v[i]), h_v[i]->policy, + h_v[i]->pBuffer)); + } else if (std::is_same >::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_solve( + h_v[i]->handle, h_v[i]->transpose, nrows, nnz, + reinterpret_cast(&alpha), h_v[i]->descr, + reinterpret_cast(vals_v[i]), + reinterpret_cast(rm_v[i]), + reinterpret_cast(ent_v[i]), h_v[i]->info, + reinterpret_cast(bv_v[i]), + reinterpret_cast(xv_v[i]), h_v[i]->policy, + h_v[i]->pBuffer)); + } else if (std::is_same >::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_solve( + h_v[i]->handle, h_v[i]->transpose, nrows, nnz, + reinterpret_cast(&alpha), h_v[i]->descr, + reinterpret_cast(vals_v[i]), + reinterpret_cast(rm_v[i]), + reinterpret_cast(ent_v[i]), h_v[i]->info, + reinterpret_cast(bv_v[i]), + reinterpret_cast(xv_v[i]), h_v[i]->policy, + h_v[i]->pBuffer)); + } else { + throw std::runtime_error("CUSPARSE wrapper error: unsupported type.\n"); + } + } + } +#endif +} + +#define KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(SCALAR, LAYOUT, MEMSPACE) \ + template<> \ + struct SPTRSV_SOLVE, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, true> { \ + \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using RowMapType = Kokkos::View, Kokkos::MemoryTraits >; \ + using EntriesType = Kokkos::View, Kokkos::MemoryTraits >; \ + using ValuesType = Kokkos::View, Kokkos::MemoryTraits >; \ + using BType = Kokkos::View, Kokkos::MemoryTraits >; \ + using XType = Kokkos::View, Kokkos::MemoryTraits >; \ + \ + static void sptrsv_solve(const Kokkos::Cuda& space, \ + KernelHandle *handle, \ + const RowMapType row_map, \ + const EntriesType entries, \ + const ValuesType values, \ + const BType b, XType x) { \ + bool trans = false; \ + typename KernelHandle::SPTRSVHandleType *sptrsv_handle = handle->get_sptrsv_handle(); \ + std::string label = "KokkosSparse::sptrsv_solve[TPL_CUSPARSE," \ + + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + sptrsv_solve_cusparse(space, sptrsv_handle, row_map, entries, \ + values, b, x, trans); \ + Kokkos::Profiling::popRegion(); \ + } \ + \ + static void sptrsv_solve_streams(const std::vector& space_v, std::vector &handle_v, const std::vector &row_map_v, const std::vector &entries_v, const std::vector &values_v, const std::vector &b_v, std::vector &x_v) { \ + \ + std::string label = "KokkosSparse::sptrsv_solve_streams[TPL_CUSPARSE," \ + + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + sptrsv_solve_streams_cusparse(space_v, handle_v, row_map_v, \ + entries_v, values_v, b_v, x_v, \ + false); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPTRSV_SOLVE_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) + +} // namespace Impl } // namespace KokkosSparse +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE #endif diff --git a/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_avail.hpp b/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_avail.hpp index c38de6dc31..fb7c7855be 100644 --- a/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_avail.hpp +++ b/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_avail.hpp @@ -20,11 +20,47 @@ namespace KokkosSparse { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct sptrsv_symbolic_tpl_spec_avail { enum : bool { value = false }; }; +// cuSPARSE +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + +#define KOKKOSSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_AVAIL_CUSPARSE(SCALAR, \ + LAYOUT, MEMSPACE) \ + template <> \ + struct sptrsv_symbolic_tpl_spec_avail < \ + KokkosKernels::Experimental::KokkosKernelsHandle< \ + const int, const int, const SCALAR, \ + Kokkos::Cuda, MEMSPACE, MEMSPACE>, \ + Kokkos::View< \ + const int *, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const int *, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View< \ + const SCALAR *, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits > > { \ + enum : bool { value = true }; \ + }; + +KOKKOSSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_AVAIL_CUSPARSE(float, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_AVAIL_CUSPARSE(double, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_AVAIL_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, \ + Kokkos::CudaSpace) + +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + } // namespace Impl } // namespace KokkosSparse diff --git a/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp index 096704ad7b..9ecc07fdf3 100644 --- a/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_sptrsv_symbolic_tpl_spec_decl.hpp @@ -17,8 +17,225 @@ #ifndef KOKKOSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_DECL_HPP_ #define KOKKOSPARSE_SPTRSV_SYMBOLIC_TPL_SPEC_DECL_HPP_ + +// cuSPARSE +#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE +#include "cusparse.h" +#include "KokkosSparse_Utils_cusparse.hpp" +#include "KokkosKernels_Handle.hpp" + namespace KokkosSparse { -namespace Impl {} +namespace Impl { + +template +void sptrsv_analysis_cusparse(ExecutionSpace &space, KernelHandle *sptrsv_handle, + ain_row_index_view_type row_map, + ain_nonzero_index_view_type entries, + ain_values_scalar_view_type values, const bool trans) { + using idx_type = typename KernelHandle::nnz_lno_t; + using size_type = typename KernelHandle::size_type; + using scalar_type = typename KernelHandle::scalar_t; + using memory_space = typename KernelHandle::memory_space; + +#if (CUDA_VERSION >= 11030) + using nnz_scalar_view_t = typename KernelHandle::nnz_scalar_view_t; + using KAT = Kokkos::ArithTraits; + + const bool is_lower = sptrsv_handle->is_lower_tri(); + sptrsv_handle->create_cuSPARSE_Handle(trans, is_lower); + + const idx_type nrows = sptrsv_handle->get_nrows(); + typename KernelHandle::SPTRSVcuSparseHandleType *h = + sptrsv_handle->get_cuSparseHandle(); + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + + int64_t nnz = static_cast(entries.extent(0)); + size_t pBufferSize; + const scalar_type alpha = KAT::one(); + + cusparseIndexType_t cudaCsrRowMapType = + cusparse_index_type_t_from(); + cusparseIndexType_t cudaCsrColIndType = + cusparse_index_type_t_from(); + cudaDataType cudaValueType = cuda_data_type_from(); + + // Create sparse matrix in CSR format + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + &(h->matDescr), static_cast(nrows), + static_cast(nrows), nnz, (void *)row_map.data(), (void *)entries.data(), + (void *)values.data(), cudaCsrRowMapType, cudaCsrColIndType, + CUSPARSE_INDEX_BASE_ZERO, cudaValueType)); + + // Create dummy dense vector B (RHS) + nnz_scalar_view_t b_dummy(Kokkos::view_alloc(space, "b_dummy"), nrows); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&(h->vecBDescr_dummy), static_cast(nrows), + b_dummy.data(), cudaValueType)); + + // Create dummy dense vector X (LHS) + nnz_scalar_view_t x_dummy(Kokkos::view_alloc(space, "x_dummy"), nrows); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&(h->vecXDescr_dummy), static_cast(nrows), + x_dummy.data(), cudaValueType)); + + // Specify Lower|Upper fill mode + if (is_lower) { + cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER; + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode))); + } else { + cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_UPPER; + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode))); + } + + // Specify Unit|Non-Unit diagonal type. + cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT; + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype))); + + // Allocate an external buffer for analysis + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_bufferSize(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy, + h->vecXDescr_dummy, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, + h->spsvDescr, &pBufferSize)); + + // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. + KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&(h->pBuffer), pBufferSize)); + + // Run analysis + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_analysis(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy, + h->vecXDescr_dummy, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, + h->spsvDescr, h->pBuffer)); + + // Destroy dummy dense vector descriptors + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr_dummy)); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr_dummy)); +#else // CUDA_VERSION < 11030 + + bool is_lower = sptrsv_handle->is_lower_tri(); + sptrsv_handle->create_cuSPARSE_Handle(trans, is_lower); + + typename KernelHandle::SPTRSVcuSparseHandleType *h = + sptrsv_handle->get_cuSparseHandle(); + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsrsv2Info(&(h->info))); + + // query how much memory used in csrsv2, and allocate the buffer + int nnz = entries.extent_int(0); + int pBufferSize; + + const scalar_type *vals = values.data(); + + if constexpr (std::is_same::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, + values.data(), row_map.data(), entries.data(), + h->info, &pBufferSize)); + + // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. + cudaError_t my_error; + my_error = cudaMalloc((void **)&(h->pBuffer), pBufferSize); + + if (cudaSuccess != my_error) + std::cout << "cudmalloc pBuffer error_t error name " + << cudaGetErrorString(my_error) << std::endl; + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_analysis( + h->handle, h->transpose, nrows, nnz, h->descr, values.data(), + row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); + } else if (std::is_same::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, + values.data(), row_map.data(), entries.data(), h->info, + &pBufferSize)); + + // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. + cudaError_t my_error; + my_error = cudaMalloc((void **)&(h->pBuffer), pBufferSize); + + if (cudaSuccess != my_error) + std::cout << "cudmalloc pBuffer error_t error name " + << cudaGetErrorString(my_error) << std::endl; + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_analysis(h->handle, h->transpose, nrows, nnz, h->descr, values.data(), + row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); + } else if (std::is_same >::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, + reinterpret_cast(values.data()), row_map.data(), + entries.data(), h->info, &pBufferSize)); + + // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. + cudaError_t my_error; + my_error = cudaMalloc((void **)&(h->pBuffer), pBufferSize); + + if (cudaSuccess != my_error) + std::cout << "cudmalloc pBuffer error_t error name " + << cudaGetErrorString(my_error) << std::endl; + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_analysis(h->handle, h->transpose, nrows, nnz, + h->descr, reinterpret_cast(values.data()), + row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); + } else if (std::is_same >::value) { + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_bufferSize(h->handle, h->transpose, nrows, nnz, h->descr, + reinterpret_cast(values.data()), + row_map.data(), entries.data(), h->info, &pBufferSize)); + + // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. + cudaError_t my_error; + my_error = cudaMalloc((void **)&(h->pBuffer), pBufferSize); + + if (cudaSuccess != my_error) + std::cout << "cudmalloc pBuffer error_t error name " + << cudaGetErrorString(my_error) << std::endl; + + KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_analysis( + h->handle, h->transpose, nrows, nnz, h->descr, + reinterpret_cast(values.data()), + row_map.data(), entries.data(), h->info, h->policy, h->pBuffer)); + } +#endif // CUDA_VERSION >= 11030 +} // sptrsv_cusparse() + + +#define KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(SCALAR, LAYOUT, MEMSPACE) \ + template<> \ + struct SPTRSV_SYMBOLIC, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >, \ + Kokkos::View, Kokkos::MemoryTraits >,true, true> { \ + \ + using KernelHandle = KokkosKernels::Experimental::KokkosKernelsHandle; \ + using RowMapType = Kokkos::View, Kokkos::MemoryTraits >; \ + using EntriesType = Kokkos::View, Kokkos::MemoryTraits >; \ + using ValuesType = Kokkos::View, Kokkos::MemoryTraits >; \ + \ + static void sptrsv_symbolic(const Kokkos::Cuda& space, \ + KernelHandle *handle, \ + const RowMapType row_map, \ + const EntriesType entries, \ + const ValuesType values) { \ + bool trans = false; \ + typename KernelHandle::SPTRSVHandleType *sptrsv_handle = handle->get_sptrsv_handle(); \ + std::string label = "KokkosSparse::sptrsv_symbolic[TPL_CUSPARSE," \ + + Kokkos::ArithTraits::name() + "]"; \ + Kokkos::Profiling::pushRegion(label); \ + sptrsv_analysis_cusparse(space, sptrsv_handle, row_map, entries, \ + values, trans); \ + Kokkos::Profiling::popRegion(); \ + } \ + }; + +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(float, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(double, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaSpace) + +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(float, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(double, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) +KOKKOSSPARSE_SPTRSV_SYMBOLIC_CUSPARSE(Kokkos::complex, Kokkos::LayoutLeft, Kokkos::CudaUVMSpace) + +} // namespace Impl } // namespace KokkosSparse +#endif // KOKKOSKERNELS_ENABLE_TPL_CUSPARSE + + #endif diff --git a/sparse/unit_test/Test_Sparse_sptrsv.hpp b/sparse/unit_test/Test_Sparse_sptrsv.hpp index b8b35bc422..900ef1faca 100644 --- a/sparse/unit_test/Test_Sparse_sptrsv.hpp +++ b/sparse/unit_test/Test_Sparse_sptrsv.hpp @@ -113,8 +113,8 @@ struct SptrsvTest { }; static void run_test_sptrsv() { - scalar_t ZERO = scalar_t(0); - scalar_t ONE = scalar_t(1); + const scalar_t ZERO = Kokkos::ArithTraits::zero(); + const scalar_t ONE = Kokkos::ArithTraits::one(); const size_type nrows = 5; const size_type nnz = 10; @@ -236,24 +236,47 @@ struct SptrsvTest { if (std::is_same::value && std::is_same::value && std::is_same::value) { - Kokkos::deep_copy(lhs, ZERO); - KernelHandle kh; - bool is_lower_tri = false; - kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, - is_lower_tri); - - sptrsv_symbolic(&kh, row_map, entries, values); - Kokkos::fence(); - - sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); - Kokkos::fence(); - - scalar_t sum = 0.0; - Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), - ReductionCheck(lhs), sum); - EXPECT_EQ(sum, lhs.extent(0)); - - kh.destroy_sptrsv_handle(); + { + Kokkos::deep_copy(lhs, ZERO); + KernelHandle kh; + bool is_lower_tri = false; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SPTRSV_CUSPARSE, nrows, + is_lower_tri); + + sptrsv_symbolic(&kh, row_map, entries, values); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), + ReductionCheck(lhs), sum); + EXPECT_EQ(sum, lhs.extent(0)); + + kh.destroy_sptrsv_handle(); + } + { + Kokkos::deep_copy(lhs, ZERO); + KernelHandle kh; + bool is_lower_tri = false; + kh.create_sptrsv_handle(SPTRSVAlgorithm::SEQLVLSCHD_RP, nrows, + is_lower_tri); + + sptrsv_symbolic(&kh, row_map, entries, values); + Kokkos::fence(); + + sptrsv_solve(&kh, row_map, entries, values, rhs, lhs); + Kokkos::fence(); + + scalar_t sum = 0.0; + Kokkos::parallel_reduce(range_policy_t(0, lhs.extent(0)), + ReductionCheck(lhs), sum); + EXPECT_EQ(sum, lhs.extent(0)); + + + kh.destroy_sptrsv_handle(); + } } #endif @@ -817,6 +840,7 @@ template ; TestStruct::run_test_sptrsv(); + std::cout << "Done with test_sptrsv" << std::endl; } template (); \ + } \ + TEST_F(TestCategory, \ + sparse##_##sptrsv_streams##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ test_sptrsv_streams(); \ }