diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8cfdd8ad7..c56774cd9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -27,6 +27,7 @@ Unreleased
 ### Fixed
 
 - Fix benchmark result token
+- Fix set_ptr bug
 
 ### Changed
 - Update cuda version of allgebra
diff --git a/include/monolish/common/monolish_coo.hpp b/include/monolish/common/monolish_coo.hpp
index 7059b3fe5..2e180dca3 100644
--- a/include/monolish/common/monolish_coo.hpp
+++ b/include/monolish/common/monolish_coo.hpp
@@ -508,7 +508,7 @@ template <typename Float> class COO {
    * @param c col_index
    * @param v value
    * @note
-   * - # of computation: 3
+   * - # of computation: 3nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -523,13 +523,28 @@ template <typename Float> class COO {
    * @param c col_index
    * @param v value
    * @note
-   * - # of computation: 3
+   * - # of computation: 3nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
   void set_ptr(const size_t rN, const size_t cN, const std::vector<int> &r,
                const std::vector<int> &c, const size_t vsize, const Float *v);
 
+  /**
+   * @brief Set COO array from std::vector
+   * @param rN # of row
+   * @param cN # of column
+   * @param r row_index
+   * @param c col_index
+   * @param v value
+   * @note
+   * - # of computation: 3nnz
+   * - Multi-threading: false
+   * - GPU acceleration: false
+   **/
+  void set_ptr(const size_t rN, const size_t cN, const std::vector<int> &r,
+               const std::vector<int> &c, const size_t vsize, const Float v);
+
   /**
    * @brief get # of row
    * @note
diff --git a/include/monolish/common/monolish_crs.hpp b/include/monolish/common/monolish_crs.hpp
index 78cc1e84f..99e76e6c1 100644
--- a/include/monolish/common/monolish_crs.hpp
+++ b/include/monolish/common/monolish_crs.hpp
@@ -258,7 +258,7 @@ template <typename Float> class CRS {
    *elements (size nnz)
    * @param value value index, which stores the non-zero elements (size nnz)
    * @note
-   * - # of computation: 3
+   * - # of computation: 3nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -275,7 +275,7 @@ template <typename Float> class CRS {
    *elements (size nnz)
    * @param value value index, which stores the non-zero elements (size nnz)
    * @note
-   * - # of computation: 3
+   * - # of computation: 3nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -283,6 +283,24 @@ template <typename Float> class CRS {
                const std::vector<int> &colind, const size_t vsize,
                const Float *value);
 
+  /**
+   * @brief Set CRS array from std::vector
+   * @param M # of row
+   * @param N # of col
+   * @param rowptr row_ptr, which stores the starting points of the rows of the
+   *arrays value and col_ind (size M+1)
+   * @param colind col_ind, which stores the column numbers of the non-zero
+   *elements (size nnz)
+   * @param value scalar value, which fills all non-zero elements
+   * @note
+   * - # of computation: 3nnz
+   * - Multi-threading: false
+   * - GPU acceleration: false
+   **/
+  void set_ptr(const size_t M, const size_t N, const std::vector<int> &rowptr,
+               const std::vector<int> &colind, const size_t vsize,
+               const Float value);
+
   /**
    * @brief print all elements to standard I/O
    * @param force_cpu Ignore device status and output CPU data
diff --git a/include/monolish/common/monolish_dense.hpp b/include/monolish/common/monolish_dense.hpp
index 25f0fe75e..6cc6549e5 100644
--- a/include/monolish/common/monolish_dense.hpp
+++ b/include/monolish/common/monolish_dense.hpp
@@ -288,7 +288,7 @@ template <typename Float> class Dense {
    * @param N # of col
    * @param value value (size nnz)
    * @note
-   * - # of computation: 1
+   * - # of computation: nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -300,12 +300,24 @@ template <typename Float> class Dense {
    * @param M # of row
    * @param N # of col
    * @param value value (size nnz)
    * @note
-   * - # of computation: 1
+   * - # of computation: nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
   void set_ptr(const size_t M, const size_t N, const Float *value);
+  /**
+   * @brief Set Dense array from scalar value
+   * @param M # of row
+   * @param N # of col
+   * @param value scalar value to fill all elements
+   * @note
+   * - # of computation: nnz
+   * - Multi-threading: false
+   * - GPU acceleration: false
+   **/
+  void set_ptr(const size_t M, const size_t N, const Float value);
+
   /**
    * @brief get # of row
    * @note
@@ -609,6 +621,13 @@
    * - GPU acceleration: false
    */
   void resize(size_t N, Float Val = 0) {
+    if (first + N < alloc_nnz) {
+      for (size_t i = val_nnz; i < N; ++i) {
+        begin()[i] = Val;
+      }
+      val_nnz = N;
+      return;
+    }
     if (get_device_mem_stat()) {
       throw std::runtime_error("Error, GPU matrix cant use resize");
     }
diff --git a/include/monolish/common/monolish_tensor_coo.hpp b/include/monolish/common/monolish_tensor_coo.hpp
index d17fb5d63..29be8e451 100644
--- a/include/monolish/common/monolish_tensor_coo.hpp
+++ b/include/monolish/common/monolish_tensor_coo.hpp
@@ -199,7 +199,7 @@ template <typename Float> class tensor_COO {
    * @param indix index fo tensor
    * @param v value
    * @note
-   * - # of computation: 3
+   * - # of computation: 3nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -214,7 +214,7 @@ template <typename Float> class tensor_COO {
    * @param vsize size of value
    * @param v value
    * @note
-   * - # of computation: 3
+   * - # of computation: 3nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -222,6 +222,21 @@ template <typename Float> class tensor_COO {
                const std::vector<std::vector<size_t>> &index,
                const size_t vsize, const Float *v);
 
+  /**
+   * @brief Set tensor_COO array from array
+   * @param shape shape of tensor
+   * @param index index of tensor
+   * @param vsize size of value
+   * @param v value
+   * @note
+   * - # of computation: 3nnz
+   * - Multi-threading: false
+   * - GPU acceleration: false
+   **/
+  void set_ptr(const std::vector<size_t> &shape,
+               const std::vector<std::vector<size_t>> &index,
+               const size_t vsize, const Float v);
+
   /**
    * @brief get shape
    * @note
diff --git a/include/monolish/common/monolish_tensor_dense.hpp b/include/monolish/common/monolish_tensor_dense.hpp
index 90f730d3a..adcf113e6 100644
--- a/include/monolish/common/monolish_tensor_dense.hpp
+++ b/include/monolish/common/monolish_tensor_dense.hpp
@@ -271,7 +271,7 @@ template <typename Float> class tensor_Dense {
    * @param shape shape of tensor
    * @param value value (size nnz)
    * @note
-   * - # of computation: 1
+   * - # of computation: nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
@@ -283,12 +283,23 @@ template <typename Float> class tensor_Dense {
    * @param shape shape of tensor
    * @param value value (size nnz)
    * @note
-   * - # of computation: 1
+   * - # of computation: nnz
    * - Multi-threading: false
    * - GPU acceleration: false
    **/
   void set_ptr(const std::vector<size_t> &shape, const Float *value);
 
+  /**
+   * @brief Set tensor_Dense array from scalar value
+   * @param shape shape of tensor
+   * @param value scalar value to fill all elements
+   * @note
+   * - # of computation: nnz
+   * - Multi-threading: false
+   * - GPU acceleration: false
+   **/
+  void set_ptr(const std::vector<size_t> &shape, const Float value);
+
   /**
    * @brief get shape
    * @note
diff --git a/src/utils/copy/copy_coo.cpp b/src/utils/copy/copy_coo.cpp
index ebbcbbdde..b8666b830 100644
--- a/src/utils/copy/copy_coo.cpp
+++ b/src/utils/copy/copy_coo.cpp
@@ -17,10 +17,12 @@ template <typename T> void COO<T>::operator=(const matrix::COO<T> &mat) {
   assert(monolish::util::is_same_device_mem_stat(*this, mat));
 
   // value copy
-  internal::vcopy(get_nnz(), data(), mat.data(), get_device_mem_stat());
+  internal::vcopy(get_nnz(), mat.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
+template void COO<double>::operator=(const matrix::COO<double> &mat);
+template void COO<float>::operator=(const matrix::COO<float> &mat);
 
 template <typename T>
 void COO<T>::set_ptr(const size_t rN, const size_t cN,
@@ -31,9 +33,8 @@ void COO<T>::set_ptr(const size_t rN, const size_t cN,
   col_index = c;
   row_index = r;
   resize(vsize);
-  for (size_t i = 0; i < vsize; ++i) {
-    data()[i] = v[i];
-  }
+
+  internal::vcopy(get_nnz(), v, begin(), false);
 
   rowN = rN;
   colN = cN;
@@ -48,6 +49,31 @@ template void COO<float>::set_ptr(const size_t rN, const size_t cN,
                                   const std::vector<int> &c,
                                   const size_t vsize, const float *v);
+template <typename T>
+void COO<T>::set_ptr(const size_t rN, const size_t cN,
+                     const std::vector<int> &r, const std::vector<int> &c,
+                     const size_t vsize, const T v) {
+  Logger &logger = Logger::get_instance();
+  logger.util_in(monolish_func);
+  col_index = c;
+  row_index = r;
+  resize(vsize);
+
+  internal::vbroadcast(get_nnz(), v, begin(), false);
+
+  rowN = rN;
+  colN = cN;
+  logger.util_out();
+}
+template void COO<double>::set_ptr(const size_t rN, const size_t cN,
+                                   const std::vector<int> &r,
+                                   const std::vector<int> &c,
+                                   const size_t vsize, const double v);
+template void COO<float>::set_ptr(const size_t rN, const size_t cN,
+                                  const std::vector<int> &r,
+                                  const std::vector<int> &c, const size_t vsize,
+                                  const float v);
+
 
 template <typename T>
 void COO<T>::set_ptr(const size_t rN, const size_t cN,
                      const std::vector<int> &r, const std::vector<int> &c,
diff --git a/src/utils/copy/copy_crs.cpp b/src/utils/copy/copy_crs.cpp
index bd80c869e..bd80b4d03 100644
--- a/src/utils/copy/copy_crs.cpp
+++ b/src/utils/copy/copy_crs.cpp
@@ -17,21 +17,8 @@ template <typename T> void CRS<T>::operator=(const CRS<T> &mat) {
   assert(monolish::util::is_same_device_mem_stat(*this, mat));
   val_create_flag = true;
 
-  if (mat.get_device_mem_stat() == true) {
-#if MONOLISH_USE_NVIDIA_GPU
-    internal::vcopy(mat.row_ptr.size(), mat.row_ptr.data(), row_ptr.data(),
-                    true);
-    internal::vcopy(mat.col_ind.size(), mat.col_ind.data(), col_ind.data(),
-                    true);
-    internal::vcopy(mat.get_nnz(), mat.begin(), begin(), true);
-#endif
-  } else {
-    internal::vcopy(mat.row_ptr.size(), mat.row_ptr.data(), row_ptr.data(),
-                    false);
-    internal::vcopy(mat.col_ind.size(), mat.col_ind.data(), col_ind.data(),
-                    false);
-    internal::vcopy(mat.get_nnz(), mat.begin(), begin(), false);
-  }
+  // value copy
+  internal::vcopy(get_nnz(), mat.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -49,12 +36,13 @@ void CRS<T>::set_ptr(const size_t M, const size_t N,
   row_ptr = rowptr;
   val_create_flag = true;
   resize(vsize);
-  for (size_t i = 0; i < vsize; ++i) {
-    data()[i] = value[i];
-  }
+
+  internal::vcopy(get_nnz(), value, begin(), false);
 
   rowN = M;
   colN = N;
+
+  compute_hash();
   logger.util_out();
 }
 template void CRS<double>::set_ptr(const size_t M, const size_t N,
@@ -66,6 +54,35 @@ template void CRS<float>::set_ptr(const size_t M, const size_t N,
                                   const std::vector<int> &colind,
                                   const size_t vsize, const float *value);
+template <typename T>
+void CRS<T>::set_ptr(const size_t M, const size_t N,
+                     const std::vector<int> &rowptr,
+                     const std::vector<int> &colind, const size_t vsize,
+                     const T value) {
+  Logger &logger = Logger::get_instance();
+  logger.util_in(monolish_func);
+  col_ind = colind;
+  row_ptr = rowptr;
+  val_create_flag = true;
+  resize(vsize);
+
+  internal::vbroadcast(get_nnz(), value, begin(), false);
+
+  rowN = M;
+  colN = N;
+
+  compute_hash();
+  logger.util_out();
+}
+template void CRS<double>::set_ptr(const size_t M, const size_t N,
+                                   const std::vector<int> &rowptr,
+                                   const std::vector<int> &colind,
+                                   const size_t vsize, const double value);
+template void CRS<float>::set_ptr(const size_t M, const size_t N,
+                                  const std::vector<int> &rowptr,
+                                  const std::vector<int> &colind,
+                                  const size_t vsize, const float value);
+
 
 template <typename T>
 void CRS<T>::set_ptr(const size_t M, const size_t N,
                      const std::vector<int> &rowptr,
diff --git a/src/utils/copy/copy_dense.cpp b/src/utils/copy/copy_dense.cpp
index a367b0475..449d1923e 100644
--- a/src/utils/copy/copy_dense.cpp
+++ b/src/utils/copy/copy_dense.cpp
@@ -17,11 +17,7 @@ template <typename T> void Dense<T>::operator=(const Dense<T> &mat) {
   val_create_flag = true;
 
   // gpu copy
-  if (mat.get_device_mem_stat()) {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), true);
-  } else {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), false);
-  }
+  internal::vcopy(get_nnz(), mat.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -40,11 +36,7 @@ void Dense<T>::operator=(const view_Dense<vector<T>, T> &mat) {
   val_create_flag = true;
 
   // gpu copy
-  if (mat.get_device_mem_stat()) {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), true);
-  } else {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), false);
-  }
+  internal::vcopy(get_nnz(), mat.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -65,11 +57,7 @@ void Dense<T>::operator=(const view_Dense<matrix::Dense<T>, T> &mat) {
   val_create_flag = true;
 
   // gpu copy
-  if (mat.get_device_mem_stat()) {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), true);
-  } else {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), false);
-  }
+  internal::vcopy(get_nnz(), mat.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -90,11 +78,7 @@ void Dense<T>::operator=(const view_Dense<tensor::tensor_Dense<T>, T> &mat) {
   val_create_flag = true;
 
   // gpu copy
-  if (mat.get_device_mem_stat()) {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), true);
-  } else {
-    internal::vcopy(get_nnz(), mat.begin(), begin(), false);
-  }
+  internal::vcopy(get_nnz(), mat.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -110,9 +94,8 @@ void Dense<T>::set_ptr(const size_t M, const size_t N, const T *value) {
   logger.util_in(monolish_func);
   val_create_flag = true;
   resize(M * N);
-  for (size_t i = 0; i < M * N; ++i) {
-    begin()[i] = value[i];
-  }
+
+  internal::vcopy(get_nnz(), value, begin(), false);
 
   rowN = M;
   colN = N;
@@ -123,6 +106,24 @@ template void Dense<double>::set_ptr(const size_t M, const size_t N,
 template void Dense<float>::set_ptr(const size_t M, const size_t N,
                                     const float *value);
+template <typename T>
+void Dense<T>::set_ptr(const size_t M, const size_t N, const T value) {
+  Logger &logger = Logger::get_instance();
+  logger.util_in(monolish_func);
+  val_create_flag = true;
+  resize(M * N);
+
+  internal::vbroadcast(get_nnz(), value, begin(), false);
+
+  rowN = M;
+  colN = N;
+  logger.util_out();
+}
+template void Dense<double>::set_ptr(const size_t M, const size_t N,
+                                     const double value);
+template void Dense<float>::set_ptr(const size_t M, const size_t N,
+                                    const float value);
+
 
 template <typename T>
 void Dense<T>::set_ptr(const size_t M, const size_t N,
                        const std::vector<T> &value) {
diff --git a/src/utils/copy/copy_tensor_coo.cpp b/src/utils/copy/copy_tensor_coo.cpp
index 10e9e4ba9..5e906bab8 100644
--- a/src/utils/copy/copy_tensor_coo.cpp
+++ b/src/utils/copy/copy_tensor_coo.cpp
@@ -19,7 +19,7 @@ void tensor_COO<T>::operator=(const tensor::tensor_COO<T> &tens) {
   assert(monolish::util::is_same_device_mem_stat(*this, tens));
 
   // value copy
-  internal::vcopy(get_nnz(), data(), tens.data(), get_device_mem_stat());
+  internal::vcopy(get_nnz(), tens.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -34,9 +34,8 @@ void tensor_COO<T>::set_ptr(const std::vector<size_t> &shape,
   this->shape = shape;
   this->index = index;
   resize(vsize);
-  for (size_t i = 0; i < vsize; ++i) {
-    data()[i] = v[i];
-  }
+
+  internal::vcopy(get_nnz(), v, begin(), false);
 
   logger.util_out();
 }
@@ -49,6 +48,30 @@ tensor_COO<float>::set_ptr(const std::vector<size_t> &shape,
                            const std::vector<std::vector<size_t>> &index,
                            const size_t vsize, const float *v);
+template <typename T>
+void tensor_COO<T>::set_ptr(const std::vector<size_t> &shape,
+                            const std::vector<std::vector<size_t>> &index,
+                            const size_t vsize, const T v) {
+  Logger &logger = Logger::get_instance();
+  logger.util_in(monolish_func);
+  val_create_flag = true;
+  this->shape = shape;
+  this->index = index;
+  resize(vsize);
+
+  internal::vbroadcast(get_nnz(), v, begin(), false);
+
+  logger.util_out();
+}
+template void
+tensor_COO<double>::set_ptr(const std::vector<size_t> &shape,
+                            const std::vector<std::vector<size_t>> &index,
+                            const size_t vsize, const double v);
+template void
+tensor_COO<float>::set_ptr(const std::vector<size_t> &shape,
+                           const std::vector<std::vector<size_t>> &index,
+                           const size_t vsize, const float v);
+
 
 template <typename T>
 void tensor_COO<T>::set_ptr(const std::vector<size_t> &shape,
                             const std::vector<std::vector<size_t>> &index,
diff --git a/src/utils/copy/copy_tensor_dense.cpp b/src/utils/copy/copy_tensor_dense.cpp
index e57ef31fa..4d65e0f7d 100644
--- a/src/utils/copy/copy_tensor_dense.cpp
+++ b/src/utils/copy/copy_tensor_dense.cpp
@@ -19,11 +19,7 @@ void tensor_Dense<T>::operator=(const tensor_Dense<T> &tens) {
   val_create_flag = true;
 
   // gpu copy
-  if (tens.get_device_mem_stat()) {
-    internal::vcopy(get_nnz(), tens.data(), data(), true);
-  } else {
-    internal::vcopy(get_nnz(), tens.data(), data(), false);
-  }
+  internal::vcopy(get_nnz(), tens.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -38,9 +34,8 @@ void tensor_Dense<T>::set_ptr(const std::vector<size_t> &shape,
   logger.util_in(monolish_func);
   val_create_flag = true;
   resize(shape);
-  for (size_t i = 0; i < get_nnz(); ++i) {
-    data()[i] = value[i];
-  }
+
+  internal::vcopy(get_nnz(), value, begin(), false);
 
   logger.util_out();
 }
@@ -49,6 +44,22 @@ template void tensor_Dense<double>::set_ptr(const std::vector<size_t> &shape,
 template void tensor_Dense<float>::set_ptr(const std::vector<size_t> &shape,
                                            const float *value);
+template <typename T>
+void tensor_Dense<T>::set_ptr(const std::vector<size_t> &shape, const T value) {
+  Logger &logger = Logger::get_instance();
+  logger.util_in(monolish_func);
+  val_create_flag = true;
+  resize(shape);
+
+  internal::vbroadcast(get_nnz(), value, begin(), false);
+
+  logger.util_out();
+}
+template void tensor_Dense<double>::set_ptr(const std::vector<size_t> &shape,
+                                            const double value);
+template void tensor_Dense<float>::set_ptr(const std::vector<size_t> &shape,
+                                           const float value);
+
 
 template <typename T>
 void tensor_Dense<T>::set_ptr(const std::vector<size_t> &shape,
                               const std::vector<T> &value) {
diff --git a/src/utils/copy/copy_vector.cpp b/src/utils/copy/copy_vector.cpp
index 94b851d9f..d6b2f2526 100644
--- a/src/utils/copy/copy_vector.cpp
+++ b/src/utils/copy/copy_vector.cpp
@@ -13,7 +13,7 @@ template <typename T> void vector<T>::operator=(const std::vector<T> &vec) {
 
   val_create_flag = true;
   resize(vec.size());
-  std::copy(vec.begin(), vec.end(), begin());
+  internal::vcopy(vec.size(), vec.data(), begin(), false);
 
   logger.util_out();
 }
@@ -31,13 +31,7 @@ template <typename T> void vector<T>::operator=(const vector<T> &vec) {
   val_create_flag = true;
 
   // gpu copy and recv
-  if (vec.get_device_mem_stat()) {
-#if MONOLISH_USE_NVIDIA_GPU
-    internal::vcopy(vec.size(), vec.begin(), begin(), true);
-#endif
-  } else {
-    internal::vcopy(vec.size(), vec.begin(), begin(), false);
-  }
+  internal::vcopy(vec.size(), vec.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -56,13 +50,7 @@ void vector<T>::operator=(const view1D<vector<T>, T> &vec) {
   val_create_flag = true;
 
   // gpu copy and recv
-  if (vec.get_device_mem_stat()) {
-#if MONOLISH_USE_NVIDIA_GPU
-    internal::vcopy(vec.size(), vec.begin() + vec.get_offset(), begin(), true);
-#endif
-  } else {
-    internal::vcopy(vec.size(), vec.begin() + vec.get_offset(), begin(), false);
-  }
+  internal::vcopy(vec.size(), vec.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -81,13 +69,7 @@ void vector<T>::operator=(const view1D<matrix::Dense<T>, T> &vec) {
   val_create_flag = true;
 
   // gpu copy and recv
-  if (vec.get_device_mem_stat()) {
-#if MONOLISH_USE_NVIDIA_GPU
-    internal::vcopy(vec.size(), vec.begin() + vec.get_offset(), begin(), true);
-#endif
-  } else {
-    internal::vcopy(vec.size(), vec.begin() + vec.get_offset(), begin(), false);
-  }
+  internal::vcopy(vec.size(), vec.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
@@ -107,13 +89,7 @@ void vector<T>::operator=(const view1D<tensor::tensor_Dense<T>, T> &vec) {
   val_create_flag = true;
 
   // gpu copy and recv
-  if (vec.get_device_mem_stat()) {
-#if MONOLISH_USE_NVIDIA_GPU
-    internal::vcopy(vec.size(), vec.begin() + vec.get_offset(), begin(), true);
-#endif
-  } else {
-    internal::vcopy(vec.size(), vec.begin() + vec.get_offset(), begin(), false);
-  }
+  internal::vcopy(vec.size(), vec.begin(), begin(), get_device_mem_stat());
 
   logger.util_out();
 }
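Note (not part of the patch): a minimal usage sketch of the scalar set_ptr overloads declared above. The monolish_blas.hpp umbrella include is assumed, and the shapes, index lists, and fill values are made-up illustrations, not from the diff.

#include <monolish_blas.hpp>
#include <vector>

int main() {
  // Dense: size the matrix as 2x3 and fill every element with the scalar 1.0
  // (previously this required a pointer or a std::vector holding 6 copies).
  monolish::matrix::Dense<double> A;
  A.set_ptr(2, 3, 1.0);

  // COO: row/col indices are still passed as std::vector<int>; the last
  // argument is now a single scalar broadcast to all vsize stored entries.
  std::vector<int> row = {0, 0, 1};
  std::vector<int> col = {0, 2, 1};
  monolish::matrix::COO<double> B;
  B.set_ptr(2, 3, row, col, row.size(), 0.5);

  return 0;
}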