Skip to content

Commit

Permalink
Merge pull request #1421 from kokkos/release-candidate-3.6.01
Browse files Browse the repository at this point in the history
Release candidate 3.6.01
  • Loading branch information
lucbv authored Jun 23, 2022
2 parents e09389a + 87174c3 commit 42ab7a2
Show file tree
Hide file tree
Showing 29 changed files with 949 additions and 245 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Change Log

## [3.6.01](https://github.com/kokkos/kokkos-kernels/tree/3.6.01) (2022-05-23)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.6.00...3.6.01)

### Bug Fixes and Improvements:

- Improve spiluk numeric phase to avoid race conditions and to process in chunks [\#1390](https://github.com/kokkos/kokkos-kernels/pull/1390)
- Improve sptrsv symbolic phase performance (level scheduling) [\#1380](https://github.com/kokkos/kokkos-kernels/pull/1380)
- Restore BLAS-1 MV paths for 1 column [\#1354](https://github.com/kokkos/kokkos-kernels/pull/1354)
- Fix check that view has const type [\#1370](https://github.com/kokkos/kokkos-kernels/pull/1370)
- Fix check that view has const type part 2 [\#1394](https://github.com/kokkos/kokkos-kernels/pull/1394)

## [3.6.00](https://github.com/kokkos/kokkos-kernels/tree/3.6.00) (2022-02-18)
[Full Changelog](https://github.com/kokkos/kokkos-kernels/compare/3.5.00...3.6.00)

Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ IF(NOT KOKKOSKERNELS_HAS_TRILINOS)
ENDIF()
SET(KokkosKernels_VERSION_MAJOR 3)
SET(KokkosKernels_VERSION_MINOR 6)
SET(KokkosKernels_VERSION_PATCH 00)
SET(KokkosKernels_VERSION_PATCH 01)
SET(KokkosKernels_VERSION "${KokkosKernels_VERSION_MAJOR}.${KokkosKernels_VERSION_MINOR}.${KokkosKernels_VERSION_PATCH}")
MATH(EXPR KOKKOSKERNELS_VERSION "${KokkosKernels_VERSION_MAJOR} * 10000 + ${KokkosKernels_VERSION_MINOR} * 100 + ${KokkosKernels_VERSION_PATCH}")
ENDIF()
Expand Down
1 change: 1 addition & 0 deletions master_history.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ tag: 3.4.00 date: 04/26/2021 master: fe439b21 release: d3c33910
tag: 3.4.01 date: 05/20/2021 master: 564dccb3 release: 4c62eb86
tag: 3.5.00 date: 11/19/2021 master: 00189c0b release: f171533d
tag: 3.6.00 date: 04/06/2022 master: 8381db04 release: a7e683c4
tag: 3.6.01 date: 05/23/2022 master: e09389ae release: e1d8de42
12 changes: 4 additions & 8 deletions src/Kokkos_ArithTraits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1284,10 +1284,8 @@ class ArithTraits<std::complex<RealFloatType> > {
}
#else
static bool isInf(const std::complex<RealFloatType>& x) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
using std::isinf;
#endif
return isinf(real(x)) || isinf(imag(x));
return Kokkos::Experimental::isinf(real(x)) ||
Kokkos::Experimental::isinf(imag(x));
}
#endif
#ifdef KOKKOS_ENABLE_SYCL
Expand All @@ -1307,10 +1305,8 @@ class ArithTraits<std::complex<RealFloatType> > {
}
#else
static bool isNan(const std::complex<RealFloatType>& x) {
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
using std::isnan;
#endif
return isnan(real(x)) || isnan(imag(x));
return Kokkos::Experimental::isnan(real(x)) ||
Kokkos::Experimental::isnan(imag(x));
}
#endif
static mag_type abs(const std::complex<RealFloatType>& x) {
Expand Down
26 changes: 14 additions & 12 deletions src/blas/KokkosBlas1_nrm2w.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ nrm2w(const XVector& x, const XVector& w) {
typename XVector::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
XVector_Internal;

typedef Kokkos::View<mag_type, Kokkos::LayoutLeft, Kokkos::HostSpace,
typedef Kokkos::View<mag_type, typename XVector_Internal::array_layout,
Kokkos::HostSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
RVector_Internal;

Expand Down Expand Up @@ -134,20 +135,21 @@ void nrm2w(const RV& R, const XMV& X, const XMV& W,
KokkosKernels::Impl::throw_runtime_exception(os.str());
}

using UnifiedXLayout =
typename KokkosKernels::Impl::GetUnifiedLayout<XMV>::array_layout;
using UnifiedRVLayout =
typename KokkosKernels::Impl::GetUnifiedLayoutPreferring<
RV, UnifiedXLayout>::array_layout;

// Create unmanaged versions of the input Views. RV may be rank 0 or
// rank 1; XMV may be rank 1 or rank 2.
typedef Kokkos::View<
typename std::conditional<RV::rank == 0,
typename RV::non_const_value_type,
typename RV::non_const_value_type*>::type,
typename KokkosKernels::Impl::GetUnifiedLayout<RV>::array_layout,
typename RV::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
typedef Kokkos::View<typename RV::non_const_data_type, UnifiedRVLayout,
typename RV::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
RV_Internal;
typedef Kokkos::View<
typename std::conditional<XMV::rank == 1, typename XMV::const_value_type*,
typename XMV::const_value_type**>::type,
typename KokkosKernels::Impl::GetUnifiedLayout<XMV>::array_layout,
typename XMV::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
typedef Kokkos::View<typename XMV::const_data_type, UnifiedXLayout,
typename XMV::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
XMV_Internal;

RV_Internal R_internal = R;
Expand Down
26 changes: 14 additions & 12 deletions src/blas/KokkosBlas1_nrm2w_squared.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ nrm2w_squared(const XVector& x, const XVector& w) {
typename XVector::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
XVector_Internal;

typedef Kokkos::View<mag_type, Kokkos::LayoutLeft, Kokkos::HostSpace,
typedef Kokkos::View<mag_type, typename XVector_Internal::array_layout,
Kokkos::HostSpace,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
RVector_Internal;

Expand Down Expand Up @@ -135,20 +136,21 @@ void nrm2w_squared(
KokkosKernels::Impl::throw_runtime_exception(os.str());
}

using UnifiedXLayout =
typename KokkosKernels::Impl::GetUnifiedLayout<XMV>::array_layout;
using UnifiedRVLayout =
typename KokkosKernels::Impl::GetUnifiedLayoutPreferring<
RV, UnifiedXLayout>::array_layout;

// Create unmanaged versions of the input Views. RV may be rank 0 or
// rank 1; XMV may be rank 1 or rank 2.
typedef Kokkos::View<
typename std::conditional<RV::rank == 0,
typename RV::non_const_value_type,
typename RV::non_const_value_type*>::type,
typename KokkosKernels::Impl::GetUnifiedLayout<RV>::array_layout,
typename RV::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
typedef Kokkos::View<typename RV::non_const_data_type, UnifiedRVLayout,
typename RV::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
RV_Internal;
typedef Kokkos::View<
typename std::conditional<XMV::rank == 1, typename XMV::const_value_type*,
typename XMV::const_value_type**>::type,
typename KokkosKernels::Impl::GetUnifiedLayout<XMV>::array_layout,
typename XMV::device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
typedef Kokkos::View<typename XMV::const_data_type, UnifiedXLayout,
typename XMV::device_type,
Kokkos::MemoryTraits<Kokkos::Unmanaged> >
XMV_Internal;

RV_Internal R_internal = R;
Expand Down
5 changes: 3 additions & 2 deletions src/blas/impl/KokkosBlas1_dot_mv_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ void MV_Dot_Invoke(
}
// Zero out the result vector
Kokkos::deep_copy(
r, Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
execution_space(), r,
Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
size_type teamsPerDot;
KokkosBlas::Impl::multipleReductionWorkDistribution<execution_space,
size_type>(
Expand All @@ -156,7 +157,7 @@ void MV_Dot_Invoke(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Dot_MV temp result"),
r.extent(0));
MV_Dot_Invoke<decltype(tempResult), XV, YV, size_type>(tempResult, x, y);
Kokkos::deep_copy(r, tempResult);
Kokkos::deep_copy(typename XV::execution_space(), r, tempResult);
}

} // namespace Impl
Expand Down
45 changes: 38 additions & 7 deletions src/blas/impl/KokkosBlas1_dot_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,20 @@ struct Dot<RV, XV, YV, X_Rank, Y_Rank, false,

typedef typename YV::size_type size_type;

// Overload set yielding the first column of a rank-1 or rank-2 view, so
// that a single-column dot path can handle both ranks the same way.

// Rank-2 overload: take all rows of column 0 as a subview.
template <typename V>
static auto getFirstColumn(
    const V& v, typename std::enable_if<V::rank == 2>::type* = nullptr) {
  auto leadingColumn = Kokkos::subview(v, Kokkos::ALL(), 0);
  return leadingColumn;
}

// Rank-1 overload: the view is returned by value, unchanged.
template <typename V>
static V getFirstColumn(
    const V& v, typename std::enable_if<V::rank == 1>::type* = nullptr) {
  V sameColumn = v;
  return sameColumn;
}

static void dot(const RV& R, const XV& X, const YV& Y) {
Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY
? "KokkosBlas::dot[ETI]"
Expand All @@ -392,14 +406,31 @@ struct Dot<RV, XV, YV, X_Rank, Y_Rank, false,
#endif

const size_type numRows = X.extent(0);
const size_type numCols = X.extent(1);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
typedef int index_type;
MV_Dot_Invoke<RV, XV, YV, index_type>(R, X, Y);
const size_type numDots = std::max(X.extent(1), Y.extent(1));
if (numDots == Kokkos::ArithTraits<size_type>::one()) {
auto R0 = Kokkos::subview(R, 0);
auto X0 = getFirstColumn(X);
auto Y0 = getFirstColumn(Y);
if (numRows < static_cast<size_type>(INT_MAX)) {
typedef int index_type;
DotFunctor<decltype(R0), decltype(X0), decltype(Y0), index_type> f(X0,
Y0);
f.run("KokkosBlas::dot<1D>", R0);
} else {
typedef int64_t index_type;
DotFunctor<decltype(R0), decltype(X0), decltype(Y0), index_type> f(X0,
Y0);
f.run("KokkosBlas::dot<1D>", R0);
}
} else {
typedef std::int64_t index_type;
MV_Dot_Invoke<RV, XV, YV, index_type>(R, X, Y);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numDots < static_cast<size_type>(INT_MAX)) {
typedef int index_type;
MV_Dot_Invoke<RV, XV, YV, index_type>(R, X, Y);
} else {
typedef std::int64_t index_type;
MV_Dot_Invoke<RV, XV, YV, index_type>(R, X, Y);
}
}
Kokkos::Profiling::popRegion();
}
Expand Down
5 changes: 3 additions & 2 deletions src/blas/impl/KokkosBlas1_nrm1_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ void MV_Nrm1_Invoke(
}
// Zero out the result vector
Kokkos::deep_copy(
r, Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
execution_space(), r,
Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
size_type teamsPerVec;
KokkosBlas::Impl::multipleReductionWorkDistribution<execution_space,
size_type>(
Expand All @@ -195,7 +196,7 @@ void MV_Nrm1_Invoke(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm1 temp result"),
r.extent(0));
MV_Nrm1_Invoke<decltype(tempResult), XV, size_type>(tempResult, x);
Kokkos::deep_copy(r, tempResult);
Kokkos::deep_copy(typename XV::execution_space(), r, tempResult);
}

} // namespace Impl
Expand Down
21 changes: 16 additions & 5 deletions src/blas/impl/KokkosBlas1_nrm1_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,23 @@ struct Nrm1<RV, XMV, 2, false, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {
: "KokkosBlas::nrm1[noETI]");
const size_type numRows = X.extent(0);
const size_type numCols = X.extent(1);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
MV_Nrm1_Invoke<RV, XMV, int>(R, X);
if (numCols == Kokkos::ArithTraits<size_type>::one()) {
auto R0 = Kokkos::subview(R, 0);
auto X0 = Kokkos::subview(X, Kokkos::ALL(), 0);
if (numRows < static_cast<size_type>(INT_MAX)) {
V_Nrm1_Invoke<decltype(R0), decltype(X0), int>(R0, X0);
} else {
typedef std::int64_t index_type;
V_Nrm1_Invoke<decltype(R0), decltype(X0), index_type>(R0, X0);
}
} else {
typedef std::int64_t index_type;
MV_Nrm1_Invoke<RV, XMV, index_type>(R, X);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
MV_Nrm1_Invoke<RV, XMV, int>(R, X);
} else {
typedef std::int64_t index_type;
MV_Nrm1_Invoke<RV, XMV, index_type>(R, X);
}
}
Kokkos::Profiling::popRegion();
}
Expand Down
5 changes: 3 additions & 2 deletions src/blas/impl/KokkosBlas1_nrm2_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,8 @@ void MV_Nrm2_Invoke(
}
// Zero out the result vector
Kokkos::deep_copy(
r, Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
execution_space(), r,
Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
size_type teamsPerVec;
KokkosBlas::Impl::multipleReductionWorkDistribution<execution_space,
size_type>(
Expand Down Expand Up @@ -230,7 +231,7 @@ void MV_Nrm2_Invoke(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Nrm2 temp result"),
r.extent(0));
MV_Nrm2_Invoke<decltype(tempResult), XV, size_type>(tempResult, x, take_sqrt);
Kokkos::deep_copy(r, tempResult);
Kokkos::deep_copy(typename XV::execution_space(), r, tempResult);
}

} // namespace Impl
Expand Down
22 changes: 17 additions & 5 deletions src/blas/impl/KokkosBlas1_nrm2_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,24 @@ struct Nrm2<RV, XMV, 2, false, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {

const size_type numRows = X.extent(0);
const size_type numCols = X.extent(1);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
MV_Nrm2_Invoke<RV, XMV, int>(R, X, take_sqrt);
if (numCols == Kokkos::ArithTraits<size_type>::one()) {
auto R0 = Kokkos::subview(R, 0);
auto X0 = Kokkos::subview(X, Kokkos::ALL(), 0);
if (numRows < static_cast<size_type>(INT_MAX)) {
V_Nrm2_Invoke<decltype(R0), decltype(X0), int>(R0, X0, take_sqrt);
} else {
typedef std::int64_t index_type;
V_Nrm2_Invoke<decltype(R0), decltype(X0), index_type>(R0, X0,
take_sqrt);
}
} else {
typedef std::int64_t index_type;
MV_Nrm2_Invoke<RV, XMV, index_type>(R, X, take_sqrt);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
MV_Nrm2_Invoke<RV, XMV, int>(R, X, take_sqrt);
} else {
typedef std::int64_t index_type;
MV_Nrm2_Invoke<RV, XMV, index_type>(R, X, take_sqrt);
}
}
Kokkos::Profiling::popRegion();
}
Expand Down
5 changes: 3 additions & 2 deletions src/blas/impl/KokkosBlas1_nrm2w_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,8 @@ void MV_Nrm2w_Invoke(
}
// Zero out the result vector
Kokkos::deep_copy(
r, Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
execution_space(), r,
Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
size_type teamsPerVec;
KokkosBlas::Impl::multipleReductionWorkDistribution<execution_space,
size_type>(
Expand Down Expand Up @@ -230,7 +231,7 @@ void MV_Nrm2w_Invoke(
r.extent(0));
MV_Nrm2w_Invoke<decltype(tempResult), XV, size_type>(tempResult, x, w,
take_sqrt);
Kokkos::deep_copy(r, tempResult);
Kokkos::deep_copy(typename XV::execution_space(), r, tempResult);
}

} // namespace Impl
Expand Down
23 changes: 18 additions & 5 deletions src/blas/impl/KokkosBlas1_nrm2w_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,25 @@ struct Nrm2w<RV, XMV, 2, false, KOKKOSKERNELS_IMPL_COMPILE_LIBRARY> {

const size_type numRows = X.extent(0);
const size_type numCols = X.extent(1);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
MV_Nrm2w_Invoke<RV, XMV, int>(R, X, W, take_sqrt);
if (numCols == 1) {
auto R0 = Kokkos::subview(R, 0);
auto X0 = Kokkos::subview(X, Kokkos::ALL(), 0);
auto W0 = Kokkos::subview(W, Kokkos::ALL(), 0);
if (numRows < static_cast<size_type>(INT_MAX)) {
V_Nrm2w_Invoke<decltype(R0), decltype(X0), int>(R0, X0, W0, take_sqrt);
} else {
typedef std::int64_t index_type;
V_Nrm2w_Invoke<decltype(R0), decltype(X0), index_type>(R0, X0, W0,
take_sqrt);
}
} else {
typedef std::int64_t index_type;
MV_Nrm2w_Invoke<RV, XMV, index_type>(R, X, W, take_sqrt);
if (numRows < static_cast<size_type>(INT_MAX) &&
numRows * numCols < static_cast<size_type>(INT_MAX)) {
MV_Nrm2w_Invoke<RV, XMV, int>(R, X, W, take_sqrt);
} else {
typedef std::int64_t index_type;
MV_Nrm2w_Invoke<RV, XMV, index_type>(R, X, W, take_sqrt);
}
}
Kokkos::Profiling::popRegion();
}
Expand Down
5 changes: 3 additions & 2 deletions src/blas/impl/KokkosBlas1_sum_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,8 @@ void MV_Sum_Invoke(
}
// Zero out the result vector
Kokkos::deep_copy(
r, Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
execution_space(), r,
Kokkos::ArithTraits<typename RV::non_const_value_type>::zero());
size_type teamsPerVec;
KokkosBlas::Impl::multipleReductionWorkDistribution<execution_space,
size_type>(
Expand All @@ -187,7 +188,7 @@ void MV_Sum_Invoke(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Sum temp result"),
r.extent(0));
MV_Sum_Invoke<decltype(tempResult), XV, size_type>(tempResult, x);
Kokkos::deep_copy(r, tempResult);
Kokkos::deep_copy(typename XV::execution_space(), r, tempResult);
}

} // namespace Impl
Expand Down
Loading

0 comments on commit 42ab7a2

Please sign in to comment.