Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

algorithm,memory,numeric: Improve thrust compatibility via ADL barriers #433

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
algorithm,memory,numeric: Improve thrust compatibility via ADL barriers
  • Loading branch information
stotko committed Sep 9, 2024
commit 84386f652468a11ee7f576ef7c76a7daf3d5d0bc
43 changes: 43 additions & 0 deletions src/stdgpu/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ template <typename IndexType,
void
for_each_index(ExecutionPolicy&& policy, IndexType size, UnaryFunction f);

#ifdef STDGPU_RUN_DOXYGEN

/**
* \ingroup algorithm
* \brief Writes the given value into the given range using the copy assignment operator
Expand Down Expand Up @@ -165,6 +167,47 @@ template <typename ExecutionPolicy,
OutputIt
copy_n(ExecutionPolicy&& policy, InputIt begin, Size n, OutputIt output_begin);

#endif

//! @cond Doxygen_Suppress
// ADL barrier for the execution-policy overloads of fill, fill_n, copy and copy_n.
// The names declared here are re-exported into the enclosing namespace by the
// using-directive that follows this namespace. Argument-dependent lookup ignores
// using-directives, so an unqualified call whose arguments only have the enclosing
// namespace as an associated namespace does not find these overloads.
// NOTE(review): presumably this avoids ambiguous unqualified calls against the
// same-named thrust algorithms - confirm against the thrust integration.
// NOTE(review): STDGPU_DETAIL_OVERLOAD_IF is defined elsewhere; it appears to enable
// each overload only when the cv/ref-stripped ExecutionPolicy is an execution policy.
namespace adl_barrier
{

// Execution-policy overload of fill for the range given by begin and end.
template <typename ExecutionPolicy,
typename Iterator,
typename T,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
void
fill(ExecutionPolicy&& policy, Iterator begin, Iterator end, const T& value);

// Execution-policy overload of fill_n for n elements starting at begin.
// The definition in impl/algorithm_detail.h returns begin + n.
template <typename ExecutionPolicy,
typename Iterator,
typename Size,
typename T,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
Iterator
fill_n(ExecutionPolicy&& policy, Iterator begin, Size n, const T& value);

// Execution-policy overload of copy from the range given by begin and end to output_begin.
template <typename ExecutionPolicy,
typename InputIt,
typename OutputIt,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
OutputIt
copy(ExecutionPolicy&& policy, InputIt begin, InputIt end, OutputIt output_begin);

// Execution-policy overload of copy_n for n elements starting at begin.
// The definition in impl/algorithm_detail.h returns output_begin + n.
template <typename ExecutionPolicy,
typename InputIt,
typename Size,
typename OutputIt,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
OutputIt
copy_n(ExecutionPolicy&& policy, InputIt begin, Size n, OutputIt output_begin);

} // namespace adl_barrier

using namespace adl_barrier;
//! @endcond

} // namespace stdgpu

#include <stdgpu/impl/algorithm_detail.h>
Expand Down
6 changes: 6 additions & 0 deletions src/stdgpu/impl/algorithm_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ class fill_functor
};
} // namespace detail

namespace adl_barrier
{
template <typename ExecutionPolicy,
typename Iterator,
typename T,
Expand All @@ -106,6 +108,7 @@ fill_n(ExecutionPolicy&& policy, Iterator begin, Size n, const T& value)
for_each_index(std::forward<ExecutionPolicy>(policy), n, detail::fill_functor<Iterator, T>(begin, value));
return begin + n;
}
} // namespace adl_barrier

namespace detail
{
Expand All @@ -131,6 +134,8 @@ class copy_functor
};
} // namespace detail

namespace adl_barrier
{
template <typename ExecutionPolicy,
typename InputIt,
typename OutputIt,
Expand All @@ -154,6 +159,7 @@ copy_n(ExecutionPolicy&& policy, InputIt begin, Size n, OutputIt output_begin)
detail::copy_functor<InputIt, OutputIt>(begin, output_begin));
return output_begin + n;
}
} // namespace adl_barrier

} // namespace stdgpu

Expand Down
3 changes: 3 additions & 0 deletions src/stdgpu/impl/memory_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,8 @@ destroy_at(T* p)
p->~T();
}

namespace adl_barrier
{
template <typename ExecutionPolicy,
typename Iterator,
typename T,
Expand Down Expand Up @@ -781,6 +783,7 @@ destroy_n(ExecutionPolicy&& policy, Iterator first, Size n)

return last;
}
} // namespace adl_barrier

template <>
dynamic_memory_type
Expand Down
3 changes: 3 additions & 0 deletions src/stdgpu/impl/numeric_detail.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ class iota_functor
};
} // namespace detail

namespace adl_barrier
{
template <typename ExecutionPolicy,
typename Iterator,
typename T,
Expand All @@ -60,6 +62,7 @@ iota(ExecutionPolicy&& policy, Iterator begin, Iterator end, T value)
static_cast<index_t>(end - begin),
detail::iota_functor<Iterator, T>(begin, value));
}
} // namespace adl_barrier

template <typename IndexType,
typename ExecutionPolicy,
Expand Down
56 changes: 56 additions & 0 deletions src/stdgpu/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,8 @@ template <typename T>
STDGPU_HOST_DEVICE void
destroy_at(T* p);

#ifdef STDGPU_RUN_DOXYGEN

/**
* \ingroup memory
* \brief Writes the given value into the given range using the copy constructor
Expand Down Expand Up @@ -882,6 +884,60 @@ template <typename ExecutionPolicy,
Iterator
destroy_n(ExecutionPolicy&& policy, Iterator first, Size n);

#endif

//! @cond Doxygen_Suppress
// ADL barrier for the execution-policy overloads of the uninitialized-memory and
// destroy algorithms. The names declared here are re-exported into the enclosing
// namespace by the using-directive that follows this namespace. Argument-dependent
// lookup ignores using-directives, so an unqualified call whose arguments only have
// the enclosing namespace as an associated namespace does not find these overloads.
// NOTE(review): presumably this avoids ambiguous unqualified calls against the
// same-named thrust algorithms - confirm against the thrust integration.
// NOTE(review): STDGPU_DETAIL_OVERLOAD_IF is defined elsewhere; it appears to enable
// each overload only when the cv/ref-stripped ExecutionPolicy is an execution policy.
namespace adl_barrier
{

// Execution-policy overload of uninitialized_fill for the range given by begin and end.
template <typename ExecutionPolicy,
typename Iterator,
typename T,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
void
uninitialized_fill(ExecutionPolicy&& policy, Iterator begin, Iterator end, const T& value);

// Execution-policy overload of uninitialized_fill_n for n elements starting at begin.
template <typename ExecutionPolicy,
typename Iterator,
typename Size,
typename T,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
Iterator
uninitialized_fill_n(ExecutionPolicy&& policy, Iterator begin, Size n, const T& value);

// Execution-policy overload of uninitialized_copy from the range given by begin and end
// to output_begin.
template <typename ExecutionPolicy,
typename InputIt,
typename OutputIt,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
OutputIt
uninitialized_copy(ExecutionPolicy&& policy, InputIt begin, InputIt end, OutputIt output_begin);

// Execution-policy overload of uninitialized_copy_n for n elements starting at begin.
template <typename ExecutionPolicy,
typename InputIt,
typename Size,
typename OutputIt,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
OutputIt
uninitialized_copy_n(ExecutionPolicy&& policy, InputIt begin, Size n, OutputIt output_begin);

// Execution-policy overload of destroy for the range given by first and last.
template <typename ExecutionPolicy,
typename Iterator,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
void
destroy(ExecutionPolicy&& policy, Iterator first, Iterator last);

// Execution-policy overload of destroy_n for n elements starting at first.
// NOTE(review): the definition in impl/memory_detail.h returns a local named `last`,
// presumably first + n - confirm.
template <typename ExecutionPolicy,
typename Iterator,
typename Size,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
Iterator
destroy_n(ExecutionPolicy&& policy, Iterator first, Size n);

} // namespace adl_barrier

using namespace adl_barrier;
//! @endcond

/**
* \ingroup memory
* \brief Registers the given memory block into the internal memory size manager
Expand Down
20 changes: 20 additions & 0 deletions src/stdgpu/numeric.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
namespace stdgpu
{

#ifdef STDGPU_RUN_DOXYGEN

/**
* \ingroup numeric
* \brief Writes ascending values {value + i} to the i-th position of the given range
Expand All @@ -48,6 +50,24 @@ template <typename ExecutionPolicy,
void
iota(ExecutionPolicy&& policy, Iterator begin, Iterator end, T value);

#endif

//! @cond Doxygen_Suppress
// ADL barrier for the execution-policy overload of iota. The name declared here is
// re-exported into the enclosing namespace by the using-directive that follows this
// namespace. Argument-dependent lookup ignores using-directives, so an unqualified
// call whose arguments only have the enclosing namespace as an associated namespace
// does not find this overload.
// NOTE(review): presumably this avoids ambiguous unqualified calls against thrust
// functions of the same name - confirm against the thrust integration.
namespace adl_barrier
{

// Execution-policy overload of iota for the range given by begin and end.
// The definition in impl/numeric_detail.h dispatches detail::iota_functor<Iterator, T>(begin, value)
// over static_cast<index_t>(end - begin) indices.
// NOTE(review): STDGPU_DETAIL_OVERLOAD_IF is defined elsewhere; it appears to enable this
// overload only when the cv/ref-stripped ExecutionPolicy is an execution policy.
template <typename ExecutionPolicy,
typename Iterator,
typename T,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
void
iota(ExecutionPolicy&& policy, Iterator begin, Iterator end, T value);

} // namespace adl_barrier

using namespace adl_barrier;
//! @endcond

/**
* \ingroup numeric
* \brief Calls the given unary function with an index from the range [0, size) and performs a reduction afterwards
Expand Down