diff --git a/include/CL/sycl/group.hpp b/include/CL/sycl/group.hpp index 9a8b03f66..a87de1eae 100644 --- a/include/CL/sycl/group.hpp +++ b/include/CL/sycl/group.hpp @@ -27,8 +27,8 @@ struct group; namespace detail { template -void parallel_for_workitem(const group &g, - ParallelForFunctor f); +void parallel_for_workitem_in_group(const group &g, + ParallelForFunctor f); } @@ -179,7 +179,7 @@ struct group { */ void parallel_for_work_item(std::function)> f) const { - detail::parallel_for_workitem(*this, f); + detail::parallel_for_workitem_in_group(*this, f); } }; diff --git a/include/CL/sycl/parallelism/detail/parallelism.hpp b/include/CL/sycl/parallelism/detail/parallelism.hpp index 09d30c77e..625c7bff2 100644 --- a/include/CL/sycl/parallelism/detail/parallelism.hpp +++ b/include/CL/sycl/parallelism/detail/parallelism.hpp @@ -5,6 +5,8 @@ Implement the detail of the parallel constructions to launch kernels + \todo Refactor this file + Ronan at keryell dot FR This file is distributed under the University of Illinois Open Source @@ -15,6 +17,7 @@ #include #include "CL/sycl/group.hpp" +#include "CL/sycl/h_item.hpp" #include "CL/sycl/id.hpp" #include "CL/sycl/item.hpp" #include "CL/sycl/nd_item.hpp" @@ -228,6 +231,7 @@ void parallel_for(nd_range r, // Then iterate on the local work-groups cl::sycl::group wg {g, r}; parallel_for_workitem, decltype(f)>(wg, f); }; @@ -240,10 +244,10 @@ void parallel_for(nd_range r, id local; range local_range = r.get_local(); - // Reconstruct the nd_item from its group and local id + // Reconstruct the item from its group and local id auto reconstruct_item = [&] (id l) { //local.display(); - // Reconstruct the global nd_item + // Reconstruct the global item index.set_local(local); // Upgrade local_range to an id<> so that we can * with the group (an id<>) index.set_global(local + id(local_range)*group); @@ -299,7 +303,7 @@ void parallel_for_workgroup(nd_range r, \todo Better type the functor */ -template +template void parallel_for_workitem(const group &g, ParallelForFunctor f) { #if defined(_OPENMP) && (!defined(TRISYCL_NO_BARRIER) && !defined(_MSC_VER)) @@ -320,7 +324,7 @@ void parallel_for_workitem(const group &g, } #pragma omp parallel num_threads(tot) { - nd_item index { g.get_nd_range() }; + T_Item index { g.get_nd_range() }; id local; // to initialize correctly #pragma omp for nowait for (std::size_t th_id = 0; th_id < tot; ++th_id) { @@ -341,15 +345,15 @@ void parallel_for_workitem(const group &g, } #else // In a sequential execution there is only one index processed at a time - h_item index { g.get_nd_range() }; + T_Item index { g.get_nd_range() }; // To iterate on the local work-item id local; - // Reconstruct the nd_item from its group and local id + // Reconstruct the item from its group and local id auto reconstruct_item = [&] (id l) { //local.display(); //l.display(); - // Reconstruct the global nd_item + // Reconstruct the global item index.set_local(local); // \todo Some strength reduction here index.set_global(local + id(g.get_local_range())*g.get_id()); @@ -367,6 +371,19 @@ void parallel_for_workitem(const group &g, local }; #endif } + + +/** Implement the loop on the work-items inside a work-group +*/ +template +void parallel_for_workitem_in_group(const group &g, + ParallelForFunctor f) { + parallel_for_workitem, + ParallelForFunctor>(g, f); +} + + /// @} End the parallelism Doxygen group } // namespace detail