Merge pull request triSYCL#149 from keryell/h_item

Partial h_item implementation to have the Parallel STL running
ahonorat · Jun 13, 2018 · 26ccc0d · 26ccc0d
2 parents 85ca7ab + bdc06b6
commit 26ccc0d
Show file tree

Hide file tree

Showing 24 changed files with 451 additions and 216 deletions.
diff --git a/include/CL/sycl.hpp b/include/CL/sycl.hpp
@@ -47,6 +47,7 @@
 #include "CL/sycl/exception.hpp"
 #include "CL/sycl/group.hpp"
 #include "CL/sycl/handler.hpp"
+#include "CL/sycl/h_item.hpp"
 #include "CL/sycl/id.hpp"
 #include "CL/sycl/image.hpp"
 #include "CL/sycl/item.hpp"

diff --git a/include/CL/sycl/accessor/detail/accessor_base.hpp b/include/CL/sycl/accessor/detail/accessor_base.hpp
@@ -63,6 +63,9 @@ class accessor_base {
 #ifdef TRISYCL_OPENCL
   /// Get the boost::compute::buffer or throw if unset
   virtual boost::compute::buffer get_cl_buffer() const = 0;
+
+  // Virtual to call the real destructor
+  virtual ~accessor_base() {}
 #endif
 
 };

diff --git a/include/CL/sycl/device.hpp b/include/CL/sycl/device.hpp
@@ -23,8 +23,10 @@
 #ifdef TRISYCL_OPENCL
 #include "CL/sycl/device/detail/opencl_device.hpp"
 #endif
+#include "CL/sycl/id.hpp"
 #include "CL/sycl/info/device.hpp"
 #include "CL/sycl/device_selector.hpp"
+#include "CL/sycl/opencl_types.hpp"
 #include "CL/sycl/platform.hpp"
 
 namespace cl {
@@ -261,6 +263,11 @@ inline auto device::get_info<info::device::max_work_group_size>() const {
   return size_t { 8 };
 }
 
+template <>
+inline auto device::get_info<info::device::max_work_item_sizes>() const {
+  return cl::sycl::id<3> { 128, 128, 128 };
+}
+
 template <>
 inline auto device::get_info<info::device::max_compute_units>() const {
   return size_t { 8 };

diff --git a/include/CL/sycl/error_handler.hpp b/include/CL/sycl/error_handler.hpp
@@ -42,7 +42,7 @@ struct error_handler {
   */
   static trisycl::default_error_handler default_handler;
 
-
+  // Virtual to call the real destructor
   virtual ~error_handler() = 0;
 };
 

diff --git a/include/CL/sycl/group.hpp b/include/CL/sycl/group.hpp
@@ -1,7 +1,7 @@
 #ifndef TRISYCL_SYCL_GROUP_HPP
 #define TRISYCL_SYCL_GROUP_HPP
 
-/** \file The OpenCL SYCL nd_item<>
+/** \file The OpenCL SYCL group<>
 
     Ronan at Keryell point FR
 
@@ -13,6 +13,7 @@
 #include <functional>
 
 #include "CL/sycl/detail/linear_id.hpp"
+#include "CL/sycl/h_item.hpp"
 #include "CL/sycl/id.hpp"
 #include "CL/sycl/nd_range.hpp"
 #include "CL/sycl/range.hpp"
@@ -26,8 +27,8 @@ struct group;
 namespace detail {
 
 template <int Dimensions = 1, typename ParallelForFunctor>
-void parallel_for_workitem(const group<Dimensions> &g,
-                           ParallelForFunctor f);
+void parallel_for_workitem_in_group(const group<Dimensions> &g,
+                                    ParallelForFunctor f);
 
 }
 
@@ -175,26 +176,10 @@ struct group {
 
 
   /** Loop on the work-items inside a work-group
-
-      \todo Add this method in the specification
-  */
-  void parallel_for_work_item(std::function<void(nd_item<dimensionality>)> f)
-    const {
-    detail::parallel_for_workitem(*this, f);
-  }
-
-
-  /** Loop on the work-items inside a work-group
-
-      \todo Add this method in the specification
-  */
-  void parallel_for_work_item(std::function<void(item<dimensionality>)> f)
+   */
+  void parallel_for_work_item(std::function<void(h_item<dimensionality>)> f)
     const {
-    auto item_adapter = [=] (nd_item<dimensionality> ndi) {
-      item<dimensionality> i = ndi.get_item();
-      f(i);
-    };
-    detail::parallel_for_workitem(*this, item_adapter);
+    detail::parallel_for_workitem_in_group(*this, f);
   }
 
 };

diff --git a/include/CL/sycl/h_item.hpp b/include/CL/sycl/h_item.hpp
@@ -0,0 +1,246 @@
+#ifndef TRISYCL_SYCL_H_ITEM_HPP
+#define TRISYCL_SYCL_H_ITEM_HPP
+
+/** \file The OpenCL SYCL h_item<>
+
+    Ronan at Keryell point FR
+
+    This file is distributed under the University of Illinois Open Source
+    License. See LICENSE.TXT for details.
+*/
+
+#include <cstddef>
+
+#include "CL/sycl/access.hpp"
+#include "CL/sycl/detail/linear_id.hpp"
+#include "CL/sycl/detail/unimplemented.hpp"
+#include "CL/sycl/id.hpp"
+#include "CL/sycl/item.hpp"
+#include "CL/sycl/nd_range.hpp"
+#include "CL/sycl/range.hpp"
+
+namespace cl {
+namespace sycl {
+
+/** \addtogroup parallelism Expressing parallelism through kernels
+    @{
+*/
+
+/** \c h_item<int dimensions> identifies an instance of a \c
+    group::parallel_for_work_item function object executing at each
+    point in a local \c range<int dimensions> passed to a \c
+    parallel_for_work_item call or to the corresponding \c
+    parallel_for_work_group call if no range is passed to the \c
+    parallel_for_work_item call. It encapsulates enough information to
+    identify the work-item's local and global items according to the
+    information given to \c parallel_for_work_group (physical ids) as
+    well as the work-item's logical local items in the flexible
+    range. All returned item objects are offset-less. Instances of
+    the \c h_item<int dimensions> class are not user-constructible and
+    are passed by the runtime to each instance of the function object.
+*/
+template <int Dimensions = 1>
+struct h_item {
+  /// \todo add this Boost::multi_array or STL concept to the
+  /// specification?
+  static constexpr auto dimensionality = Dimensions;
+
+private:
+
+  id<Dimensions> global_index;
+  /* This is a cached value since it can be computed from global_index and
+     ND_range */
+  id<Dimensions> local_index;
+  nd_range<Dimensions> ND_range;
+
+public:
+
+  /** Create an empty h_item<> from an nd_range<>
+
+      \todo This is for the triSYCL implementation which is expected to
+      call set_global() and set_local() later. This should be hidden from
+      the user.
+  */
+  h_item(nd_range<Dimensions> ndr) : ND_range { ndr } {}
+
+
+  /** Create a full h_item
+
+      \todo This is for validation purposes. Hide this from the programmer
+      somehow
+  */
+  h_item(id<Dimensions> global_index,
+          nd_range<Dimensions> ndr) :
+    global_index { global_index },
+    // Compute the local index using the offset and the group size
+    local_index
+      { (global_index - ndr.get_offset())%id<Dimensions> { ndr.get_local() } },
+    ND_range { ndr }
+  {}
+
+
+  /** To be able to copy and assign h_item, use default constructors too
+
+      \todo Make most of them protected, reserved to implementation
+  */
+  h_item() = default;
+
+
+  /** Return the constituent global id representing the work-item's
+      position in the global iteration space
+  */
+  id<Dimensions> get_global_id() const { return global_index; }
+
+
+  /** Return the constituent element of the global id representing the
+      work-item's position in the global iteration space in the given
+      dimension
+  */
+  size_t get_global_id(int dimension) const {
+    return get_global_id()[dimension];
+  }
+
+
+  /** Return the flattened id of the current work-item after subtracting
+      the offset
+  */
+  size_t get_global_linear_id() const {
+    return detail::linear_id(get_global_range(), get_global_id(), get_offset());
+  }
+
+
+  /** Return the constituent local id representing the work-item's
+      position within the current work-group
+  */
+  id<Dimensions> get_local_id() const { return local_index; }
+
+
+  /** Return the constituent element of the local id representing the
+      work-item's position within the current work-group in the given
+      dimension
+  */
+  size_t get_local_id(int dimension) const { return get_local_id()[dimension]; }
+
+
+  /** Return the flattened id of the current work-item within the current
+      work-group
+   */
+  size_t get_local_linear_id() const {
+    return detail::linear_id(get_local_range(), get_local_id());
+  }
+
+
+  /** Return the constituent group id representing the work-group's
+      position within the overall nd_range
+  */
+  id<Dimensions> get_group() const {
+    /* Convert get_local_range() to an id<> to remove the ambiguity of an
+       implicit conversion either from range<> to id<> or the opposite */
+    return get_global_id()/id<Dimensions> { get_local_range() };
+  }
+
+
+  /** Return the constituent element of the group id representing the
+      work-group's position within the overall nd_range in the given
+      dimension.
+  */
+  size_t get_group(int dimension) const {
+    return get_group()[dimension];
+  }
+
+
+  /// Return the flattened id of the current work-group
+  size_t get_group_linear_id() const {
+    return detail::linear_id(get_num_groups(), get_group());
+  }
+
+
+  /// Return the number of groups in the nd_range
+  id<Dimensions> get_num_groups() const {
+    return get_nd_range().get_group();
+  }
+
+  /// Return the number of groups for dimension in the nd_range
+  size_t get_num_groups(int dimension) const {
+     return get_num_groups()[dimension];
+  }
+
+
+  /// Return a range<> representing the dimensions of the nd_range<>
+  range<Dimensions> get_global_range() const {
+    return get_nd_range().get_global();
+  }
+
+
+  /// Return a range<> representing the dimensions of the current work-group
+  range<Dimensions> get_local_range() const {
+    return get_nd_range().get_local();
+  }
+
+
+  /** Return an id<> representing the n-dimensional offset provided to the
+      constructor of the nd_range<> and that is added by the runtime to the
+      global-ID of each work-item
+  */
+  id<Dimensions> get_offset() const { return get_nd_range().get_offset(); }
+
+
+  /// Return the nd_range<> of the current execution
+  nd_range<Dimensions> get_nd_range() const { return ND_range; }
+
+
+  /** Allows projection down to an item
+
+      \todo Add to the specification
+  */
+  item<Dimensions> get_item() const {
+    return { get_global_range(), get_global_id(), get_offset() };
+  }
+
+
+  /** Execute a barrier with memory ordering on the local address space,
+      global address space or both based on the value of flag
+
+      The current work-item will wait at the barrier until all work-items
+      in the current work-group have reached the barrier.
+
+      In addition, the barrier performs a fence operation ensuring that all
+      memory accesses in the specified address space issued before the
+      barrier complete before those issued after the barrier
+  */
+  void barrier(access::fence_space flag =
+               access::fence_space::global_and_local) const {
+#if defined(_OPENMP) && !defined(TRISYCL_NO_BARRIER)
+    /* Use OpenMP barrier in the implementation with 1 OpenMP thread per
+       work-item of the work-group */
+#pragma omp barrier
+#else
+    // \todo To be implemented efficiently otherwise
+    detail::unimplemented();
+#endif
+  }
+
+
+  // For the triSYCL implementation, need to set the local index
+  void set_local(id<Dimensions> Index) { local_index = Index; }
+
+
+  // For the triSYCL implementation, need to set the global index
+  void set_global(id<Dimensions> Index) { global_index = Index; }
+
+};
+
+/// @} End the parallelism Doxygen group
+
+}
+}
+
+/*
+    # Some Emacs stuff:
+    ### Local Variables:
+    ### ispell-local-dictionary: "american"
+    ### eval: (flyspell-prog-mode)
+    ### End:
+*/
+
+#endif // TRISYCL_SYCL_H_ITEM_HPP
diff --git a/include/CL/sycl/info/device.hpp b/include/CL/sycl/info/device.hpp
@@ -9,6 +9,8 @@
     License. See LICENSE.TXT for details.
 */
 
+#include "CL/sycl/id.hpp"
+#include "CL/sycl/opencl_types.hpp"
 #include "CL/sycl/info/param_traits.hpp"
 
 namespace cl {
@@ -189,6 +191,15 @@ using device_queue_properties = unsigned int;
     \todo To be implemented, return always void.
 */
 TRISYCL_INFO_PARAM_TRAITS_ANY_T(info::device, void)
+TRISYCL_INFO_PARAM_TRAITS(info::device::device_type, info::device_type)
+TRISYCL_INFO_PARAM_TRAITS(info::device::local_mem_size, cl::sycl::cl_ulong)
+TRISYCL_INFO_PARAM_TRAITS(info::device::max_compute_units, cl::sycl::cl_uint)
+TRISYCL_INFO_PARAM_TRAITS(info::device::max_mem_alloc_size, cl::sycl::cl_ulong)
+TRISYCL_INFO_PARAM_TRAITS(info::device::max_work_group_size, std::size_t)
+TRISYCL_INFO_PARAM_TRAITS(info::device::max_work_item_sizes, cl::sycl::id<3>)
+TRISYCL_INFO_PARAM_TRAITS(info::device::name, string_class)
+TRISYCL_INFO_PARAM_TRAITS(info::device::profile, string_class)
+TRISYCL_INFO_PARAM_TRAITS(info::device::vendor, string_class)
 
 }
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -42,7 +42,7 @@ struct error_handler { @@
       */
       static trisycl::default_error_handler default_handler;
+      // Virtual to call the real destructor
       virtual ~error_handler() = 0;
     };
@@ Expand Down @@