Skip to content

Commit

Permalink
Merge pull request triSYCL#149 from keryell/h_item
Browse files Browse the repository at this point in the history
Partial h_item implementation to have the Parallel STL running
  • Loading branch information
keryell authored Jun 13, 2018
2 parents 85ca7ab + bdc06b6 commit 26ccc0d
Show file tree
Hide file tree
Showing 24 changed files with 451 additions and 216 deletions.
1 change: 1 addition & 0 deletions include/CL/sycl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include "CL/sycl/exception.hpp"
#include "CL/sycl/group.hpp"
#include "CL/sycl/handler.hpp"
#include "CL/sycl/h_item.hpp"
#include "CL/sycl/id.hpp"
#include "CL/sycl/image.hpp"
#include "CL/sycl/item.hpp"
Expand Down
3 changes: 3 additions & 0 deletions include/CL/sycl/accessor/detail/accessor_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ class accessor_base {
#ifdef TRISYCL_OPENCL
/// Get the boost::compute::buffer or throw if unset
virtual boost::compute::buffer get_cl_buffer() const = 0;

// Virtual to call the real destructor
virtual ~accessor_base() {}
#endif

};
Expand Down
7 changes: 7 additions & 0 deletions include/CL/sycl/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
#ifdef TRISYCL_OPENCL
#include "CL/sycl/device/detail/opencl_device.hpp"
#endif
#include "CL/sycl/id.hpp"
#include "CL/sycl/info/device.hpp"
#include "CL/sycl/device_selector.hpp"
#include "CL/sycl/opencl_types.hpp"
#include "CL/sycl/platform.hpp"

namespace cl {
Expand Down Expand Up @@ -261,6 +263,11 @@ inline auto device::get_info<info::device::max_work_group_size>() const {
return size_t { 8 };
}

/** Answer the \c info::device::max_work_item_sizes query

    This specialization does not probe anything: it always reports the
    fixed triple { 128, 128, 128 } as a \c cl::sycl::id<3>.
*/
template <>
inline auto device::get_info<info::device::max_work_item_sizes>() const {
  cl::sycl::id<3> sizes { 128, 128, 128 };
  return sizes;
}

template <>
inline auto device::get_info<info::device::max_compute_units>() const {
return size_t { 8 };
Expand Down
2 changes: 1 addition & 1 deletion include/CL/sycl/error_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct error_handler {
*/
static trisycl::default_error_handler default_handler;


// Virtual to call the real destructor
virtual ~error_handler() = 0;
};

Expand Down
29 changes: 7 additions & 22 deletions include/CL/sycl/group.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef TRISYCL_SYCL_GROUP_HPP
#define TRISYCL_SYCL_GROUP_HPP

/** \file The OpenCL SYCL nd_item<>
/** \file The OpenCL SYCL group<>
Ronan at Keryell point FR
Expand All @@ -13,6 +13,7 @@
#include <functional>

#include "CL/sycl/detail/linear_id.hpp"
#include "CL/sycl/h_item.hpp"
#include "CL/sycl/id.hpp"
#include "CL/sycl/nd_range.hpp"
#include "CL/sycl/range.hpp"
Expand All @@ -26,8 +27,8 @@ struct group;
namespace detail {

template <int Dimensions = 1, typename ParallelForFunctor>
void parallel_for_workitem(const group<Dimensions> &g,
ParallelForFunctor f);
void parallel_for_workitem_in_group(const group<Dimensions> &g,
ParallelForFunctor f);

}

Expand Down Expand Up @@ -175,26 +176,10 @@ struct group {


/** Loop on the work-items inside a work-group
\todo Add this method in the specification
*/
void parallel_for_work_item(std::function<void(nd_item<dimensionality>)> f)
const {
detail::parallel_for_workitem(*this, f);
}


/** Loop on the work-items inside a work-group
\todo Add this method in the specification
*/
void parallel_for_work_item(std::function<void(item<dimensionality>)> f)
*/
void parallel_for_work_item(std::function<void(h_item<dimensionality>)> f)
const {
auto item_adapter = [=] (nd_item<dimensionality> ndi) {
item<dimensionality> i = ndi.get_item();
f(i);
};
detail::parallel_for_workitem(*this, item_adapter);
detail::parallel_for_workitem_in_group(*this, f);
}

};
Expand Down
246 changes: 246 additions & 0 deletions include/CL/sycl/h_item.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
#ifndef TRISYCL_SYCL_H_ITEM_HPP
#define TRISYCL_SYCL_H_ITEM_HPP

/** \file The OpenCL SYCL h_item<>
Ronan at Keryell point FR
This file is distributed under the University of Illinois Open Source
License. See LICENSE.TXT for details.
*/

#include <cstddef>

#include "CL/sycl/access.hpp"
#include "CL/sycl/detail/linear_id.hpp"
#include "CL/sycl/detail/unimplemented.hpp"
#include "CL/sycl/id.hpp"
#include "CL/sycl/item.hpp"
#include "CL/sycl/nd_range.hpp"
#include "CL/sycl/range.hpp"

namespace cl {
namespace sycl {

/** \addtogroup parallelism Expressing parallelism through kernels
@{
*/

/** \c h_item<int dimensions> identifies one instance of the function
    object executed by \c group::parallel_for_work_item, that is one
    point of the local \c range<int dimensions> given to the \c
    parallel_for_work_item call, or of the range of the corresponding
    \c parallel_for_work_group call when no range is given to \c
    parallel_for_work_item.

    It carries enough information to identify the local and global
    items of the work-item according to what was given to \c
    parallel_for_work_group (physical ids), as well as the logical
    local items of the work-item in the flexible range. All the
    returned item objects are offset-less.

    Instances of \c h_item<int dimensions> are not meant to be
    constructed by the user: the runtime passes one to each instance
    of the function object.
*/
template <int Dimensions = 1>
struct h_item {
  /// \todo add this Boost::multi_array or STL concept to the
  /// specification?
  static constexpr auto dimensionality = Dimensions;

private:

  /// Position of the work-item in the global iteration space
  id<Dimensions> global_index;

  /** Position of the work-item inside its work-group

      This is only a cache: the full constructor derives it from the
      global index and the nd_range<>
  */
  id<Dimensions> local_index;

  /// The nd_range<> given when the h_item<> was constructed
  nd_range<Dimensions> ND_range;

public:

  /** Create an h_item<> that only knows its nd_range<>

      The global and local indices are not set here.

      \todo This is for the triSYCL implementation, which is expected
      to call set_global() and set_local() later. This should be hidden
      from the user.
  */
  h_item(nd_range<Dimensions> ndr)
    : ND_range { ndr }
  {}


  /** Create a fully specified h_item<> from a global index and an
      nd_range<>

      The local index is computed as the global index minus the offset
      of the nd_range<>, modulo the local (work-group) range.

      \todo This is for validation purpose. Hide this from the
      programmer somehow
  */
  h_item(id<Dimensions> global_index, nd_range<Dimensions> ndr)
    : global_index { global_index },
      // Here global_index names the constructor parameter, not the member
      local_index {
        (global_index - ndr.get_offset())%id<Dimensions> { ndr.get_local() }
      },
      ND_range { ndr }
  {}


  /** Keep the defaulted default constructor so that an h_item<> can
      also be created first and filled in afterwards

      \todo Make most of the constructors protected, reserved to the
      implementation
  */
  h_item() = default;


  /// Return the global id, i.e. the position of the work-item in the
  /// global iteration space
  id<Dimensions> get_global_id() const {
    return global_index;
  }


  /// Return the component of the global id in the given dimension
  size_t get_global_id(int dimension) const {
    auto global = get_global_id();
    return global[dimension];
  }


  /** Return the flattened global id of the current work-item

      The global range, the global id and the offset are handed to
      detail::linear_id(), which is expected to take the offset out.
  */
  size_t get_global_linear_id() const {
    return detail::linear_id(get_global_range(),
                             get_global_id(),
                             get_offset());
  }


  /// Return the local id, i.e. the position of the work-item within
  /// the current work-group
  id<Dimensions> get_local_id() const {
    return local_index;
  }


  /// Return the component of the local id in the given dimension
  size_t get_local_id(int dimension) const {
    auto local = get_local_id();
    return local[dimension];
  }


  /// Return the flattened id of the current work-item within the
  /// current work-group
  size_t get_local_linear_id() const {
    return detail::linear_id(get_local_range(), get_local_id());
  }


  /** Return the id of the work-group of this work-item within the
      overall nd_range, computed as the global id divided by the local
      range
  */
  id<Dimensions> get_group() const {
    /* The local range is first turned explicitly into an id<>, to
       avoid the ambiguity between the implicit conversions from
       range<> to id<> and from id<> to range<> */
    id<Dimensions> work_group_size { get_local_range() };
    return get_global_id()/work_group_size;
  }


  /// Return the component of the work-group id in the given dimension
  size_t get_group(int dimension) const {
    auto group_id = get_group();
    return group_id[dimension];
  }


  /// Return the flattened id of the current work-group
  size_t get_group_linear_id() const {
    return detail::linear_id(get_num_groups(), get_group());
  }


  /// Return the number of work-groups in the nd_range, as given by
  /// nd_range<>::get_group()
  id<Dimensions> get_num_groups() const {
    auto ndr = get_nd_range();
    return ndr.get_group();
  }


  /// Return the number of work-groups of the nd_range in the given
  /// dimension
  size_t get_num_groups(int dimension) const {
    auto groups = get_num_groups();
    return groups[dimension];
  }


  /// Return a range<> holding the global dimensions of the nd_range<>
  range<Dimensions> get_global_range() const {
    auto ndr = get_nd_range();
    return ndr.get_global();
  }


  /// Return a range<> holding the dimensions of the current work-group
  range<Dimensions> get_local_range() const {
    auto ndr = get_nd_range();
    return ndr.get_local();
  }


  /** Return the id<> holding the n-dimensional offset that was given
      to the constructor of the nd_range<> and that is added by the
      runtime to the global id of each work-item
  */
  id<Dimensions> get_offset() const {
    auto ndr = get_nd_range();
    return ndr.get_offset();
  }


  /// Return a copy of the nd_range<> of the current execution
  nd_range<Dimensions> get_nd_range() const {
    return ND_range;
  }


  /** Project this h_item<> down to an item<> built from the global
      range, the global id and the offset

      \todo Add to the specification
  */
  item<Dimensions> get_item() const {
    return item<Dimensions> { get_global_range(),
                              get_global_id(),
                              get_offset() };
  }


  /** Execute a work-group barrier

      The current work-item waits at the barrier until all the
      work-items of the current work-group have reached it. The barrier
      is also meant to act as a fence, so that the memory accesses
      issued before the barrier in the requested address space complete
      before those issued after it.

      \param[in] flag selects the local address space, the global
      address space or both. This implementation does not look at it.

      When OpenMP is not used, or when TRISYCL_NO_BARRIER is defined,
      there is no implementation and detail::unimplemented() is called.
  */
  void barrier(access::fence_space flag =
               access::fence_space::global_and_local) const {
#if !defined(_OPENMP) || defined(TRISYCL_NO_BARRIER)
    // \todo To be implemented efficiently otherwise
    detail::unimplemented();
#else
    /* Rely on the OpenMP barrier, the implementation using 1 OpenMP
       thread per work-item of the work-group */
#pragma omp barrier
#endif
  }


  /// For the triSYCL implementation only: overwrite the local index
  void set_local(id<Dimensions> Index) {
    local_index = Index;
  }


  /// For the triSYCL implementation only: overwrite the global index
  void set_global(id<Dimensions> Index) {
    global_index = Index;
  }

};

/// @} End the parallelism Doxygen group

}
}

/*
# Some Emacs stuff:
### Local Variables:
### ispell-local-dictionary: "american"
### eval: (flyspell-prog-mode)
### End:
*/

#endif // TRISYCL_SYCL_H_ITEM_HPP
11 changes: 11 additions & 0 deletions include/CL/sycl/info/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
License. See LICENSE.TXT for details.
*/

#include "CL/sycl/id.hpp"
#include "CL/sycl/opencl_types.hpp"
#include "CL/sycl/info/param_traits.hpp"

namespace cl {
Expand Down Expand Up @@ -189,6 +191,15 @@ using device_queue_properties = unsigned int;
\todo To be implemented; for now it always returns void.
*/
TRISYCL_INFO_PARAM_TRAITS_ANY_T(info::device, void)
TRISYCL_INFO_PARAM_TRAITS(info::device::device_type, info::device_type)
TRISYCL_INFO_PARAM_TRAITS(info::device::local_mem_size, cl::sycl::cl_ulong)
TRISYCL_INFO_PARAM_TRAITS(info::device::max_compute_units, cl::sycl::cl_uint)
TRISYCL_INFO_PARAM_TRAITS(info::device::max_mem_alloc_size, cl::sycl::cl_ulong)
TRISYCL_INFO_PARAM_TRAITS(info::device::max_work_group_size, std::size_t)
TRISYCL_INFO_PARAM_TRAITS(info::device::max_work_item_sizes, cl::sycl::id<3>)
TRISYCL_INFO_PARAM_TRAITS(info::device::name, string_class)
TRISYCL_INFO_PARAM_TRAITS(info::device::profile, string_class)
TRISYCL_INFO_PARAM_TRAITS(info::device::vendor, string_class)

}
}
Expand Down
Loading

0 comments on commit 26ccc0d

Please sign in to comment.