From c8061ab21ad2f3d6a0e058a56c98b5bb968acf41 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 8 Jun 2023 13:27:20 +0100 Subject: [PATCH 01/20] mem_host_flags: use size_t for element count (#1755) More recent GCC versions (e.g. 12.2, 13.1) report that the argument to `new[]` in the `Init` methods exceeds the maximum object size, seemingly related to the negative range of the widened `int`. Use an unsigned type to avoid the warning and propagate the signedness change to other uses of the `num_elements` member. Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1582 Signed-off-by: Sven van Haastregt --- .../mem_host_flags/C_host_memory_block.h | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h index 78692d17f6..0784c2c2f8 100644 --- a/test_conformance/mem_host_flags/C_host_memory_block.h +++ b/test_conformance/mem_host_flags/C_host_memory_block.h @@ -24,14 +24,14 @@ template class C_host_memory_block { public: - int num_elements; + size_t num_elements; int element_size; T *pData; C_host_memory_block(); ~C_host_memory_block(); - void Init(int num_elem, T &value); - void Init(int num_elem); + void Init(size_t num_elem, T &value); + void Init(size_t num_elem); void Set_to(T &val); void Set_to_zero(); bool Equal_to(T &val); @@ -40,7 +40,7 @@ template class C_host_memory_block { bool Equal_rect(C_host_memory_block &another, size_t *host_origin, size_t *region, size_t host_row_pitch, size_t host_slice_pitch); - bool Equal(T *pData, int num_elements); + bool Equal(T *pData, size_t num_elements); bool Equal_rect_from_orig(C_host_memory_block &another, size_t *soffset, size_t *region, size_t host_row_pitch, @@ -63,20 +63,20 @@ template C_host_memory_block::~C_host_memory_block() num_elements = 0; } -template void C_host_memory_block::Init(int num_elem, T &value) +template void C_host_memory_block::Init(size_t 
num_elem, T &value) { if (pData != NULL) delete[] pData; pData = new T[num_elem]; - for (int i = 0; i < num_elem; i++) pData[i] = value; + for (size_t i = 0; i < num_elem; i++) pData[i] = value; num_elements = num_elem; } -template void C_host_memory_block::Init(int num_elem) +template void C_host_memory_block::Init(size_t num_elem) { if (pData != NULL) delete[] pData; pData = new T[num_elem]; - for (int i = 0; i < num_elem; i++) pData[i] = (T)i; + for (size_t i = 0; i < num_elem; i++) pData[i] = (T)i; num_elements = num_elem; } @@ -88,14 +88,14 @@ template void C_host_memory_block::Set_to_zero() template void C_host_memory_block::Set_to(T &val) { - for (int i = 0; i < num_elements; i++) pData[i] = val; + for (size_t i = 0; i < num_elements; i++) pData[i] = val; } template bool C_host_memory_block::Equal_to(T &val) { - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == val) count++; } @@ -106,9 +106,9 @@ template bool C_host_memory_block::Equal_to(T &val) template bool C_host_memory_block::Equal(C_host_memory_block &another) { - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == another.pData[i]) count++; } @@ -117,13 +117,13 @@ bool C_host_memory_block::Equal(C_host_memory_block &another) } template -bool C_host_memory_block::Equal(T *pIn_Data, int Innum_elements) +bool C_host_memory_block::Equal(T *pIn_Data, size_t Innum_elements) { if (this->num_elements != Innum_elements) return false; - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == pIn_Data[i]) count++; } @@ -134,7 +134,7 @@ bool C_host_memory_block::Equal(T *pIn_Data, int Innum_elements) template size_t C_host_memory_block::Count(T &val) { size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if 
(pData[i] == val) count++; } From 475a37abbfa22a55fe47bf76d5c7904b3a37730a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 9 Jun 2023 11:25:20 +0100 Subject: [PATCH 02/20] [NFC] Do not use reserved names for include guards (#1737) Names that begin with an underscore followed by an uppercase letter are reserved for the C++ implementation. Signed-off-by: Sven van Haastregt --- test_common/harness/compat.h | 6 +++--- test_common/harness/crc32.h | 4 ++-- test_conformance/c11_atomics/common.h | 6 +++--- test_conformance/c11_atomics/host_atomics.h | 6 +++--- test_conformance/d3d10/harness.h | 4 ++-- .../cl_khr_command_buffer/basic_command_buffer.h | 6 +++--- .../mutable_command_basic.h | 6 +++--- .../cl_khr_command_buffer_mutable_dispatch/procs.h | 6 +++--- .../cl_khr_command_buffer/command_buffer_test_base.h | 6 +++--- test_conformance/extensions/cl_khr_command_buffer/procs.h | 6 +++--- .../extensions/cl_khr_external_semaphore/procs.h | 6 +++--- .../images/kernel_read_write/test_cl_ext_image_buffer.hpp | 6 +++--- .../non_uniform_work_group/TestNonUniformWorkGroup.h | 7 +++---- test_conformance/non_uniform_work_group/tools.h | 6 +++--- test_conformance/pipes/kernels.h | 6 +++--- test_conformance/relationals/test_comparisons_fp.h | 6 +++--- 16 files changed, 46 insertions(+), 47 deletions(-) diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 4053b7ee72..a42f29172d 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _COMPAT_H_ -#define _COMPAT_H_ +#ifndef COMPAT_H_ +#define COMPAT_H_ #if defined(_WIN32) && defined(_MSC_VER) #include @@ -398,4 +398,4 @@ EXTERN_C int __builtin_clz(unsigned int pattern); #define sleep(sec) Sleep((sec)*1000) #endif -#endif // _COMPAT_H_ +#endif // COMPAT_H_ diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h index 65ca15eea1..6958701108 100644 --- a/test_common/harness/crc32.h +++ b/test_common/harness/crc32.h @@ -15,8 +15,8 @@ Agreement or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. ******************************************************************/ -#ifndef _CRC32_H_ -#define _CRC32_H_ +#ifndef CRC32_H_ +#define CRC32_H_ #include #include diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 6c7d0b12bf..37c37e874f 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _COMMON_H_ -#define _COMMON_H_ +#ifndef COMMON_H_ +#define COMMON_H_ #include "harness/testHarness.h" #include "harness/typeWrappers.h" @@ -1567,4 +1567,4 @@ int CBasicTest::ExecuteSingleTest( return 0; } -#endif //_COMMON_H_ +#endif // COMMON_H_ diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 6c4e783aa1..b865970f44 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _HOST_ATOMICS_H_ -#define _HOST_ATOMICS_H_ +#ifndef HOST_ATOMICS_H_ +#define HOST_ATOMICS_H_ #include "harness/testHarness.h" @@ -247,4 +247,4 @@ CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingTyp bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); -#endif //_HOST_ATOMICS_H_ +#endif // HOST_ATOMICS_H_ diff --git a/test_conformance/d3d10/harness.h b/test_conformance/d3d10/harness.h index 184e52cb5b..afeb4966a8 100644 --- a/test_conformance/d3d10/harness.h +++ b/test_conformance/d3d10/harness.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _HARNESS_H_ -#define _HARNESS_H_ +#ifndef HARNESS_H_ +#define HARNESS_H_ #define _CRT_SECURE_NO_WARNINGS diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index b1d36024c0..44f4cc6307 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef _CL_KHR_BASIC_COMMAND_BUFFER_H -#define _CL_KHR_BASIC_COMMAND_BUFFER_H +#ifndef CL_KHR_BASIC_COMMAND_BUFFER_H +#define CL_KHR_BASIC_COMMAND_BUFFER_H #include "command_buffer_test_base.h" #include "harness/typeWrappers.h" @@ -99,4 +99,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return TEST_PASS; } -#endif // _CL_KHR_BASIC_COMMAND_BUFFER_H +#endif // CL_KHR_BASIC_COMMAND_BUFFER_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 9056a00d90..966695834b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#ifndef _CL_KHR_MUTABLE_COMMAND_BASIC_H -#define _CL_KHR_MUTABLE_COMMAND_BASIC_H +#ifndef CL_KHR_MUTABLE_COMMAND_BASIC_H +#define CL_KHR_MUTABLE_COMMAND_BASIC_H #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" @@ -104,4 +104,4 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest const size_t global_work_size = 4 * sizeof(cl_int); }; -#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H \ No newline at end of file +#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 08512caef5..4b6dacb699 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H -#define _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +#ifndef CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H #include @@ -59,4 +59,4 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/ +#endif // CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h index 0fd2e4ec70..48abe25d70 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H -#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#ifndef CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#define CL_KHR_COMMAND_BUFFER_TEST_BASE_H #include #include "harness/deviceInfo.h" @@ -174,4 +174,4 @@ class clCommandBufferWrapper { } -#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#endif // CL_KHR_COMMAND_BUFFER_TEST_BASE_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 63e004a7b4..53a7d93490 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H -#define _CL_KHR_COMMAND_BUFFER_PROCS_H +#ifndef CL_KHR_COMMAND_BUFFER_PROCS_H +#define CL_KHR_COMMAND_BUFFER_PROCS_H #include @@ -131,4 +131,4 @@ extern int test_event_info_reference_count(cl_device_id device, cl_command_queue queue, int num_elements); -#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/ +#endif // CL_KHR_COMMAND_BUFFER_PROCS_H diff --git a/test_conformance/extensions/cl_khr_external_semaphore/procs.h b/test_conformance/extensions/cl_khr_external_semaphore/procs.h index 753c8fe227..7e1c4caf3a 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/procs.h +++ b/test_conformance/extensions/cl_khr_external_semaphore/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H -#define _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H +#ifndef CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H +#define CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H #include @@ -79,4 +79,4 @@ extern int test_external_semaphores_invalid_command(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#endif /* CL_KHR_EXTERNAL_SEMAPHORE */ +#endif // CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp index c6646330b8..56d15808d9 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -14,8 +14,8 @@ // limitations under the License. 
// -#ifndef _TEST_CL_EXT_IMAGE_BUFFER -#define _TEST_CL_EXT_IMAGE_BUFFER +#ifndef TEST_CL_EXT_IMAGE_BUFFER +#define TEST_CL_EXT_IMAGE_BUFFER #define TEST_IMAGE_SIZE 20 @@ -121,4 +121,4 @@ static inline void image_desc_init(cl_image_desc* desc, } } -#endif /* _TEST_CL_EXT_IMAGE_BUFFER */ \ No newline at end of file +#endif // TEST_CL_EXT_IMAGE_BUFFER diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h index 414d10047c..f584606148 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _TESTNONUNIFORMWORKGROUP_H -#define _TESTNONUNIFORMWORKGROUP_H +#ifndef TESTNONUNIFORMWORKGROUP_H +#define TESTNONUNIFORMWORKGROUP_H #include "procs.h" #include @@ -147,5 +147,4 @@ class SubTestExecutor { unsigned int _overallCounter; }; -#endif // _TESTNONUNIFORMWORKGROUP_H - +#endif // TESTNONUNIFORMWORKGROUP_H diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h index 2e63c3ddeb..ba01fc991b 100644 --- a/test_conformance/non_uniform_work_group/tools.h +++ b/test_conformance/non_uniform_work_group/tools.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#ifndef _TOOLS_H -#define _TOOLS_H +#ifndef TOOLS_H +#define TOOLS_H #include "procs.h" #include @@ -106,4 +106,4 @@ namespace Error { }; } -#endif // _TOOLS_H +#endif // TOOLS_H diff --git a/test_conformance/pipes/kernels.h b/test_conformance/pipes/kernels.h index a2fb70c059..a897e5e848 100644 --- a/test_conformance/pipes/kernels.h +++ b/test_conformance/pipes/kernels.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _KERNELS_H_ -#define _KERNELS_H_ +#ifndef KERNELS_H_ +#define KERNELS_H_ static const char* pipe_readwrite_struct_kernel_code = { "typedef struct{\n" @@ -127,4 +127,4 @@ static const char* pipe_convenience_readwrite_struct_kernel_code = { " read_pipe(in_pipe, &dst[gid]);\n" "}\n" }; -#endif //_KERNELS_H_ +#endif // KERNELS_H_ diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h index 66c62c2d13..3401163ea0 100644 --- a/test_conformance/relationals/test_comparisons_fp.h +++ b/test_conformance/relationals/test_comparisons_fp.h @@ -14,8 +14,8 @@ // limitations under the License. 
// -#ifndef _TEST_COMPARISONS_FP_H -#define _TEST_COMPARISONS_FP_H +#ifndef TEST_COMPARISONS_FP_H +#define TEST_COMPARISONS_FP_H #include #include @@ -225,4 +225,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return TEST_PASS; } -#endif // _TEST_COMPARISONS_FP_H +#endif // TEST_COMPARISONS_FP_H From 095091bc5755fb3a239f049a6a8ade1d82169fc6 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 08:39:22 +0200 Subject: [PATCH 03/20] Added cl_khr_fp16 extension support for test_vec_type_hint from basic (#1724) * Added cl_khr_fp16 extension support for test_vec_type_hint from basic (issue #142, basic) * Added correction to fix casting problem --- test_conformance/basic/test_vec_type_hint.cpp | 152 ++++++++++-------- 1 file changed, 85 insertions(+), 67 deletions(-) diff --git a/test_conformance/basic/test_vec_type_hint.cpp b/test_conformance/basic/test_vec_type_hint.cpp index 33168b1369..0ba105db63 100644 --- a/test_conformance/basic/test_vec_type_hint.cpp +++ b/test_conformance/basic/test_vec_type_hint.cpp @@ -13,28 +13,27 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" #include #include #include #include #include - +#include #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" - static const char *sample_kernel = { - "%s\n" // optional pragma string - "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = src[tid];\n" - "\n" - "}\n" + "%s\n" + "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global " + "int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" }; int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) @@ -42,66 +41,85 @@ int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_que int error; int vec_type_index, vec_size_index; - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - const char *size_names[] = {"", "2", "4", "8", "16"}; - char *program_source; - - program_source = (char*)malloc(sizeof(char)*4096); + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, + kLong, kULong, kFloat, kHalf, kDouble }; + const char *size_names[] = { "", "2", "4", "8", "16" }; + std::vector program_source(4096); + + for (vec_type_index = 0; + vec_type_index < sizeof(vecType) / sizeof(vecType[0]); vec_type_index++) + { + + if (vecType[vec_type_index] == kHalf + && !is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + continue; + } + else if (vecType[vec_type_index] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info( + "Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + else if ((vecType[vec_type_index] == kLong + || vecType[vec_type_index] == kULong) + && !gHasLong) + { + log_info( + "Extension 
cl_khr_int64 not supported; skipping long tests.\n"); + continue; + } - for (vec_type_index=0; vec_type_index<10; vec_type_index++) { - if (vecType[vec_type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); - continue; + for (vec_size_index = 0; vec_size_index < 5; vec_size_index++) + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + size_t global[] = { 1, 1, 1 }; + + log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + char extension[128] = { 0 }; + if (vecType[vec_type_index] == kDouble) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); + else if (vecType[vec_type_index] == kHalf) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + + sprintf(program_source.data(), sample_kernel, extension, + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + + const char *src = &program_source.front(); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &src, "sample_test"); + test_error(error, "create_single_kernel_helper failed"); + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, + 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); } - 
log_info("Testing doubles.\n"); - } - - if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong) - { - if (!gHasLong) - { - log_info("Extension cl_khr_int64 not supported; skipping long tests.\n"); - continue; - } - } - - for (vec_size_index=0; vec_size_index<5; vec_size_index++) { - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper in, out; - size_t global[] = {1,1,1}; - - log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - program_source[0] = '\0'; - sprintf(program_source, sample_kernel, - (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); - if( error != 0 ) - return error; - - in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - error = clSetKernelArg(kernel, 0, sizeof(in), &in); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 1, sizeof(out), &out); - test_error(error, "clSetKernelArg failed"); - - error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - } } - free(program_source); - return 0; } From 16a75dc0af2e0c55d27a91ffefd0aa1b97b3f484 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 17:41:39 +0200 Subject: [PATCH 04/20] Added cl_khr_fp16 extension support for test_vector_creation from basic (#1728) * Added cl_khr_fp16 extension support for vector_creation test from basic * Added corrections 
related to vendor's review * Added protection to avoid similar creation cases * Added comment for recent correction * cosmetics * Corrected factor array to restore lost capacity of original test.. leaving only 16-sizes vector tests limited. --- .../basic/test_vector_creation.cpp | 489 +++++++++++------- 1 file changed, 294 insertions(+), 195 deletions(-) diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index d9530b4e9b..801c72b18b 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,48 +17,41 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include - - +#include #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. -#define MAX_CODE_SIZE (1024*32) - -const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; -const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; - -// Creates a kernel by enumerating all possible ways of building the vector out of vloads -// skip_to_results will skip results up to a given number. If the amount of code generated -// is greater than MAX_CODE_SIZE, this function will return the number of results used, -// which can then be used as the skip_to_result value to continue where it left off. 
-int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { +#define MAX_CODE_SIZE (1024 * 32) + +static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 }; +static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 }; +static const char *size_names[] = { "", "2", "3", "4", "8", + "16", "!!a", "!!b", "!!c", "!!d" }; +static char extension[128] = { 0 }; + +// Creates a kernel by enumerating all possible ways of building the vector out +// of vloads skip_to_results will skip results up to a given number. If the +// amount of code generated is greater than MAX_CODE_SIZE, this function will +// return the number of results used, which can then be used as the +// skip_to_result value to continue where it left off. +int create_kernel(ExplicitType type, int output_size, char *program, + int *number_of_results, int skip_to_result) +{ int number_of_sizes; - switch (output_size) { - case 1: - number_of_sizes = 1; - break; - case 2: - number_of_sizes = 2; - break; - case 3: - number_of_sizes = 3; - break; - case 4: - number_of_sizes = 4; - break; - case 8: - number_of_sizes = 5; - break; - case 16: - number_of_sizes = 6; - break; - default: - log_error("Invalid size: %d\n", output_size); - return -1; + switch (output_size) + { + case 1: number_of_sizes = 1; break; + case 2: number_of_sizes = 2; break; + case 3: number_of_sizes = 3; break; + case 4: number_of_sizes = 4; break; + case 8: number_of_sizes = 5; break; + case 16: number_of_sizes = 6; break; + default: log_error("Invalid size: %d\n", output_size); return -1; } int total_results = 0; @@ -67,102 +60,125 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number int total_program_length = 0; int aborted_due_to_size = 0; - if (skip_to_result < 0) - skip_to_result = 0; + if (skip_to_result < 0) skip_to_result = 0; // The line of code for the vector creation char line[1024]; - // Keep track of what size vector we are using in each 
position so we can iterate through all fo them + // Keep track of what size vector we are using in each position so we can + // iterate through all fo them int pos[DEPTH]; int max_size = output_size; if (DEBUG > 1) log_info("max_size: %d\n", max_size); program[0] = '\0'; - sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", - type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + sprintf(program, + "%s\n__kernel void test_vector_creation(__global %s *src, __global " + "%s%s *result) {\n", + extension, get_explicit_type_name(type), + get_explicit_type_name(type), + (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]); total_program_length += (int)strlen(program); - char storePrefix[ 128 ], storeSuffix[ 128 ]; + char storePrefix[128], storeSuffix[128]; - // Start out trying sizes 1,1,1,1,1... - for (int i=0; i 1) { + while (!done) + { + if (DEBUG > 1) + { log_info("pos size[] = ["); - for (int k=0; k 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + if (DEBUG > 1) + log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); - // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // If they did not fit the required size exactly it is too long, so + // there is no point in checking any other combinations // of the sizes to the right. Prune them from the search. 
- if (size_so_far != max_size) { + if (size_so_far != max_size) + { // Zero all the sizes to the right - for (int k=vloads+1; k=0; d--) { + for (int d = vloads; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over then we are done done = 1; break; } - } else { + } + else + { break; } } - // Go on to the next size since this one (and all others "under" it) didn't fit + // Go on to the next size since this one (and all others "under" it) + // didn't fit continue; } // Generate the actual load line if we are building this part - line[0]= '\0'; - if (skip_to_result == 0 || total_results >= skip_to_result) { - if( number_of_sizes == 3 ) + line[0] = '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) + { + if (number_of_sizes == 3) { - sprintf( storePrefix, "vstore3( " ); - sprintf( storeSuffix, ", %d, result )", current_result ); + sprintf(storePrefix, "vstore3( "); + sprintf(storeSuffix, ", %d, result )", current_result); } else { - sprintf( storePrefix, "result[%d] = ", current_result ); - storeSuffix[ 0 ] = 0; + sprintf(storePrefix, "result[%d] = ", current_result); + storeSuffix[0] = 0; } - sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + sprintf(line, "\t%s(%s%d)(", storePrefix, + get_explicit_type_name(type), output_size); current_result++; int offset = 0; - for (int i=0; i MAX_CODE_SIZE) { + if (total_program_length > MAX_CODE_SIZE) + { aborted_due_to_size = 1; done = 1; } @@ -179,132 +196,194 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number if (DEBUG) log_info("line is: %s", line); - // If we did not use all of them, then we ignore any changes further to the right. - // We do this by causing those loops to skip on the next iteration. - if (vloads < DEPTH) { + // If we did not use all of them, then we ignore any changes further to + // the right. 
We do this by causing those loops to skip on the next + // iteration. + if (vloads < DEPTH) + { if (DEBUG > 1) log_info("done with this depth\n"); - for (int k=vloads; k=0; d--) { + for (int d = DEPTH - 1; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over at the far-left then we are done done = 1; break; } - } else { + } + else + { break; } } - if (done) - break; + if (done) break; // Continue until we are done. } - strcat(program, "}\n\n"); //log_info("%s\n", program); + strcat(program, "}\n\n"); // log_info("%s\n", program); total_program_length += 3; - if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", - get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + if (DEBUG) + log_info( + "\t\t(Program for vector type %s%s contains %d vector creations, " + "of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes - 1], + total_results, total_program_length / 1024.0, total_vloads); *number_of_results = current_result; - if (aborted_due_to_size) - return total_results; + if (aborted_due_to_size) return total_results; return 0; } - - -int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_vector_creation(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + const std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + // should be in sync with global array size_names + const std::vector vecSizes = { 1, 2, 3, 4, 8, 16 }; 
- char *program_source; - int error; + int error = CL_SUCCESS; int total_errors = 0; + int number_of_results = 0; - cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - void *input_data_converted; - void *output_data; - - int number_of_results;; - - input_data_converted = malloc(sizeof(cl_double)*16); - program_source = (char*)malloc(sizeof(char)*1024*1024*4); + std::vector input_data_converted(sizeof(cl_double) * 16); + std::vector program_source(sizeof(char) * 1024 * 1024 * 4); + std::vector output_data; // Iterate over all the types - for (int type_index=0; type_index<10; type_index++) { - if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + for (int type_index = 0; type_index < vecType.size(); type_index++) { - log_info("Long/ULong data type not supported on this device\n"); - continue; - } - - clMemWrapper input; - if (vecType[type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + if (!gHasLong + && ((vecType[type_index] == kLong) + || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + else if (vecType[type_index] == kDouble) + { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double " + "tests.\n"); continue; } - log_info("Testing doubles.\n"); + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); } + else if (vecType[type_index] == kHalf) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + continue; + } + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + } + + log_info("Testing %s.\n", 
get_explicit_type_name(vecType[type_index])); // Convert the data to the right format for the test. - memset(input_data_converted, 0xff, sizeof(cl_double)*16); - if (vecType[type_index] != kDouble) { - for (int j=0; j<16; j++) { - convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, - kInt, 0, kRoundToEven, vecType[type_index]); + memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16); + if (vecType[type_index] == kDouble) + { + const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15 }; + memcpy(input_data_converted.data(), &input_data_double, + sizeof(cl_double) * 16); + } + else if (vecType[type_index] == kHalf) + { + cl_half *buf = + reinterpret_cast(input_data_converted.data()); + for (int j = 0; j < 16; j++) + buf[j] = cl_half_from_float(float(j), CL_HALF_RTE); + } + else + { + for (int j = 0; j < 16; j++) + { + convert_explicit_value( + &j, + ((char *)input_data_converted.data()) + + get_explicit_type_size(vecType[type_index]) * j, + kInt, 0, kRoundToEven, vecType[type_index]); } - } else { - memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); } - input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, - (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); - if (error) { + clMemWrapper input = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + get_explicit_type_size(vecType[type_index]) * 16, + input_data_converted.data(), &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; continue; } // Iterate over all the vector sizes. 
- for (int size_index=1; size_index< 5; size_index++) { - size_t global[] = {1,1,1}; + for (int size_index = 1; size_index < vecSizes.size(); size_index++) + { + size_t global[] = { 1, 1, 1 }; int number_generated = -1; int previous_number_generated = 0; - log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); - while (number_generated != 0) { + log_info("Testing %s%s...\n", + get_explicit_type_name(vecType[type_index]), + size_names[size_index]); + while (number_generated != 0) + { clMemWrapper output; clKernelWrapper kernel; clProgramWrapper program; - number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); - if (number_generated != 0) { + number_generated = + create_kernel(vecType[type_index], vecSizes[size_index], + program_source.data(), &number_of_results, + number_generated); + if (number_generated != 0) + { if (previous_number_generated == 0) - log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); - log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + log_info("Code size greater than %gkB; splitting test " + "into multiple kernels.\n", + MAX_CODE_SIZE / 1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", + previous_number_generated, number_generated - 1); } - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); - if (error) { + char *src = program_source.data(); + error = create_single_kernel_helper(context, &program, &kernel, + 1, (const char **)&src, + "test_vector_creation"); + if (error) + { log_error("create_single_kernel_helper failed.\n"); total_errors++; break; } - output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - NULL, &error); - if (error) { + output = 
clCreateBuffer( + context, CL_MEM_WRITE_ONLY, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + NULL, &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; break; @@ -312,95 +391,115 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_q error = clSetKernelArg(kernel, 0, sizeof(input), &input); error |= clSetKernelArg(kernel, 1, sizeof(output), &output); - if (error) { + if (error) + { print_error(error, "clSetKernelArg failed"); total_errors++; break; } - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - if (error) { + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, + NULL, 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueNDRangeKernel failed"); total_errors++; break; } error = clFinish(queue); - if (error) { + if (error) + { print_error(error, "clFinish failed"); total_errors++; break; } - output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - if (output_data == NULL) { - log_error("Failed to allocate memory for output data.\n"); - total_errors++; - break; - } - memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - output_data, 0, NULL, NULL); - if (error) { + output_data.resize(number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + memset(output_data.data(), 0xff, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + error = clEnqueueReadBuffer( + queue, output, CL_TRUE, 0, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + output_data.data(), 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueReadBuffer 
failed"); total_errors++; - free(output_data); break; } // Check the results - char *res = (char *)output_data; - char *exp = (char *)input_data_converted; - for (int i=0; i Date: Fri, 16 Jun 2023 10:53:08 +0100 Subject: [PATCH 05/20] basic: fix unused-but-set variables (#1764) Remove the unused `numItems` variable. As this fixes all occurrences of this warning in test_basic, remove the suppression flag. Signed-off-by: Sven van Haastregt --- test_conformance/basic/CMakeLists.txt | 2 -- test_conformance/basic/test_work_item_functions.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index adf24bd80d..c07d32b661 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -70,6 +70,4 @@ if(APPLE) list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp) endif(APPLE) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") - include(../CMakeCommon.txt) diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp index d95915cf53..9683a8342f 100644 --- a/test_conformance/basic/test_work_item_functions.cpp +++ b/test_conformance/basic/test_work_item_functions.cpp @@ -91,7 +91,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma { for( int i = 0; i < NUM_TESTS; i++ ) { - size_t numItems = 1; for( size_t j = 0; j < dim; j++ ) { // All of our thread sizes should be within the max local sizes, since they're all <= 20 @@ -100,8 +99,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) localThreads[ j ]--; - numItems *= threads[ j ]; - // Hack for now: localThreads > 1 are iffy localThreads[ j ] = 1; } From 0e229b8f01afc9e16ca83234b656830c26f11215 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:42:57 +0200 Subject: [PATCH 
06/20] Added cl_khr_fp16 extension support for test_fpmath from basic (#1718) * Added half and double support for fpmath test from basic (issue #142, basic) * Cosmetic corrections due to code review * Removed unnecessary casting * Added corrections due to code review * Tuning range of input generation to avoid hitting infinity * Moved string helpers procedures due to request from test_commonfns PR #1695 --- .../harness/stringHelpers.h | 0 test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 37 +- test_conformance/basic/procs.h | 10 +- test_conformance/basic/test_astype.cpp | 7 +- test_conformance/basic/test_fpmath.cpp | 386 ++++++++++++++++++ test_conformance/basic/test_fpmath_float.cpp | 196 --------- 7 files changed, 427 insertions(+), 211 deletions(-) rename test_conformance/basic/utils.h => test_common/harness/stringHelpers.h (100%) create mode 100644 test_conformance/basic/test_fpmath.cpp delete mode 100644 test_conformance/basic/test_fpmath_float.cpp diff --git a/test_conformance/basic/utils.h b/test_common/harness/stringHelpers.h similarity index 100% rename from test_conformance/basic/utils.h rename to test_common/harness/stringHelpers.h diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c07d32b661..c89a93cf0e 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -2,7 +2,7 @@ set(MODULE_NAME BASIC) set(${MODULE_NAME}_SOURCES main.cpp - test_fpmath_float.cpp + test_fpmath.cpp test_intmath.cpp test_hiloeo.cpp test_local.cpp test_pointercast.cpp test_if.cpp test_loop.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 86c3cec359..24262dbf99 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,14 +22,15 @@ #include #include #include + +#include + #include "harness/testHarness.h" #include "procs.h" test_definition test_list[] = { ADD_TEST(hostptr), - ADD_TEST(fpmath_float), - ADD_TEST(fpmath_float2), - ADD_TEST(fpmath_float4), + ADD_TEST(fpmath), ADD_TEST(intmath_int), ADD_TEST(intmath_int2), ADD_TEST(intmath_int4), @@ -164,9 +165,35 @@ test_definition test_list[] = { }; const int test_num = ARRAY_SIZE( test_list ); +cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE; + +test_status InitCL(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + + return TEST_PASS; +} int main(int argc, const char *argv[]) { - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index c14340de34..9cbc373a3a 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// + #include "harness/kernelHelpers.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -21,9 +22,8 @@ #include "harness/rounding_mode.h" extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 08a4cb85aa..45669a7cbd 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/conversions.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" #include #include @@ -22,11 +25,7 @@ #include #include -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - #include "procs.h" -#include "utils.h" // clang-format off diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp new file mode 100644 index 0000000000..6719e72816 --- /dev/null +++ b/test_conformance/basic/test_fpmath.cpp @@ -0,0 +1,386 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/stringHelpers.h" + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "procs.h" + +static const char *fp_kernel_code = R"( +%s +__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) +{ + int tid = get_global_id(0); + + dst[tid] = srcA[tid] OP srcB[tid]; +})"; + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +template double toDouble(T val) +{ + if (std::is_same::value) + return HTF(val); + else + return val; +} + +bool isHalfNan(cl_half v) +{ + // Extract FP16 exponent and mantissa + uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = v & 0x3FF; + + // NaN test + return (h_exp == 0x1F && h_mant != 0); +} + +cl_half half_plus(cl_half a, cl_half b) +{ + return HFF(std::plus()(HTF(a), HTF(b))); +} + +cl_half half_minus(cl_half a, cl_half b) +{ + return HFF(std::minus()(HTF(a), HTF(b))); +} + +cl_half half_mult(cl_half a, cl_half b) +{ + return HFF(std::multiplies()(HTF(a), HTF(b))); +} + +template struct TestDef +{ + const char op; + std::function ref; + std::string type_str; + size_t vec_size; +}; + +template +int verify_fp(std::vector (&input)[2], std::vector &output, + const TestDef &test) +{ + 
auto &inA = input[0]; + auto &inB = input[1]; + for (int i = 0; i < output.size(); i++) + { + bool nan_test = false; + + T r = test.ref(inA[i], inB[i]); + + if (std::is_same::value) + nan_test = !(isHalfNan(r) && isHalfNan(output[i])); + + if (r != output[i] && nan_test) + { + log_error("FP math test for type: %s, vec size: %zu, failed at " + "index %d, %a '%c' %a, expected %a, get %a\n", + test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]), + test.op, toDouble(inB[i]), toDouble(r), + toDouble(output[i])); + return -1; + } + } + + return 0; +} + +template void generate_random_inputs(std::vector (&input)[2]) +{ + RandomSeed seed(gRandomSeed); + + if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + seed)); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } +} + +struct TypesIterator +{ + using TypeIter = std::tuple; + + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems) + : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0), + num_elements(num_elems) + { + // typeid().name one day + type2name[sizeof(cl_half)] = "half"; + type2name[sizeof(cl_float)] = "float"; + type2name[sizeof(cl_double)] = "double"; + + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + 
fpConfigFloat = get_default_rounding_mode(deviceID); + + if (fp16Support) + fpConfigHalf = + get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG); + + for_each_elem(it); + } + + template int test_fpmath(TestDef &test) + { + constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 }; + cl_int err = CL_SUCCESS; + + std::ostringstream sstr; + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string program_source = + str_sprintf(std::string(fp_kernel_code), sstr.str().c_str()); + + for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++) + { + test.vec_size = vecSizes[i]; + + std::ostringstream vecNameStr; + vecNameStr << test.type_str; + if (test.vec_size != 1) vecNameStr << test.vec_size; + + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t length = sizeof(T) * num_elements * test.vec_size; + + bool isRTZ = false; + RoundingMode oldMode = kDefaultRoundingMode; + + + // If we only support rtz mode + if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigHalf) + { + isRTZ = true; + oldMode = get_round(); + } + } + else if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigFloat) + { + isRTZ = true; + oldMode = get_round(); + } + } + + std::vector inputs[]{ + std::vector(test.vec_size * num_elements), + std::vector(test.vec_size * num_elements) + }; + std::vector output = + std::vector(test.vec_size * num_elements); + + generate_random_inputs(inputs); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, + NULL, &err); + test_error(err, "clCreateBuffer failed."); + } + for (int i = 0; i < ARRAY_SIZE(inputs); i++) + { + err = + clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, + inputs[i].data(), 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed."); + } + + std::string build_options = 
"-DTYPE="; + build_options.append(vecNameStr.str()) + .append(" -DOP=") + .append(1, test.op); + + const char *ptr = program_source.c_str(); + err = + create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_fp", build_options.c_str()); + + test_error(err, "create_single_kernel_helper failed"); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + err = + clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(err, "clSetKernelArgs failed."); + } + + size_t threads[] = { static_cast(num_elements) }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, + output.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed."); + + if (isRTZ) set_round(kRoundTowardZero, kfloat); + + err = verify_fp(inputs, output, test); + + if (isRTZ) set_round(oldMode, kfloat); + + test_error(err, "test verification failed"); + log_info("FP '%c' '%s' test passed\n", test.op, + vecNameStr.str().c_str()); + } + + return err; + } + + template int test_fpmath_common() + { + int err = TEST_PASS; + if (std::is_same::value) + { + TestDef tests[] = { { '+', half_plus, type2name[sizeof(T)] }, + { '-', half_minus, type2name[sizeof(T)] }, + { '*', half_mult, type2name[sizeof(T)] } }; + for (auto &test : tests) err |= test_fpmath(test); + } + else + { + TestDef tests[] = { + { '+', std::plus(), type2name[sizeof(T)] }, + { '-', std::minus(), type2name[sizeof(T)] }, + { '*', std::multiplies(), type2name[sizeof(T)] } + }; + for (auto &test : tests) err |= test_fpmath(test); + } + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template + void iterate_type(const Type &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + if (test_fpmath_common()) + { 
+ throw std::runtime_error("test_fpmath_common failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + TypeIter it; + + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + int num_elements; + std::map type2name; +}; + +int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp deleted file mode 100644 index fced0f4ecc..0000000000 --- a/test_conformance/basic/test_fpmath_float.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -#include "harness/compat.h" - -#include -#include -#include -#include -#include -#include "harness/rounding_mode.h" - -#include -#include -#include -#include - -#include "procs.h" - -struct TestDef -{ - const char op; - std::function ref; -}; - -static const char *fp_kernel_code = R"( -__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) -{ - int tid = get_global_id(0); - - dst[tid] = srcA[tid] OP srcB[tid]; -})"; - -static int verify_fp(std::vector (&input)[2], std::vector &output, - const TestDef &test) -{ - - auto &inA = input[0]; - auto &inB = input[1]; - for (int i = 0; i < output.size(); i++) - { - float r = test.ref(inA[i], inB[i]); - if (r != output[i]) - { - log_error("FP '%c' float test failed\n", test.op); - return -1; - } - } - - log_info("FP '%c' float test passed\n", test.op); - return 0; -} - - -void generate_random_inputs(std::vector (&input)[2]) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - for (auto &v : input) - { - std::generate(v.begin(), v.end(), random_generator); - } -} - -template -int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const std::string type_str, - const TestDef &test) -{ - clMemWrapper streams[3]; - clProgramWrapper program; - clKernelWrapper kernel; - - int err; - - size_t length = sizeof(cl_float) * num_elements * N; - - int isRTZ = 0; - RoundingMode oldMode = kDefaultRoundingMode; - - // If we only support rtz mode - if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) - { - isRTZ = 1; - oldMode = get_round(); - } - - - std::vector inputs[]{ std::vector(N * num_elements), - std::vector(N * num_elements) }; - std::vector output = std::vector(N * num_elements); - - generate_random_inputs(inputs); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - streams[i] = - clCreateBuffer(context, 
CL_MEM_READ_WRITE, length, NULL, &err); - test_error(err, "clCreateBuffer failed."); - } - for (int i = 0; i < ARRAY_SIZE(inputs); i++) - { - err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, - inputs[i].data(), 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed."); - } - - std::string build_options = "-DTYPE="; - build_options.append(type_str).append(" -DOP=").append(1, test.op); - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &fp_kernel_code, "test_fp", - build_options.c_str()); - - test_error(err, "create_single_kernel_helper failed"); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); - test_error(err, "clSetKernelArgs failed."); - } - - size_t threads[] = { static_cast(num_elements) }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, - NULL); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, - output.data(), 0, NULL, NULL); - test_error(err, "clEnqueueReadBuffer failed."); - - if (isRTZ) set_round(kRoundTowardZero, kfloat); - - err = verify_fp(inputs, output, test); - - if (isRTZ) set_round(oldMode, kfloat); - - return err; -} - - -template -int test_fpmath_common(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const std::string type_str) -{ - TestDef tests[] = { { '+', std::plus() }, - { '-', std::minus() }, - { '*', std::multiplies() } }; - int err = TEST_PASS; - - for (const auto &test : tests) - { - err |= test_fpmath(device, context, queue, num_elements, type_str, - test); - } - - return err; -} - -int test_fpmath_float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<1>(device, context, queue, num_elements, "float"); -} - -int test_fpmath_float2(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) 
-{ - return test_fpmath_common<2>(device, context, queue, num_elements, - "float2"); -} - -int test_fpmath_float4(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<4>(device, context, queue, num_elements, - "float4"); -} From df3ec8deecdb81661ee61c3c97ae63419b5f4822 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:44:45 +0200 Subject: [PATCH 07/20] Added cl_khr_fp16 extension support for test_int2fp from basic (#1742) * Added cl_khr_fp16 and cl_khr_fp64 support for float2int and int2float tests from basic * removed debug output * Replaced procedure to generate random half values in specific range (issue #142, basic) * Added cosmetic fixes due to code review comments * Moved string helper procedures due to request for test_commonfns PR #1695 --- test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 6 +- test_conformance/basic/procs.h | 6 +- test_conformance/basic/test_int2float.cpp | 140 ---------- test_conformance/basic/test_int2fp.cpp | 324 ++++++++++++++++++++++ 5 files changed, 332 insertions(+), 146 deletions(-) delete mode 100644 test_conformance/basic/test_int2float.cpp create mode 100644 test_conformance/basic/test_int2fp.cpp diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c89a93cf0e..47c1c980f1 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp test_imagedim.cpp test_vloadstore.cpp - test_int2float.cpp + test_int2fp.cpp test_createkernelsinprogram.cpp test_hostptr.cpp test_explicit_s2v.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 24262dbf99..d1901f95d6 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -59,8 +59,8 @@ test_definition test_list[] = { ADD_TEST(image_r8), 
ADD_TEST(barrier), ADD_TEST_VERSION(wg_barrier, Version(2, 0)), - ADD_TEST(int2float), - ADD_TEST(float2int), + ADD_TEST(int2fp), + ADD_TEST(fp2int), ADD_TEST(imagereadwrite), ADD_TEST(imagereadwrite3d), ADD_TEST(readimage3d), @@ -156,7 +156,7 @@ test_definition test_list[] = { ADD_TEST(simple_read_image_pitch), ADD_TEST(simple_write_image_pitch), -#if defined( __APPLE__ ) +#if defined(__APPLE__) ADD_TEST(queue_priority), #endif diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index 9cbc373a3a..b685ecd53c 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -52,8 +52,10 @@ extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_comm extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int2fp(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_fp2int(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp deleted file mode 100644 index 
c5afc2440a..0000000000 --- a/test_conformance/basic/test_int2float.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include "procs.h" - -namespace { -const char *int2float_kernel_code = R"( -__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) -{ - int tid = get_global_id(0); - - dst[tid] = (TYPE_Y)src[tid]; - -})"; - -template const char *Type2str() { return ""; } -template <> const char *Type2str() { return "int"; } -template <> const char *Type2str() { return "float"; } - -template void generate_random_inputs(std::vector &v) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - std::generate(v.begin(), v.end(), random_generator); -} - -template bool equal_value(Tx a, Ty b) -{ - return a == (Tx)b; -} - -template -int verify_X2Y(std::vector input, std::vector output, - const char *test_name) -{ - - if (!std::equal(output.begin(), output.end(), input.begin(), - equal_value)) - { - log_error("%s test failed\n", test_name); - return -1; - } - - log_info("%s test passed\n", test_name); - return 0; -} -template -int test_X2Y(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const char 
*test_name) -{ - clMemWrapper streams[2]; - clProgramWrapper program; - clKernelWrapper kernel; - int err; - - - std::vector input(num_elements); - std::vector output(num_elements); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(Tx) * num_elements, nullptr, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(Ty) * num_elements, nullptr, &err); - test_error(err, "clCreateBuffer failed."); - - generate_random_inputs(input); - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, - sizeof(Tx) * num_elements, input.data(), 0, - nullptr, nullptr); - test_error(err, "clEnqueueWriteBuffer failed."); - - std::string build_options; - build_options.append("-DTYPE_X=").append(Type2str()); - build_options.append(" -DTYPE_Y=").append(Type2str()); - err = create_single_kernel_helper(context, &program, &kernel, 1, - &int2float_kernel_code, "test_X2Y", - build_options.c_str()); - test_error(err, "create_single_kernel_helper failed."); - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - test_error(err, "clSetKernelArg failed."); - - size_t threads[] = { (size_t)num_elements }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, - nullptr, nullptr); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, - sizeof(Ty) * num_elements, output.data(), 0, - nullptr, nullptr); - test_error(err, "clEnqueueReadBuffer failed."); - - err = verify_X2Y(input, output, test_name); - - return err; -} -} -int test_int2float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_X2Y(device, context, queue, num_elements, - "INT2FLOAT"); -} -int test_float2int(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_X2Y(device, context, queue, 
num_elements, - "FLOAT2INT"); -} diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp new file mode 100644 index 0000000000..8b1203a71b --- /dev/null +++ b/test_conformance/basic/test_int2fp.cpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "CL/cl_half.h" +#include "harness/compat.h" +#include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "procs.h" + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +namespace { +const char *int2float_kernel_code = R"( +%s +__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) +{ + int tid = get_global_id(0); + + dst[tid] = (TYPE_Y)src[tid]; + +})"; + +template struct TypesIterator +{ + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems, const char *test_name) + : context(context), queue(queue), test_name(test_name), + num_elements(num_elems) + { + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + type2name[sizeof(cl_half)] = std::make_pair("half", "short"); + type2name[sizeof(cl_float)] = std::make_pair("float", "int"); + type2name[sizeof(cl_double)] = 
std::make_pair("double", "long"); + + std::tuple it; + for_each_elem(it); + } + + template void generate_random_inputs(std::vector &v) + { + RandomSeed seed(gRandomSeed); + + if (sizeof(T) == sizeof(cl_half)) + { + // Bound generated half values to 0x1.ffcp+14(32752.0) which is the + // largest cl_half value smaller than the max value of cl_short, + // 32767. + if (int2fp) + { + auto random_generator = [&seed]() { + return (cl_short)get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed)); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + else if (sizeof(T) == sizeof(cl_float)) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else if (sizeof(T) == sizeof(cl_double)) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + + template static bool equal_value(Tx a, Ty b) + { + return a == (Tx)b; + } + + static bool equal_value_from_half(cl_short a, cl_half b) + { + return a == (cl_short)HTF(b); + } + + static bool equal_value_to_half(cl_half a, cl_short b) + { + return a == HFF((float)b); + } + + + template + int verify_X2Y(std::vector input, std::vector output) + { + if (std::is_same::value + || std::is_same::value) + { + bool res = true; + if (int2fp) + res = std::equal(output.begin(), output.end(), input.begin(), + equal_value_to_half); + else + res = std::equal(output.begin(), output.end(), 
input.begin(), + equal_value_from_half); + + if (!res) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + else + { + if (!std::equal(output.begin(), output.end(), input.begin(), + equal_value)) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + + log_info("%s test passed\n", test_name.c_str()); + return 0; + } + + template int test_X2Y() + { + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; + + std::vector input(num_elements); + std::vector output(num_elements); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Tx) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Ty) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + + generate_random_inputs(input); + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(Tx) * num_elements, input.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + std::string src_name = type2name[sizeof(Tx)].first; + std::string dst_name = type2name[sizeof(Tx)].second; + if (int2fp) std::swap(src_name, dst_name); + + std::string build_options; + build_options.append("-DTYPE_X=").append(src_name.c_str()); + build_options.append(" -DTYPE_Y=").append(dst_name.c_str()); + + std::string extension; + if (sizeof(Tx) == sizeof(cl_double)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (sizeof(Tx) == sizeof(cl_half)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string kernelSource = + str_sprintf(int2float_kernel_code, extension.c_str()); + const char *ptr = kernelSource.c_str(); + + err = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_X2Y", build_options.c_str()); + test_error(err, "create_single_kernel_helper failed."); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + 
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + test_error(err, "clSetKernelArg failed."); + + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(Ty) * num_elements, output.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); + + err = verify_X2Y(input, output); + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template void iterate_type(const T &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + typedef typename std::conditional< + (sizeof(T) == sizeof(std::int16_t)), std::int16_t, + typename std::conditional<(sizeof(T) == sizeof(std::int32_t)), + std::int32_t, + std::int64_t>::type>::type U; + if (int2fp) + { + if (test_X2Y()) + throw std::runtime_error("test_X2Y failed\n"); + } + else + { + if (test_X2Y()) + throw std::runtime_error("test_X2Y failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. 
+ {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + std::map> type2name; + + std::string test_name; + int num_elements; +}; + +} + +int test_int2fp(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements, "INT2FP"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} + +int test_fp2int(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements, "FP2INT"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} From 50f9f063236394eea1edfab92bb4ebebd8c33b78 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 21 Jun 2023 15:19:21 +0100 Subject: [PATCH 08/20] test_common: fix -Wsign-compare warnings (#1759) In preparation of re-enabling `-Wsign-compare` globally, avoid mixing signed and unsigned integers in comparisons in test_common. 
Signed-off-by: Sven van Haastregt --- test_common/gl/helpers.cpp | 4 ++-- test_common/harness/testHarness.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index b9f95a94a8..1fb85035e5 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -1715,7 +1715,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < (size_t)width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; @@ -1733,7 +1733,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 95ea81631e..3d743e717f 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -835,9 +835,9 @@ void callTestFunctions(test_definition testList[], std::vector threads; test_harness_state state = { testList, resultTestList, deviceToUse, config }; - for (int i = 0; i < config.numWorkerThreads; i++) + for (unsigned i = 0; i < config.numWorkerThreads; i++) { - log_info("Spawning worker thread %i\n", i); + log_info("Spawning worker thread %u\n", i); threads.push_back(new std::thread(test_function_runner, &state)); } From 2e88013b34586c10fb8cc9eb0320e5587ce94785 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 22 Jun 2023 06:08:21 +0100 Subject: [PATCH 09/20] compiler: fix memory leak from unnecessary strdup (#1761) The result of the `strdup` was never freed. 
The string duplication isn't necessary, so remove it. Signed-off-by: Sven van Haastregt --- test_conformance/compiler/test_compile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index f3ee431224..d250bdd477 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -462,7 +462,7 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device header_names[i] = _strdup(buffer); sprintf(buffer, composite_kernel_extern_template, i); - const char* line = _strdup(buffer); + const char *line = buffer; error = create_single_kernel_helper_create_program(context, &headers[i], 1, &line); if( headers[i] == NULL || error != CL_SUCCESS ) { From 60f025a7da5ab2456ba41405e9fdf655ce948eac Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:40:35 +0200 Subject: [PATCH 10/20] Added cl_half support for test_select (#1617) * Added cl_half support for test_select (issue #142, select) * Added corrections due to code review + performance optimization + replaced C object with wrappers * minor fix * Corrected use of user event * Removed unnecessary user event --- test_conformance/select/test_select.cpp | 295 ++++----- test_conformance/select/test_select.h | 24 +- test_conformance/select/util_select.cpp | 779 ++++++++++++++---------- 3 files changed, 576 insertions(+), 522 deletions(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index b0cda09fd1..8a0567c34b 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -14,11 +14,14 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/typeWrappers.h" #include #include #include #include +#include + #if ! 
defined( _WIN32) #if defined(__APPLE__) #include @@ -66,6 +69,16 @@ static void printUsage( void ); #define BUFFER_SIZE (1024*1024) #define KPAGESIZE 4096 +#define test_error_count(errCode, msg) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + gFailCount++; \ + print_error(errCodeResult, msg); \ + return errCode; \ + } \ + } // When we indicate non wimpy mode, the types that are 32 bits value will // test their entire range and 64 bits test will test the 32 bit @@ -74,12 +87,6 @@ static void printUsage( void ); static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; -// Tests are broken into the major test which is based on the -// src and cmp type and their corresponding vector types and -// sub tests which is for each individual test. The following -// tracks the subtests -int s_test_cnt = 0; - //----------------------------------------- // Static helper functions //----------------------------------------- @@ -237,6 +244,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont if (srctype == kdouble) strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" ); + if (srctype == khalf) + strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + // create type name and testname switch( vec_len ) { @@ -288,25 +298,14 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont return program; } - #define VECTOR_SIZE_COUNT 6 static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; - int s_test_fail = 0; - MTdataHolder d; + MTdataHolder d(gRandomSeed); const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; - cl_mem src1 = NULL; - cl_mem src2 = NULL; - cl_mem cmp = NULL; - cl_mem dest = NULL; - void *ref = NULL; - void *sref = NULL; - void *src1_host = NULL; - void *src2_host = NULL; - void *cmp_host = NULL; - void *dest_host = NULL; + 
clMemWrapper src1, src2, cmp, dest; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; @@ -315,16 +314,22 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c // It is more efficient to create the tests all at once since we // use the same test data on each of the vector sizes - int vecsize; - cl_program programs[VECTOR_SIZE_COUNT]; - cl_kernel kernels[VECTOR_SIZE_COUNT]; + clProgramWrapper programs[VECTOR_SIZE_COUNT]; + clKernelWrapper kernels[VECTOR_SIZE_COUNT]; - if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" )) + if (stype == kdouble && !is_extension_available(device, "cl_khr_fp64")) { log_info("Skipping double because cl_khr_fp64 extension is not supported.\n"); return 0; } + if (stype == khalf && !is_extension_available(device, "cl_khr_fp16")) + { + log_info( + "Skipping half because cl_khr_fp16 extension is not supported.\n"); + return 0; + } + if (gIsEmbedded) { if (( stype == klong || stype == kulong ) && ! 
is_extension_available( device, "cles_khr_int64" )) @@ -340,54 +345,41 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } } - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) - { - programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] ); - if (!programs[vecsize] || !kernels[vecsize]) { - ++s_test_fail; - ++s_test_cnt; - return -1; - } - } - - ref = malloc( BUFFER_SIZE ); - if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } - sref = malloc( BUFFER_SIZE ); - if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src1 buffer\n"); src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src2 buffer\n"); cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate cmp buffer\n"); dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate dest buffer\n"); - src1_host = malloc(BUFFER_SIZE); - if (NULL == src1_host) - { - log_error("Error: could not allocate src1_host buffer\n"); - goto exit; - } - src2_host = malloc(BUFFER_SIZE); - if (NULL == src2_host) - { - log_error("Error: could not allocate src2_host buffer\n"); - goto exit; - } - cmp_host = malloc(BUFFER_SIZE); - if (NULL == cmp_host) + for 
(int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { - log_error("Error: could not allocate cmp_host buffer\n"); - goto exit; - } - dest_host = malloc(BUFFER_SIZE); - if (NULL == dest_host) - { - log_error("Error: could not allocate dest_host buffer\n"); - goto exit; + programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, + cmptype, element_count[vecsize]); + if (!programs[vecsize] || !kernels[vecsize]) + { + return -1; + } + + err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); } + std::vector ref(BUFFER_SIZE); + std::vector sref(BUFFER_SIZE); + std::vector src1_host(BUFFER_SIZE); + std::vector src2_host(BUFFER_SIZE); + std::vector cmp_host(BUFFER_SIZE); + std::vector dest_host(BUFFER_SIZE); + // We block the test as we are running over the range of compare values // "block the test" means "break the test into blocks" if( type_size[stype] == 4 ) @@ -396,111 +388,63 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1); log_info("Testing..."); - d = MTdataHolder(gRandomSeed); uint64_t i; + for (i=0; i < blocks; i+=step) { - void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src1" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s1, stype, d); - - void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, 
NULL, &err ); - if( err ){ log_error( "Error: Could not map src2" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s2, stype, d); - - void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map cmp" ); goto exit; } - // Setup the input data to change for each block - initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); - - if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } - - // Create the reference result - err = clEnqueueReadBuffer(queue, src1, CL_TRUE, 0, BUFFER_SIZE, - src1_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src1 to src1_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, src2, CL_TRUE, 0, BUFFER_SIZE, - src2_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src2 to src2_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, cmp, CL_TRUE, 0, BUFFER_SIZE, cmp_host, - 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from cmp to cmp_host failed\n"); - ++s_test_fail; - goto exit; - } + initSrcBuffer(src1_host.data(), stype, d); + initSrcBuffer(src2_host.data(), stype, d); + initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements); + + err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE, + src1_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src1"); + + err = clEnqueueWriteBuffer(queue, src2, CL_FALSE, 0, BUFFER_SIZE, + src2_host.data(), 0, 
NULL, NULL); + test_error_count(err, "Error: Could not write src2"); + + err = clEnqueueWriteBuffer(queue, cmp, CL_FALSE, 0, BUFFER_SIZE, + cmp_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write cmp"); Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1]; - (*sfunc)(ref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(ref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; - (*sfunc)(sref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(sref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { size_t vector_size = element_count[vecsize] * type_size[stype]; size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size; - if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) - { log_error( "Error: Cannot set kernel arg dest! 
%d\n", err ); ++s_test_fail; goto exit; } - - // Wipe destination - void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } - memset( d, -1, BUFFER_SIZE ); - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; } + const cl_int pattern = -1; + err = clEnqueueFillBuffer(queue, dest, &pattern, sizeof(cl_int), 0, + BUFFER_SIZE, 0, nullptr, nullptr); + test_error_count(err, "clEnqueueFillBuffer failed"); + err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err); - ++s_test_fail; - goto exit; - } + test_error_count(err, "clEnqueueNDRangeKernel failed errcode\n"); err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE, - dest_host, 0, NULL, NULL); - if (err) - { - log_error( - "Error: Reading buffer from dest to dest_host failed\n"); - ++s_test_fail; - goto exit; - } + dest_host.data(), 0, NULL, NULL); + test_error_count( + err, "Error: Reading buffer from dest to dest_host failed\n"); - if ((*checkResults[stype])(dest_host, vecsize == 0 ? sref : ref, + if ((*checkResults[stype])(dest_host.data(), + vecsize == 0 ? 
sref.data() : ref.data(), block_elements, element_count[vecsize]) != 0) { log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); - ++s_test_fail; - goto exit; + return TEST_FAIL; } } // for vecsize } // for i @@ -510,28 +454,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c else log_info(" Wimpy Passed\n\n"); -exit: - if( src1 ) clReleaseMemObject( src1 ); - if( src2 ) clReleaseMemObject( src2 ); - if( cmp ) clReleaseMemObject( cmp ); - if( dest) clReleaseMemObject( dest ); - if( ref ) free(ref ); - if( sref ) free(sref ); - if (src1_host) free(src1_host); - if (src2_host) free(src2_host); - if (cmp_host) free(cmp_host); - if (dest_host) free(dest_host); - - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { - clReleaseKernel(kernels[vecsize]); - clReleaseProgram(programs[vecsize]); - } - ++s_test_cnt; - if (s_test_fail) - { - err = TEST_FAIL; - gFailCount++; - } return err; } @@ -567,6 +489,16 @@ int test_select_short_short(cl_device_id deviceID, cl_context context, cl_comman { return doTest(queue, context, kshort, kshort, deviceID); } +int test_select_half_ushort(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kushort, deviceID); +} +int test_select_half_short(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kshort, deviceID); +} int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(queue, context, kuint, kuint, deviceID); @@ -617,26 +549,17 @@ int test_select_double_long(cl_device_id deviceID, cl_context context, cl_comman } test_definition test_list[] = { - ADD_TEST( select_uchar_uchar ), - ADD_TEST( select_uchar_char ), - ADD_TEST( select_char_uchar ), - ADD_TEST( select_char_char ), - ADD_TEST( select_ushort_ushort ), - ADD_TEST( select_ushort_short ), - 
ADD_TEST( select_short_ushort ), - ADD_TEST( select_short_short ), - ADD_TEST( select_uint_uint ), - ADD_TEST( select_uint_int ), - ADD_TEST( select_int_uint ), - ADD_TEST( select_int_int ), - ADD_TEST( select_float_uint ), - ADD_TEST( select_float_int ), - ADD_TEST( select_ulong_ulong ), - ADD_TEST( select_ulong_long ), - ADD_TEST( select_long_ulong ), - ADD_TEST( select_long_long ), - ADD_TEST( select_double_ulong ), - ADD_TEST( select_double_long ), + ADD_TEST(select_uchar_uchar), ADD_TEST(select_uchar_char), + ADD_TEST(select_char_uchar), ADD_TEST(select_char_char), + ADD_TEST(select_ushort_ushort), ADD_TEST(select_ushort_short), + ADD_TEST(select_short_ushort), ADD_TEST(select_short_short), + ADD_TEST(select_half_ushort), ADD_TEST(select_half_short), + ADD_TEST(select_uint_uint), ADD_TEST(select_uint_int), + ADD_TEST(select_int_uint), ADD_TEST(select_int_int), + ADD_TEST(select_float_uint), ADD_TEST(select_float_int), + ADD_TEST(select_ulong_ulong), ADD_TEST(select_ulong_long), + ADD_TEST(select_long_ulong), ADD_TEST(select_long_long), + ADD_TEST(select_double_ulong), ADD_TEST(select_double_long), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h index c51ae13c2c..5cd786022b 100644 --- a/test_conformance/select/test_select.h +++ b/test_conformance/select/test_select.h @@ -28,18 +28,20 @@ #endif // Defines the set of types we support (no support for double) -typedef enum { +typedef enum +{ kuchar = 0, kchar = 1, kushort = 2, kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kulong = 7, - klong = 8, - kdouble = 9, - kTypeCount // always goes last + khalf = 4, + kuint = 5, + kint = 6, + kfloat = 7, + kulong = 8, + klong = 9, + kdouble = 10, + kTypeCount // always goes last } Type; @@ -56,7 +58,8 @@ extern const size_t type_size[kTypeCount]; extern const Type ctype[kTypeCount][2]; // Reference functions for the primitive (non vector) type -typedef void (*Select)(void 
*dest, void *src1, void *src2, void *cmp, size_t c); +typedef void (*Select)(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t c); extern Select refSelects[kTypeCount][2]; // Reference functions for the primtive type but uses the vector @@ -64,7 +67,8 @@ extern Select refSelects[kTypeCount][2]; extern Select vrefSelects[kTypeCount][2]; // Check functions for each output type -typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize); +typedef size_t (*CheckResults)(const void *const out1, const void *const out2, + size_t count, size_t vectorSize); extern CheckResults checkResults[kTypeCount]; // Helpful macros diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index f9641e9938..b85f54a762 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" #include "harness/errorHelpers.h" #include @@ -25,29 +24,28 @@ //----------------------------------------- -const char *type_name[kTypeCount] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "ulong", "long", "double" }; +const char *type_name[kTypeCount] = { "uchar", "char", "ushort", "short", + "half", "uint", "int", "float", + "ulong", "long", "double" }; const size_t type_size[kTypeCount] = { - sizeof(cl_uchar), sizeof(cl_char), - sizeof(cl_ushort), sizeof(cl_short), - sizeof(cl_uint), sizeof(cl_int), - sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) }; + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_half), sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), + sizeof(cl_ulong), sizeof(cl_long), sizeof(cl_double) +}; const Type ctype[kTypeCount][2] = { - { kuchar, kchar }, // uchar - { kuchar, kchar }, // char - { kushort, kshort}, // ushort - { kushort, kshort}, // short - { kuint, kint }, // uint - { kuint, kint }, // int - { kuint, kint }, // float - { kulong, klong }, // ulong - { kulong, klong }, // long - { kulong, klong } // double + { kuchar, kchar }, // uchar + { kuchar, kchar }, // char + { kushort, kshort }, // ushort + { kushort, kshort }, // short + { kushort, kshort }, // half + { kuint, kint }, // uint + { kuint, kint }, // int + { kuint, kint }, // float + { kulong, klong }, // ulong + { kulong, klong }, // long + { kulong, klong } // double }; @@ -55,510 +53,594 @@ const Type ctype[kTypeCount][2] = { // Reference functions //----------------------------------------- -void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char 
*)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_short*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*)dest; - x = (cl_int*)src1; - y = (cl_int*)src2; - m = (cl_int*)cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void refselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*)dest; - x = (cl_uint*)src1; - y = (cl_uint*)src2; - m = (cl_int*)cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_int *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_long *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? 
y[i] : x[i]; } -void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*)src1; - y = (cl_ushort*)src2; - m = (cl_short*)cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? 
y[i] : x[i]; } -void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void vrefselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? 
y[i] : x[i]; } -void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? 
y[i] : x[i]; } -void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? 
y[i] : x[i]; } -void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; } -void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; +} + +void vrefselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? 
y[i] : x[i]; } -void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_ulong *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; } // Define refSelects -Select refSelects[kTypeCount][2] = { - { refselect_1u8u, refselect_1u8 }, // cl_uchar - { refselect_1i8u, refselect_1i8 }, // char +Select refSelects[kTypeCount][2] = { + { refselect_1u8u, refselect_1u8 }, // cl_uchar + { refselect_1i8u, refselect_1i8 }, // char { refselect_1u16u, refselect_1u16 }, // ushort { refselect_1i16u, refselect_1i16 }, // short + { refselect_hhu, refselect_hhi }, // half { refselect_1u32u, refselect_1u32 }, // uint { refselect_1i32u, refselect_1i32 }, // int - { refselect_ffu, refselect_ffi }, // float + { refselect_ffu, refselect_ffi }, // float { refselect_1u64u, refselect_1u64 }, // ulong { refselect_1i64u, refselect_1i64 }, // long - { refselect_ddu, refselect_ddi } // double + { refselect_ddu, refselect_ddi } // double }; // Define vrefSelects (vector refSelects) -Select vrefSelects[kTypeCount][2] = { - { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar - { vrefselect_1i8u, vrefselect_1i8 }, // char +Select vrefSelects[kTypeCount][2] = { + { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar + { vrefselect_1i8u, vrefselect_1i8 }, // char { vrefselect_1u16u, vrefselect_1u16 }, // ushort { vrefselect_1i16u, vrefselect_1i16 }, // short + { vrefselect_hhu, vrefselect_hhi }, // half { vrefselect_1u32u, vrefselect_1u32 }, // uint { vrefselect_1i32u, vrefselect_1i32 }, // int - { vrefselect_ffu, vrefselect_ffi }, // float + { vrefselect_ffu, vrefselect_ffi }, // float { vrefselect_1u64u, vrefselect_1u64 }, // ulong { vrefselect_1i64u, vrefselect_1i64 }, // long - { vrefselect_ddu, vrefselect_ddi } // double + { vrefselect_ddu, vrefselect_ddi } // double }; //----------------------------------------- // Check functions //----------------------------------------- -size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uchar *t = (const cl_uchar *) test; - const cl_uchar *c = (const cl_uchar *) correct; +size_t check_uchar(const void *const test, const void *const correct, + 
size_t count, size_t vector_size) +{ + const cl_uchar *const t = (const cl_uchar *)test; + const cl_uchar *const c = (const cl_uchar *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -576,9 +658,11 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { - const cl_char *t = (const cl_char *) test; - const cl_char *c = (const cl_char *) correct; +size_t check_char(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_char *const t = (const cl_char *)test; + const cl_char *const c = (const cl_char *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -597,9 +681,11 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ushort *t = (const cl_ushort *) test; - const cl_ushort *c = (const cl_ushort *) correct; +size_t check_ushort(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -618,9 +704,11 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_short(void *test, void *correct, size_t count, size_t vector_size) { - const cl_short *t = (const cl_short *) test; - const cl_short *c = (const cl_short *) correct; +size_t check_short(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_short *const t = (const cl_short *)test; + const cl_short *const c = (const cl_short *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -639,9 +727,11 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) 
return 0; } -size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_uint(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -660,9 +750,11 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { - const cl_int *t = (const cl_int *) test; - const cl_int *c = (const cl_int *) correct; +size_t check_int(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_int *const t = (const cl_int *)test; + const cl_int *const c = (const cl_int *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -682,9 +774,11 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_ulong(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -703,9 +797,11 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { - const cl_long *t = (const cl_long *) test; - const cl_long *c = (const cl_long *) correct; +size_t check_long(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_long *const t = (const 
cl_long *)test; + const cl_long *const c = (const cl_long *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -724,9 +820,36 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_half(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; + size_t i; + + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(isnan(((cl_half *)correct)[i]) + && isnan(((cl_half *)test)[i]))) + { + log_error("\n(check_half) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%4.4x vs 0x%4.4x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } + + return 0; +} + +size_t check_float(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -746,9 +869,11 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size return 0; } -size_t check_double( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_double(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -770,5 +895,7 @@ size_t check_double( void *test, void *correct, size_t count, size_t 
vector_size } CheckResults checkResults[kTypeCount] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_ulong, check_long, check_double }; + check_uchar, check_char, check_ushort, check_short, + check_half, check_uint, check_int, check_float, + check_ulong, check_long, check_double +}; From 2495eca9fa89fcfadb3bcca7fda61b9f20b1f4e3 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:42:02 +0200 Subject: [PATCH 11/20] Added cl_khr_fp16 extension support for test_commonfns (#1695) * Added cl_khr_fp16 extension support for commonfns test (issue #142, commonfns) * Added missing header due to presubmit check * Corrected radians/degrees ulp calculations + cosmetic fixes * Corrected presubmit code format * Corrections related to code review * Moved string format helper to test_common in separate header * Added clang format for last commit * Corrections related to code review * Modified mix verification procedure for half type to only report max error * Removed redundant condition for logging mix verification * Corrected generator limits for half tests --- test_common/harness/stringHelpers.h | 6 +- test_conformance/commonfns/main.cpp | 43 +++-- test_conformance/commonfns/test_base.h | 86 +++++++--- test_conformance/commonfns/test_binary_fn.cpp | 74 +++++++-- test_conformance/commonfns/test_clamp.cpp | 83 ++++++++-- test_conformance/commonfns/test_mix.cpp | 120 +++++++++----- .../commonfns/test_smoothstep.cpp | 123 +++++++++----- test_conformance/commonfns/test_step.cpp | 61 ++++--- test_conformance/commonfns/test_unary_fn.cpp | 155 +++++++++++------- .../relationals/test_comparisons_fp.cpp | 33 +--- 10 files changed, 526 insertions(+), 258 deletions(-) diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h index 3f6bf64db4..a02624d6da 100644 --- a/test_common/harness/stringHelpers.h +++ b/test_common/harness/stringHelpers.h @@ -14,8 +14,8 @@ // limitations under the License. 
// -#ifndef BASIC_UTILS_H -#define BASIC_UTILS_H +#ifndef STRING_HELPERS_H +#define STRING_HELPERS_H #include #include @@ -38,4 +38,4 @@ inline std::string str_sprintf(const std::string &str, Args... args) return std::string(buffer.get(), buffer.get() + s - 1); } -#endif // BASIC_UTIL_H +#endif // STRING_HELPERS_H diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index 3e4b0b8e76..645d3f703c 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,8 +18,10 @@ #include #include "procs.h" #include "test_base.h" +#include "harness/kernelHelpers.h" std::map BaseFunctionTest::type2name; +cl_half_rounding_mode BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; @@ -45,17 +47,38 @@ test_definition test_list[] = { const int test_num = ARRAY_SIZE( test_list ); -int main(int argc, const char *argv[]) +test_status InitCL(cl_device_id device) { - initVecSizes(); - - if (BaseFunctionTest::type2name.empty()) + if (is_extension_available(device, "cl_khr_fp16")) { - BaseFunctionTest::type2name[sizeof(half)] = "half"; - BaseFunctionTest::type2name[sizeof(float)] = "float"; - BaseFunctionTest::type2name[sizeof(double)] = "double"; + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while 
acquiring half rounding mode"); + return TEST_FAIL; + } } - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return TEST_PASS; } +int main(int argc, const char *argv[]) +{ + initVecSizes(); + + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + BaseFunctionTest::type2name[sizeof(double)] = "double"; + + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); +} diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 4429104263..be36ed264b 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -19,27 +19,23 @@ #include #include #include +#include #include #include -#include "harness/deviceInfo.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" - template using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const, const int num, const int vs, const int vp); - template using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num); - using half = cl_half; - struct BaseFunctionTest { BaseFunctionTest(cl_device_id device, cl_context context, @@ -61,9 +57,9 @@ struct BaseFunctionTest bool vecParam; static std::map type2name; + static cl_half_rounding_mode halfRoundingMode; }; - struct MinTest : BaseFunctionTest { MinTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -74,7 +70,6 @@ struct MinTest : BaseFunctionTest cl_int Run() override; }; - struct MaxTest : BaseFunctionTest { MaxTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -85,7 +80,6 @@ struct MaxTest : BaseFunctionTest cl_int Run() override; }; - struct ClampTest : BaseFunctionTest { ClampTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -96,7 +90,6 @@ struct ClampTest : BaseFunctionTest cl_int Run() override; }; - struct DegreesTest : BaseFunctionTest { DegreesTest(cl_device_id device, cl_context 
context, cl_command_queue queue, @@ -107,7 +100,6 @@ struct DegreesTest : BaseFunctionTest cl_int Run() override; }; - struct RadiansTest : BaseFunctionTest { RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -118,7 +110,6 @@ struct RadiansTest : BaseFunctionTest cl_int Run() override; }; - struct SignTest : BaseFunctionTest { SignTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -129,7 +120,6 @@ struct SignTest : BaseFunctionTest cl_int Run() override; }; - struct SmoothstepTest : BaseFunctionTest { SmoothstepTest(cl_device_id device, cl_context context, @@ -141,7 +131,6 @@ struct SmoothstepTest : BaseFunctionTest cl_int Run() override; }; - struct StepTest : BaseFunctionTest { StepTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -152,7 +141,6 @@ struct StepTest : BaseFunctionTest cl_int Run() override; }; - struct MixTest : BaseFunctionTest { MixTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -163,19 +151,71 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + { + return Ulp_Error_Half(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("UlpFn: unsupported data type\n"); + } + + return -1.f; // wrong val +} + +template inline double conv_to_dbl(const T &val) +{ + if (std::is_same::value) + return (double)cl_half_to_float(val); + else + return (double)val; +} -template -std::string string_format(const std::string &format, Args... args) +template inline double conv_to_flt(const T &val) { - int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) 
+ 1; - if (sformat <= 0) - throw std::runtime_error("string_format: string processing error."); - auto format_size = static_cast(sformat); - std::unique_ptr buffer(new char[format_size]); - std::snprintf(buffer.get(), format_size, format.c_str(), args...); - return std::string(buffer.get(), buffer.get() + format_size - 1); + if (std::is_same::value) + return (float)cl_half_to_float(val); + else + return (float)val; } +template inline half conv_to_half(const T &val) +{ + if (std::is_floating_point::value) + return cl_half_from_float(val, BaseFunctionTest::halfRoundingMode); + return 0; +} + +template bool isfinite_fp(const T &v) +{ + if (std::is_same::value) + { + // Extract FP16 exponent and mantissa + uint16_t h_exp = (((half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((half)v) & 0x3FF; + + // !Inf test + return !(h_exp == 0x1F && h_mant == 0); + } + else + { +#if !defined(_WIN32) + return std::isfinite(v); +#else + return isfinite(v); +#endif + } +} template int MakeAndRunTest(cl_device_id device, cl_context context, diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index 1eb12f730f..a6c75647d0 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -22,6 +22,7 @@ #include "harness/deviceInfo.h" #include "harness/typeWrappers.h" +#include "harness/stringHelpers.h" #include "procs.h" #include "test_base.h" @@ -53,7 +54,6 @@ const char *binary_fn_code_pattern_v3_scalar = " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" "}\n"; - template int test_binary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -105,6 +105,16 @@ int test_binary_fn(cl_device_id device, cl_context context, input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -125,22 +135,22 @@ int test_binary_fn(cl_device_id device, cl_context context, { std::string str = binary_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = binary_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } } else { // do regular std::string str = binary_fn_code_pattern; - kernelSource = string_format( + kernelSource = str_sprintf( str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSecParam ? 
vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], fnName.c_str()); @@ -203,13 +213,20 @@ int max_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] < y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -227,13 +244,20 @@ int min_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] > y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. 
" + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -246,6 +270,13 @@ int min_verify(const T* const x, const T* const y, const T* const out, cl_int MaxTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify); + test_error(error, "MaxTest::Run failed"); + } error = test_binary_fn(device, context, queue, num_elems, fnName.c_str(), vecParam, max_verify); @@ -265,6 +296,13 @@ cl_int MaxTest::Run() cl_int MinTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify); + test_error(error, "MinTest::Run failed"); + } error = test_binary_fn(device, context, queue, num_elems, fnName.c_str(), vecParam, min_verify); diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index 0e96fb6027..1bf4067705 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -26,12 +26,10 @@ #include "procs.h" #include "test_base.h" - #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif - #define CLAMP_KERNEL(type) \ const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ "__kernel void test_clamp(__global " #type " *x, __global " #type \ @@ -64,6 +62,14 @@ "vload3(tid,maxval)), tid, dst);\n" \ "}\n"; +#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" +CLAMP_KERNEL(half) +CLAMP_KERNEL_V(half, 2) +CLAMP_KERNEL_V(half, 4) +CLAMP_KERNEL_V(half, 8) +CLAMP_KERNEL_V(half, 16) +CLAMP_KERNEL_V3(half, 3) +#undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE " " CLAMP_KERNEL(float) @@ -83,6 +89,10 @@ CLAMP_KERNEL_V(double, 16) CLAMP_KERNEL_V3(double, 3) #undef EMIT_PRAGMA_DIRECTIVE +const 
char *clamp_half_codes[] = { + clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code, + clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code +}; const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, @@ -96,21 +106,42 @@ const char *clamp_double_codes[] = { namespace { - template int verify_clamp(const T *const x, const T *const minval, const T *const maxval, const T *const outptr, int n) { - T t; - for (int i = 0; i < n; i++) + if (std::is_same::value) + { + float t; + for (int i = 0; i < n; i++) + { + t = std::min( + std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])), + cl_half_to_float(maxval[i])); + if (t != cl_half_to_float(outptr[i])) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", + i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]), + cl_half_to_float(maxval[i]), t, + cl_half_to_float(outptr[i])); + return -1; + } + } + } + else { - t = std::min(std::max(x[i], minval[i]), maxval[i]); - if (t != outptr[i]) + T t; + for (int i = 0; i < n; i++) { - log_error( - "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, - x[i], minval[i], maxval[i], t, outptr[i]); - return -1; + t = std::min(std::max(x[i], minval[i]), maxval[i]); + if (t != outptr[i]) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. 
%a\n", + i, x[i], minval[i], maxval[i], t, outptr[i]); + return -1; + } } } @@ -118,7 +149,6 @@ int verify_clamp(const T *const x, const T *const minval, const T *const maxval, } } - template int test_clamp_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) @@ -169,6 +199,17 @@ int test_clamp_fn(cl_device_id device, cl_context context, input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + for (j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[2][j] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[1][j]), fval, d)); + } + } for (i = 0; i < 3; i++) { @@ -194,9 +235,16 @@ int test_clamp_fn(cl_device_id device, cl_context context, "test_clamp"); test_error(err, "Unable to create kernel"); } + else if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_half_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } - log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", - i, g_arrVecSizes[i], i); + log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n", + tname.c_str(), i, g_arrVecSizes[i], i); fflush(stdout); for (j = 0; j < 4; j++) @@ -239,10 +287,14 @@ int test_clamp_fn(cl_device_id device, cl_context context, return err; } - cl_int ClampTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); + } error = test_clamp_fn(device, context, queue, num_elems); test_error(error, "ClampTest::Run failed"); @@ -256,7 +308,6 @@ cl_int ClampTest::Run() return error; } - int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git 
a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 92c1010050..2a06e43df6 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -18,6 +18,8 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -52,33 +54,42 @@ const char *mix_fn_code_pattern_v3_scalar = " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n" "}\n"; - #define MAX_ERR 1e-3 namespace { - template int verify_mix(const T *const inptrX, const T *const inptrY, const T *const inptrA, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r; - float delta = 0.0f; + double r, o; + float delta = 0.f, max_delta = 0.f; int i; if (vecParam) { for (i = 0; i < n * veclen; i++) { - r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]); - delta = fabs(double(r - outptr[i])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[i]) + + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i])) + * conv_to_dbl(inptrA[i])); + + o = conv_to_dbl(outptr[i]); + delta = fabs(double(r - o)) / r; + if (!std::is_same::value) + { + if (delta > MAX_ERR) + { + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } + else { - log_error( - "%d) verification error: mix(%a, %a, %a) = *%a vs. 
%a\n", i, - inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); - return -1; + max_delta = std::max(max_delta, delta); } } } @@ -90,25 +101,40 @@ int verify_mix(const T *const inptrX, const T *const inptrY, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]); - delta = fabs(double(r - outptr[vi])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[vi]) + + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi])) + * conv_to_dbl(inptrA[i])); + delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r; + if (!std::is_same::value) { - log_error("{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, - outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + else + { + max_delta = std::max(max_delta, delta); } } } } + // due to the fact that accuracy of mix for cl_khr_fp16 is implementation + // defined this test only reports maximum error without testing maximum + // error threshold + if (std::is_same::value) + log_error("mix half verification result, max delta: %a\n", max_delta); + return 0; } } // namespace - template int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -120,7 +146,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::vector kernels; int err, i; - MTdataHolder d = MTdataHolder(gRandomSeed); + MTdataHolder d(gRandomSeed); assert(BaseFunctionTest::type2name.find(sizeof(T)) != BaseFunctionTest::type2name.end()); @@ -142,19 +168,32 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, test_error(err, "clCreateBuffer failed"); } - for (i = 0; i < num_elements; i++) - { - input_ptr[0][i] = (T)genrand_real1(d); - input_ptr[1][i] = 
(T)genrand_real1(d); - input_ptr[2][i] = (T)genrand_real1(d); - } - std::string pragma_str; if (std::is_same::value) { pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; } + if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[1][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[2][i] = conv_to_half((float)genrand_real1(d)); + } + } + else + { + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = (T)genrand_real1(d); + input_ptr[1][i] = (T)genrand_real1(d); + input_ptr[2][i] = (T)genrand_real1(d); + } + } + for (i = 0; i < 3; i++) { err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, @@ -164,7 +203,6 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, } char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; @@ -174,15 +212,15 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, { std::string str = mix_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = mix_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -190,10 +228,10 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, // regular path std::string str = mix_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i], - tname.c_str(), vecParam ? 
vecSizeNames[i] : "", - tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -242,10 +280,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, return err; } - cl_int MixTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); + } error = test_mix_fn(device, context, queue, num_elems, vecParam); test_error(error, "MixTest::Run failed"); @@ -260,7 +302,6 @@ cl_int MixTest::Run() return error; } - int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -268,7 +309,6 @@ int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 31948d3fe1..5afc2d0f22 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -18,10 +18,11 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *smoothstep_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s " @@ -53,38 +54,43 @@ const char *smoothstep_fn_code_pattern_v3_scalar = " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - #define MAX_ERR (1e-5f) namespace { - template int verify_smoothstep(const T *const edge0, const T *const edge1, const T *const x, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r, t; - 
float delta = 0; + double r, t; + float delta = 0, max_delta = 0; if (vecParam) { for (int i = 0; i < n * veclen; i++) { - t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[i]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[i])); + if (!std::is_same::value) { - log_error("%d) verification error: smoothstep(%a, %a, %a) = " - "*%a vs. %a\n", - i, x[i], edge0[i], edge1[i], r, outptr[i]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "%d) verification error: smoothstep(%a, %a, %a) = " + "*%a vs. %a\n", + i, x[i], edge0[i], edge1[i], r, outptr[i]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } else @@ -95,32 +101,48 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[vi]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[vi])); + + if (!std::is_same::value) { - log_error("{%d, element %d}) verification error: " - "smoothstep(%a, %a, %a) = *%a vs. %a\n", - ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: " + "smoothstep(%a, %a, %a) = *%a vs. 
%a\n", + ii, j, x[vi], edge0[i], edge1[i], r, + outptr[vi]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } } + + // due to the fact that accuracy of smoothstep for cl_khr_fp16 is + // implementation defined this test only reports maximum error without + // testing maximum error threshold + if (std::is_same::value) + log_error("smoothstep half verification result, max delta: %a\n", + max_delta); + return 0; } } - template int test_smoothstep_fn(cl_device_id device, cl_context context, - cl_command_queue queue, int n_elems, bool vecParam) + cl_command_queue queue, const int n_elems, + const bool vecParam) { clMemWrapper streams[4]; std::vector input_ptr[3], output_ptr; @@ -170,6 +192,17 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d)); + input_ptr[1][i] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d)); + input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d)); + } + } for (i = 0; i < 3; i++) { @@ -179,7 +212,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, test_error(err, "Unable to write input buffer"); } - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; for (i = 0; i < kTotalVecCount; i++) { @@ -190,15 +223,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, { std::string str = smoothstep_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = smoothstep_fn_code_pattern_v3_scalar; 
kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -206,11 +239,12 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, // regular path std::string str = smoothstep_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } + const char *programPtr = kernelSource.c_str(); err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1, @@ -259,10 +293,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, return err; } - cl_int SmoothstepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_smoothstep_fn(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run failed"); + } error = test_smoothstep_fn(device, context, queue, num_elems, vecParam); @@ -278,7 +317,6 @@ cl_int SmoothstepTest::Run() return error; } - int test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -286,7 +324,6 @@ int test_smoothstep(cl_device_id device, cl_context context, "smoothstep", true); } - int test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index dc91766e90..1cfa96eabd 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -18,10 +18,11 @@ #include #include 
+#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *step_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *edge, " "__global %s%s *x, __global %s%s *dst)\n" @@ -48,7 +49,6 @@ const char *step_fn_code_pattern_v3_scalar = " vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - namespace { template @@ -62,8 +62,8 @@ int verify_step(const T *const inptrA, const T *const inptrB, { for (int i = 0; i < n * veclen; i++) { - r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0; - if (r != outptr[i]) return -1; + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[i])) ? 0.0 : 1.0; + if (r != conv_to_dbl(outptr[i])) return -1; } } else @@ -73,24 +73,31 @@ int verify_step(const T *const inptrA, const T *const inptrB, int ii = i / veclen; for (int j = 0; j < veclen && i < n; ++j, ++i) { - r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f; - if (r != outptr[i]) + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[ii])) ? 0.0f + : 1.0f; + if (r != conv_to_dbl(outptr[i])) { - log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a " - "vs %a\n", - ii, j, inptrA[ii], inptrB[i], r, outptr[i]); + if (std::is_same::value) + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, conv_to_flt(inptrA[ii]), + conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i])); + else + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, inptrA[ii], inptrB[i], r, outptr[i]); return -1; } } } } - return 0; } } - template int test_step_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -140,6 +147,16 @@ int test_step_fn(cl_device_id device, cl_context context, input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = 
conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -160,15 +177,15 @@ int test_step_fn(cl_device_id device, cl_context context, { std::string str = step_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } else { std::string str = step_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } } else @@ -176,9 +193,9 @@ int test_step_fn(cl_device_id device, cl_context context, // regular path std::string str = step_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? 
vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -229,10 +246,14 @@ int test_step_fn(cl_device_id device, cl_context context, return err; } - cl_int StepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_step_fn(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + } error = test_step_fn(device, context, queue, num_elems, vecParam); test_error(error, "StepTest::Run failed"); @@ -247,7 +268,6 @@ cl_int StepTest::Run() return error; } - int test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -255,7 +275,6 @@ int test_step(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index fed4389d9c..91b5c215bf 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -21,6 +21,7 @@ #include #include "harness/deviceInfo.h" +#include "harness/stringHelpers.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -30,7 +31,6 @@ #define M_PI 3.14159265358979323846264338327950288 #endif - // clang-format off const char *unary_fn_code_pattern = "%s\n" /* optional pragma */ @@ -51,23 +51,10 @@ const char *unary_fn_code_pattern_v3 = "}\n"; // clang-format on - #define MAX_ERR 2.0f namespace { - -template float UlpFn(const T &val, const double &r) -{ - if (std::is_same::value) - return Ulp_Error_Double(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); -} - - template int verify_degrees(const T *const inptr, const T *const outptr, int n) { @@ -77,7 +64,11 @@ int verify_degrees(const T *const inptr, 
const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (180.0 / M_PI) * inptr[i]; + r = (180.0 / M_PI) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; error = UlpFn(outptr[i], r); @@ -88,21 +79,32 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same::value) + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template int verify_radians(const T *const inptr, const T *const outptr, int n) { @@ -112,8 +114,14 @@ int verify_radians(const T *const inptr, const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (M_PI / 180.0) * inptr[i]; - error = Ulp_Error(outptr[i], r); + r = (M_PI / 180.0) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; + + error = UlpFn(outptr[i], r); + if (fabsf(error) > max_error) { max_error = error; @@ -121,41 +129,51 @@ int verify_radians(const T *const inptr, const T *const 
outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same::value) + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template int verify_sign(const T *const inptr, const T *const outptr, int n) { - T r = 0; + double r = 0; for (int i = 0; i < n; i++) { - if (inptr[i] > 0.0f) + if (conv_to_dbl(inptr[i]) > 0.0f) r = 1.0; - else if (inptr[i] < 0.0f) + else if (conv_to_dbl(inptr[i]) < 0.0f) r = -1.0; else r = 0.0; - if (r != outptr[i]) return -1; + if (r != conv_to_dbl(outptr[i])) return -1; } return 0; } } - template int test_unary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -207,33 +225,38 @@ int test_unary_fn(cl_device_id device, cl_context context, get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d); } } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = conv_to_half(get_random_float( + (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d)); + } + } err = clEnqueueWriteBuffer(queue, streams[0], true, 0, 
sizeof(T) * num_elements, &input_ptr.front(), 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + test_error(err, "clEnqueueWriteBuffer failed\n"); for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; if (i >= kVectorSizeCount) { std::string str = unary_fn_code_pattern_v3; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), fnName.c_str()); } else { std::string str = unary_fn_code_pattern; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), - vecSizeNames[i], fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); } /* Create kernels */ @@ -290,11 +313,18 @@ int test_unary_fn(cl_device_id device, cl_context context, return err; } - cl_int DegreesTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_degrees); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); test_error(error, "DegreesTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -307,11 +337,18 @@ cl_int DegreesTest::Run() return error; } - cl_int RadiansTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_radians); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, 
context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); test_error(error, "RadiansTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -324,11 +361,18 @@ cl_int RadiansTest::Run() return error; } - cl_int SignTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_sign); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); test_error(error, "SignTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -341,7 +385,6 @@ cl_int SignTest::Run() return error; } - int test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -349,7 +392,6 @@ int test_degrees(cl_device_id device, cl_context context, "degrees"); } - int test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -357,7 +399,6 @@ int test_radians(cl_device_id device, cl_context context, "radians"); } - int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index c3d8f67a37..73ff3dd9ed 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -22,6 +22,8 @@ #include #include +#include "harness/stringHelpers.h" + #include #include "test_comparisons_fp.h" @@ -83,29 +85,6 @@ extension, // clang-format on -std::string concat_kernel(const char* sstr[], int num) -{ - std::string res; - for (int i = 0; i < num; i++) res 
+= std::string(sstr[i]); - return res; -} - -template -std::string string_format(const std::string& format, Args... args) -{ - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) - + 1; // Extra space for '\0' - if (size_s <= 0) - { - throw std::runtime_error("Error during formatting."); - } - auto size = static_cast(size_s); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - return std::string(buf.get(), - buf.get() + size - 1); // We don't want the '\0' inside -} - template bool verify(const T& A, const T& B) { return F()(A, B); @@ -226,14 +205,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKerPat_3, sizeof(equivTestKerPat_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKerPatLessGreater_3, sizeof(equivTestKerPatLessGreater_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } else @@ -243,14 +222,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKernPat, sizeof(equivTestKernPat) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKernPatLessGreater, sizeof(equivTestKernPatLessGreater) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } From 43c244f01de00e4d3beb63c4b9167eccfbdeaf77 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:42:56 +0200 Subject: [PATCH 12/20] Added cl_khr_fp16 extension support for test vector_times_scalar from spirv_new (#1757) * Added cl_khr_fp16 support for vector_times_scalar from 
spirv_new (issue #142, spirv_new) * Logging correction --- .../vector_times_scalar_half.spvasm32 | 46 +++++++++++++++++ .../vector_times_scalar_half.spvasm64 | 50 +++++++++++++++++++ .../spirv_new/test_op_vector_times_scalar.cpp | 14 ++++++ test_conformance/spirv_new/types.hpp | 4 ++ 4 files changed, 114 insertions(+) create mode 100644 test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 create mode 100644 test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 new file mode 100644 index 0000000000..6fda7d8f18 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 25 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %15 + 
%res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpLabel + %17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %18 = OpCompositeExtract %uint %17 0 + %19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %18 + %20 = OpLoad %v4half %19 Aligned 8 + %21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18 + %22 = OpLoad %half %21 Aligned 2 + %23 = OpVectorTimesScalar %v4half %20 %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %18 + OpStore %24 %23 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 new file mode 100644 index 0000000000..fa2d522103 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 28 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup 
%v4half + %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %16 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %21 + %23 = OpLoad %v4half %22 Aligned 8 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21 + %25 = OpLoad %half %24 Aligned 2 + %26 = OpVectorTimesScalar %v4half %23 %25 + %27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %21 + OpStore %27 %26 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 0859668cbb..0be4e8b71c 100644 --- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -17,6 +17,8 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos #include #include +using half = cl_half; + template int test_vector_times_scalar(cl_device_id deviceID, cl_context context, @@ -32,6 +34,16 @@ int test_vector_times_scalar(cl_device_id deviceID, } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; int num = (int)h_lhs.size(); size_t lhs_bytes = num * sizeof(Tv); @@ -171,5 +183,7 @@ int test_vector_times_scalar(cl_device_id deviceID, lhs, rhs); \ } + 
TEST_VECTOR_TIMES_SCALAR(float, 4) TEST_VECTOR_TIMES_SCALAR(double, 4) +TEST_VECTOR_TIMES_SCALAR(half, 4) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index e7fceba0cd..27a45c5b06 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -43,6 +43,8 @@ VEC_NOT_EQ_FUNC(cl_float, 2) VEC_NOT_EQ_FUNC(cl_float, 4) VEC_NOT_EQ_FUNC(cl_double, 2) VEC_NOT_EQ_FUNC(cl_double, 4) +VEC_NOT_EQ_FUNC(cl_half, 2) +VEC_NOT_EQ_FUNC(cl_half, 4) template bool isNotEqual(const T &lhs, const T &rhs) @@ -109,6 +111,8 @@ GENRAND_REAL_FUNC(cl_float, 2) GENRAND_REAL_FUNC(cl_float, 4) GENRAND_REAL_FUNC(cl_double, 2) GENRAND_REAL_FUNC(cl_double, 4) +GENRAND_REAL_FUNC(cl_half, 2) +GENRAND_REAL_FUNC(cl_half, 4) template<> inline cl_half genrandReal(RandomSeed &seed) { From 73ead9da04c2983288799effaaa12dbd02ae321d Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 27 Jun 2023 17:43:44 +0200 Subject: [PATCH 13/20] Added cl_khr_fp16 extension support for test_op_negate from spirv_new (#1762) * Added cl_khr_fp16 extension support for test_op_negate from spirv_new (issue #142) * Added clang format fix --- .../spirv_new/spirv_asm/op_neg_half.spvasm32 | 35 ++++++++++++++++ .../spirv_new/spirv_asm/op_neg_half.spvasm64 | 39 ++++++++++++++++++ test_conformance/spirv_new/test_op_negate.cpp | 40 +++++++++++-------- test_conformance/spirv_new/types.hpp | 2 + 4 files changed, 100 insertions(+), 16 deletions(-) create mode 100644 test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 create mode 100644 test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 new file mode 100644 index 0000000000..4912718745 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 @@ -0,0 +1,35 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 17 +; 
Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %11 = OpLabel + %12 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %13 = OpCompositeExtract %uint %12 0 + %14 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %13 + %15 = OpLoad %half %14 + %16 = OpFNegate %half %15 + OpStore %14 %16 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 new file mode 100644 index 0000000000..9c7e3d6df4 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 @@ -0,0 +1,39 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 20 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + 
%ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half + %ulong_32 = OpConstant %ulong 32 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %12 = OpLabel + %13 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %14 = OpCompositeExtract %ulong %13 0 + %15 = OpShiftLeftLogical %ulong %14 %ulong_32 + %16 = OpShiftRightArithmetic %ulong %15 %ulong_32 + %17 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %16 + %18 = OpLoad %half %17 + %19 = OpFNegate %half %18 + OpStore %17 %19 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp index e3dc1f349f..5009be9316 100644 --- a/test_conformance/spirv_new/test_op_negate.cpp +++ b/test_conformance/spirv_new/test_op_negate.cpp @@ -32,6 +32,15 @@ int test_negation(cl_device_id deviceID, return 0; } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } cl_int err = CL_SUCCESS; int num = (int)h_in.size(); @@ -73,29 +82,28 @@ int test_negation(cl_device_id deviceID, return 0; } -#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ - TEST_SPIRV_FUNC(OP##_##TYPE) \ - { \ - int num = 1 << 20; \ - std::vector in(num); \ - RandomSeed seed(gRandomSeed); \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - return test_negation(deviceID, \ - context, \ - queue, \ - #TYPE, \ - #OP, \ - in, FUNC); \ - } \ +#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ + TEST_SPIRV_FUNC(OP##_##TYPE) \ + { \ + int num = 1 << 20; \ + std::vector in(num); \ + RandomSeed 
seed(gRandomSeed); \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + return test_negation(deviceID, context, queue, #TYPE, #OP, in, \ + FUNC); \ + } +#define TEST_NEG_HALF TEST_NEGATION(half, cl_half, op_neg, negOpHalf) #define TEST_NEG(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_neg, negOp) #define TEST_NOT(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_not, notOp) #define TEST_NEG_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_neg, (negOpVec)) #define TEST_NOT_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_not, (notOpVec)) +TEST_NEG_HALF TEST_NEG(float) TEST_NEG(double) TEST_NEG(int) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index 27a45c5b06..939e6fa8c0 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -161,6 +161,8 @@ Tv negOp(Tv in) return -in; } +inline cl_half negOpHalf(cl_half v) { return v ^ 0x8000; } + template Tv notOp(Tv in) { From fee6d6bb6643f7f5e2b6dab46486c903e2a71680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 27 Jun 2023 17:47:24 +0200 Subject: [PATCH 14/20] Command buffer re-enqueue testing. (#1738) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Command buffer re-enqueue testing. Signed-off-by: Paweł Jastrzębski * Remove reenqueue tests and add reenqueue to existing tests. Signed-off-by: Paweł Jastrzębski * Add re-enqueue for copy and barrier tests. Signed-off-by: Paweł Jastrzębski * Fix review comments. Applied review comments for command buffer re-enqueue testing: - Add second clEnqueueCommandBufferKHR for all tests - Reinitialise memory before second enqueue of command buffers - Add different patterns for second enqueue of command buffers Signed-off-by: Paweł Jastrzębski * Fix verification patterns for second enqueue tests. Signed-off-by: Paweł Jastrzębski * Reinitialise output memory for second command buffer re-enqueue. 
Signed-off-by: Paweł Jastrzębski * Fix cast for conversion from 'const cl_char' to 'const cl_uint. Signed-off-by: Paweł Jastrzębski * Fix compilation error for MSVC. Signed-off-by: Paweł Jastrzębski * Reinitialise in_mem and out_mem with zero. Signed-off-by: Paweł Jastrzębski * Fix cast for conversion from 'const cl_int' to 'const cl_uint'. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../basic_command_buffer.cpp | 25 ++- .../command_buffer_test_barrier.cpp | 36 +++- .../command_buffer_test_copy.cpp | 196 +++++++++++++++--- .../command_buffer_test_fill.cpp | 68 ++++-- 4 files changed, 277 insertions(+), 48 deletions(-) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 43734da0a5..6c02f9f788 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -201,14 +201,33 @@ struct BasicEnqueueTest : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(num_elements); + std::vector output_data_1(num_elements); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + const cl_int new_pattern = 12; + error = clEnqueueFillBuffer(queue, in_mem, &new_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector 
output_data_2(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(new_pattern, output_data_2[i], i); } return CL_SUCCESS; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp index d73fc9ce7e..82ff16f0ec 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp @@ -70,15 +70,42 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(num_elements); + std::vector output_data_1(num_elements); error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, - data_size(), output_data.data(), 1, &event, - nullptr); + data_size(), output_data_1.data(), 1, + &event, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = + clEnqueueFillBuffer(queue, in_mem, &zero_pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = + clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector 
output_data_2(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data_2.data(), 1, + &event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data_2[i], i); } return CL_SUCCESS; @@ -106,6 +133,7 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest } const cl_int pattern = 0x16; + const cl_int zero_pattern = 0x0; clCommandQueueWrapper out_of_order_queue; clCommandBufferWrapper out_of_order_command_buffer; clEventWrapper event; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp index 102ae761e6..7a1f0e6d54 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp @@ -38,7 +38,7 @@ struct CopyImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillImageKHR(command_buffer, nullptr, src_image, - fill_color, origin, region, 0, + fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -56,13 +56,38 @@ struct CopyImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); - error = clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, - 0, output_data.data(), 0, nullptr, nullptr); + std::vector output_data_1(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = 
clEnqueueFillImage(queue, src_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueFillImage(queue, dst_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -97,8 +122,12 @@ struct CopyImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x05; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x05; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x1; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper src_image; clMemWrapper dst_image; @@ -111,7 +140,7 @@ struct CopyBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -127,20 +156,45 @@ struct CopyBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size()); + std::vector 
output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const cl_char pattern = 0x14; + const cl_char pattern_1 = 0x14; + const cl_char pattern_2 = 0x28; }; struct CopyBufferToImageKHR : public BasicCommandBufferTest @@ -150,7 +204,7 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, buffer, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, buffer, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -168,15 +222,40 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector 
output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadImage failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillImage(queue, image, &fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -211,7 +290,14 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x11; + const cl_char pattern_1 = 0x11; + const cl_char pattern_2 = 0x22; + + const cl_uint fill_color_2[4] = { static_cast(pattern_2), + static_cast(pattern_2), + static_cast(pattern_2), + static_cast(pattern_2) }; + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper buffer; @@ -225,7 +311,7 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - 
clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -243,16 +329,39 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_1[i], i); + } + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -287,8 +396,12 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x12; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x12; + 
const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x24; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -302,7 +415,7 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -319,14 +432,38 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - 
CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -353,7 +490,8 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x13; + const cl_char pattern_1 = 0x13; + const cl_char pattern_2 = 0x26; clMemWrapper in_mem; clMemWrapper out_mem; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp index 88e97a2715..0ba8055a14 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp @@ -35,7 +35,7 @@ struct FillImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -47,14 +47,34 @@ struct FillImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage 
failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -85,8 +105,12 @@ struct FillImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x10; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x10; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x20; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -99,7 +123,7 @@ struct FillBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -111,20 +135,40 @@ struct FillBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size()); + std::vector output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check 
second enqueue of command buffer */ + + clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), 0, + data_size(), 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const char pattern = 0x15; + const char pattern_1 = 0x15; + const char pattern_2 = 0x30; }; }; From 56974a58585b8c66d9beddccd984990e45ca0ad7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Jastrz=C4=99bski?= Date: Tue, 27 Jun 2023 17:54:14 +0200 Subject: [PATCH 15/20] Add global offset tests for cl_khr_command_buffer_mutable_dispatch. (#1743) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add global offset tests for cl_khr_command_buffer_mutable_dispatch. Signed-off-by: Paweł Jastrzębski * Add kernel with observable output. We should check that there's some observable output from the kernel as a result of the change to global work offset, not just that clGetMutableCommandInfoKHR has been updated. E.g we could call get_global_offset() inside of the kernel, write something to a buffer based on that, and read the buffer after the command-buffer enqueue has finished. Signed-off-by: Paweł Jastrzębski * Fix review comments. 
Applied review comments for mutable dispatch global offset test: - clFinish to ensure command-buffer has finished executing for calling clUpdateMutableCommandsKHR - Change variable and constant names for global offset - Remove redundant return CL_SUCCESS Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Fix skip conditions - Remove obsolete variable - Replace a variable with a constant Signed-off-by: Paweł Jastrzębski * Fix review comments. Changes made: - Remove explicit base class call - Fix constant magic number Signed-off-by: Paweł Jastrzębski * Fix constant magic number. Signed-off-by: Paweł Jastrzębski * Fix clang-format. Signed-off-by: Paweł Jastrzębski * Fix condition for result check. Signed-off-by: Paweł Jastrzębski --------- Signed-off-by: Paweł Jastrzębski --- .../CMakeLists.txt | 1 + .../main.cpp | 1 + .../mutable_command_basic.h | 63 ++++-- .../mutable_command_global_offset.cpp | 179 ++++++++++++++++++ .../mutable_command_info.cpp | 60 +++--- .../procs.h | 6 +- 6 files changed, 267 insertions(+), 43 deletions(-) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index e06258335a..8021460947 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_global_offset.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp 
b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 97075792bc..b53914dc56 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_global_offset), }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 966695834b..c88c14d1c7 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -19,6 +19,17 @@ #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" +// If it is supported get the addresses of all the APIs here. +#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + struct BasicMutableCommandBufferTest : BasicCommandBufferTest { BasicMutableCommandBufferTest(cl_device_id device, cl_context context, @@ -84,24 +95,52 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest &platform, nullptr); test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); - // If it is supported get the addresses of all the APIs here. 
-#define GET_EXTENSION_ADDRESS(FUNC) \ - FUNC = reinterpret_cast( \ - clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ - if (FUNC == nullptr) \ - { \ - log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ - " with " #FUNC "\n"); \ - return TEST_FAIL; \ + GET_EXTENSION_ADDRESS(clUpdateMutableCommandsKHR); + + return CL_SUCCESS; } + + clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + + const char* kernelString = "__kernel void empty() {}"; + const size_t global_work_size = 4 * 16; +}; + +struct InfoMutableCommandBufferTest : BasicMutableCommandBufferTest +{ + InfoMutableCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + cl_int error = init_extension_functions(); + test_error(error, "Unable to initialise extension functions"); + + return CL_SUCCESS; + } + + cl_int init_extension_functions() + { + BasicCommandBufferTest::init_extension_functions(); + + cl_platform_id platform; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + GET_EXTENSION_ADDRESS(clGetMutableCommandInfoKHR); return CL_SUCCESS; } clGetMutableCommandInfoKHR_fn clGetMutableCommandInfoKHR = nullptr; - const char* kernelString = "__kernel void empty() {}"; - const size_t global_work_size = 4 * sizeof(cl_int); }; -#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H +#undef GET_EXTENSION_ADDRESS + +#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp new file mode 100644 index 
0000000000..70e1d9b163
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp
@@ -0,0 +1,192 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include
+#include "typeWrappers.h"
+#include "procs.h"
+#include "testHarness.h"
+#include "imageHelpers.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include "mutable_command_basic.h"
+
+#include
+#include
+
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+//
+// CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR
+
+// Records one 1-D ND-range kernel command, runs it, then updates only the
+// command's global work offset and runs it again. Checks the offset reported
+// by clGetMutableCommandInfoKHR and the values the kernel wrote.
+struct MutableDispatchGlobalOffset : InfoMutableCommandBufferTest
+{
+    using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
+
+    MutableDispatchGlobalOffset(cl_device_id device, cl_context context,
+                                cl_command_queue queue)
+        : InfoMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Skip unless the device reports CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR in
+    // its mutable dispatch capabilities; also skip when the base test does.
+    bool Skip() override
+    {
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR;
+
+        return !mutable_support || InfoMutableCommandBufferTest::Skip();
+    }
+
+    cl_int Run() override
+    {
+        // Each work-item stores the global offset it observes at its own
+        // global id, so the buffer records which offset was in effect.
+        const char *global_offset_kernel =
+            R"(
+                __kernel void sample_test(__global int *dst)
+                {
+                    size_t tid = get_global_id(0);
+                    dst[tid] = get_global_offset(0);
+                })";
+
+        cl_int error =
+            create_single_kernel_helper(context, &program, &kernel, 1,
+                                        &global_offset_kernel, "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        clMemWrapper stream;
+        stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
+                                nullptr, &error);
+        test_error(error, "Creating test array failed");
+
+        /* Set the arguments */
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        // Record the dispatch with a null global offset and keep its mutable
+        // command handle for the later update.
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        // Change only the global work offset; the argument, exec-info and
+        // work-size fields are passed as 0/nullptr.
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            0 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            nullptr /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            &update_global_offset /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR,
+            sizeof(info_global_offset), &info_global_offset, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (info_global_offset != update_global_offset)
+        {
+            log_error("ERROR: Wrong global work offset returned from "
+                      "clGetMutableCommandInfoKHR: %zu (expected %zu)\n",
+                      info_global_offset, update_global_offset);
+            return TEST_FAIL;
+        }
+
+        std::vector<cl_int> resultData;
+        resultData.resize(num_elements);
+
+        error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate,
+                                    resultData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        // Indices below update_global_offset are only written by the first
+        // run, which the check expects to have stored 0; all later indices
+        // must hold the updated offset written by the second run.
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            const cl_int expected = (i < update_global_offset)
+                ? 0
+                : static_cast<cl_int>(update_global_offset);
+            if (resultData[i] != expected)
+            {
+                log_error("Data failed to verify: expected %d but "
+                          "resultData[%zu]=%d\n",
+                          expected, i, resultData[i]);
+                return TEST_FAIL;
+            }
+        }
+        return CL_SUCCESS;
+    }
+
+    size_t info_global_offset = 0;
+    const size_t update_global_offset = 3;
+    // Sized so that ids shifted by update_global_offset stay in bounds.
+    const size_t sizeToAllocate =
+        (global_work_size + update_global_offset) * sizeof(cl_int);
+    const size_t num_elements = sizeToAllocate / sizeof(cl_int);
+    cl_mutable_command_khr command = nullptr;
+};
+
+int test_mutable_dispatch_global_offset(cl_device_id device, cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements)
+{
+
+    return MakeAndRunTest<MutableDispatchGlobalOffset>(device, context, queue,
+                                                       num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp
index cc425a4d68..a8ed325ac7 100644
---
a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -42,13 +42,13 @@ // CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR // CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR -struct InfoDeviceQuery : public BasicMutableCommandBufferTest +struct InfoDeviceQuery : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoDeviceQuery(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -71,12 +71,12 @@ struct InfoDeviceQuery : public BasicMutableCommandBufferTest } }; -struct InfoBuffer : public BasicMutableCommandBufferTest +struct InfoBuffer : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoBuffer(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -108,13 +108,13 @@ struct InfoBuffer : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct PropertiesArray : public BasicMutableCommandBufferTest +struct PropertiesArray : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; PropertiesArray(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ 
-154,12 +154,12 @@ struct PropertiesArray : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Kernel : public BasicMutableCommandBufferTest +struct Kernel : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Kernel(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -195,12 +195,12 @@ struct Kernel : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Dimensions : public BasicMutableCommandBufferTest +struct Dimensions : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Dimensions(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -234,12 +234,12 @@ struct Dimensions : public BasicMutableCommandBufferTest const size_t dimensions = 3; }; -struct InfoType : public BasicMutableCommandBufferTest +struct InfoType : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoType(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -271,12 +271,12 @@ struct InfoType : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoQueue : public BasicMutableCommandBufferTest +struct InfoQueue : public InfoMutableCommandBufferTest { - using 
BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoQueue(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -308,13 +308,13 @@ struct InfoQueue : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest +struct InfoGlobalWorkOffset : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkOffset(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -346,13 +346,13 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest size_t test_global_work_offset = 0; }; -struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest +struct InfoGlobalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -383,13 +383,13 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest size_t test_global_work_size = 0; }; -struct InfoLocalWorkSize : public BasicMutableCommandBufferTest +struct InfoLocalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; 
InfoLocalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 4b6dacb699..588bdc817e 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,4 +59,8 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -#endif // CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +extern int test_mutable_dispatch_global_offset(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/ From 75aca34e600a9ac0fbee524404a2ac7cf4d37801 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 28 Jun 2023 08:13:15 +0100 Subject: [PATCH 16/20] Test CL_COMMAND_BUFFER_CONTEXT_KHR (#1697) Test coverage for spec PR https://github.com/KhronosGroup/OpenCL-Docs/pull/899 which introduces a new cl_khr_command_buffer query for the cl_context --- ...command_buffer_get_command_buffer_info.cpp | 54 +++++++++++++++++++ .../extensions/cl_khr_command_buffer/main.cpp | 1 + .../extensions/cl_khr_command_buffer/procs.h | 2 + 3 files changed, 57 insertions(+) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index d46b288877..1ada904d6b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ 
b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
@@ -26,6 +26,7 @@ enum class CombufInfoTestMode
     CITM_REF_COUNT,
     CITM_STATE,
     CITM_PROP_ARRAY,
+    CITM_CONTEXT,
 };
 
 namespace {
@@ -38,6 +39,7 @@ namespace {
 // -test case for CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query
 // -test case for CL_COMMAND_BUFFER_STATE_KHR query
 // -test case for CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query
+// -test case for CL_COMMAND_BUFFER_CONTEXT_KHR query
 
 template
 struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
@@ -70,6 +72,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
                 error = RunPropArrayInfoTest();
                 test_error(error, "RunPropArrayInfoTest failed");
                 break;
+            case CombufInfoTestMode::CITM_CONTEXT:
+                error = RunContextInfoTest();
+                test_error(error, "RunContextInfoTest failed");
+                break;
         }
 
         return CL_SUCCESS;
@@ -323,6 +329,46 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
         return TEST_FAIL;
     }
 
+    cl_int RunContextInfoTest()
+    {
+        cl_int error = TEST_PASS;
+        // Test of the CL_COMMAND_BUFFER_CONTEXT_KHR query. Start by recording
+        // the command buffer that the queries below are issued against.
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+        // Size-only query: pass no value buffer and read back the value size.
+        size_t ret_value_size = 0;
+        error = clGetCommandBufferInfoKHR(command_buffer,
+                                          CL_COMMAND_BUFFER_CONTEXT_KHR, 0,
+                                          nullptr, &ret_value_size);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+
+        test_assert_error(
+            ret_value_size == sizeof(cl_context),
+            "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!");
+        // Value query: the returned context handle must not be null.
+        cl_context ret_context = nullptr;
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_CONTEXT_KHR, sizeof(cl_context),
+            &ret_context, nullptr);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+        test_assert_error(
+            ret_context != nullptr,
+            "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!");
+        // Reference value: the context that the test's queue reports.
+        cl_context expected_context = nullptr;
+        error =
+            clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context),
+                                  &expected_context, nullptr);
+        test_error(error, "clGetCommandQueueInfo failed");
+        // The command buffer must report that same context.
+        test_assert_error(
+            ret_context == expected_context,
+            "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!");
+
+        return TEST_PASS;
+    }
+
     const cl_int pattern = 0xE;
 };
 
@@ -360,3 +406,11 @@ int test_info_prop_array(cl_device_id device, cl_context context,
         CommandBufferGetCommandBufferInfo>(
         device, context, queue, num_elements);
 }
+
+int test_info_context(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<
+        CommandBufferGetCommandBufferInfo>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
index 4eefc8ab1f..3562282746 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -26,6 +26,7 @@ test_definition test_list[] = {
     ADD_TEST(info_ref_count),
     ADD_TEST(info_state),
     ADD_TEST(info_prop_array),
+    ADD_TEST(info_context),
     ADD_TEST(basic_profiling),
     ADD_TEST(simultaneous_profiling),
     ADD_TEST(regular_wait_for_command_buffer),
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 53a7d93490..5c4e67fe35 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -41,6 +41,8 @@ extern int test_info_state(cl_device_id device, cl_context context,
                            cl_command_queue queue, int num_elements);
 extern int test_info_prop_array(cl_device_id device, cl_context context,
                                 cl_command_queue queue, int num_elements);
+extern int test_info_context(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements);
 extern int test_basic_set_kernel_arg(cl_device_id device, cl_context context,
                                      cl_command_queue queue, int num_elements);
 extern int
test_pending_set_kernel_arg(cl_device_id device, cl_context context, From 729cd8b7a94de09589d7703e59d266ab3eed8cdd Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 28 Jun 2023 09:34:07 +0100 Subject: [PATCH 17/20] [NFC] device_execution: use raw string literals for block kernels (#1767) Modernize by using raw string literals, which makes the kernel sources easier to read/extract. Signed-off-by: Sven van Haastregt --- .../device_execution/enqueue_block.cpp | 1061 ++++++++--------- 1 file changed, 519 insertions(+), 542 deletions(-) diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index 29a6cec15b..4ddd1db7f8 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -27,561 +27,538 @@ #ifdef CL_VERSION_2_0 extern int gWimpyMode; -static const char* enqueue_simple_block[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_simple_block(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; -static const char* enqueue_block_with_local_arg1[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void 
enqueue_block_with_local_arg1(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; +// clang-format off +static const char* enqueue_simple_block[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } -static const char* enqueue_block_with_local_arg2[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp1[i] = mul * 7 - 21;" - NL, " tmp2[i].x = (float)(mul * 7 - 21);" - NL, " tmp2[i].y = (float)(mul * 7 - 21);" - NL, " tmp2[i].z = (float)(mul * 7 - 21);" - NL, " tmp2[i].w = (float)(mul * 7 - 21);" - NL, "" - NL, " res[tid] += tmp1[i];" - NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg2(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)" - NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, 
kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + kernel void enqueue_simple_block(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_wait_list[] = -{ - NL, "#define BLOCK_SUBMITTED 1" - NL, "#define BLOCK_COMPLETED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_block_with_wait_list(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; -static const char* enqueue_block_with_wait_list_and_local_arg[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define BLOCK_STARTED 3" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "void 
block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " res[tid] = BLOCK_STARTED;" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, " - NL, " ^(__local void* buf) {" - NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);" - NL, " }, LOCAL_MEM_SIZE*sizeof(int));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_block_get_kernel_work_group_size[] = -{ - NL, "void 
block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; +static const char* enqueue_block_with_local_arg1[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, 
"}" -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + res[tid] += 2; + } -static const char* enqueue_block_capture_event_profiling_info_after_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)" - NL, "{" - NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, " release_event(evt);" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "}" - NL -}; + kernel void enqueue_block_with_local_arg1(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_capture_event_profiling_info_before_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong 
value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const ulong *value, __global int* res)" - NL, "{" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t block_evt2;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt1);" - NL, " release_event(block_evt2);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); }; -static const char* enqueue_block_with_barrier[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = mul * 7 -21;" - NL, "}" - NL, "" - NL, "void loop_fn(size_t tid, int n, 
__global int* res)" - NL, "{" - NL, " while(n > 0)" - NL, " {" - NL, " barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = 0;" - NL, " --n;" - NL, " }" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_barrier(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " queue_t def_q = get_default_queue();" - NL, " res[tid] = -1;" - NL, " size_t n = 256;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " ndrange_t ndrange = ndrange_1D(n);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_marker_with_block_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_block_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - 
NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt1);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; +static const char* enqueue_block_with_local_arg2[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_marker_with_user_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_user_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " uint multiplier = 7;" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t marker_evt;" - NL, " clk_event_t block_evt;" - NL, "" - NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, " - NL, " ^{" - NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, "" - NL, " //check 
block is not started" - NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp1[i] = mul * 7 - 21; + tmp2[i].x = (float)(mul * 7 - 21); + tmp2[i].y = (float)(mul * 7 - 21); + tmp2[i].z = (float)(mul * 7 - 21); + tmp2[i].w = (float)(mul * 7 - 21); + + res[tid] += tmp1[i]; + res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w); + } + res[tid] += 2; + } -static const char* enqueue_marker_with_mixed_events[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t mix_ev[2];" - NL, " mix_ev[0] = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1]," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block 
is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[1]);" - NL, " release_event(marker_evt);" - NL, " release_event(mix_ev[0]);" - NL, "}" - NL -}; + kernel void enqueue_block_with_local_arg2(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_mixed_events[] = -{ - NL, "kernel void enqueue_block_with_mixed_events(__global int* res)" - NL, "{" - NL, " int enq_res;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t mix_ev[3];" - NL, " mix_ev[0] = create_user_event();" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " res[tid] = -2;" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[0]);" - NL, " release_event(mix_ev[1]);" - NL, " release_event(mix_ev[2]);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2) + { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); }; + + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, 
(uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_with_wait_list[] = { R"( + #define BLOCK_SUBMITTED 1 + #define BLOCK_COMPLETED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_block_with_wait_list(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"( + #define LOCAL_MEM_SIZE 10 + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define BLOCK_STARTED 3 + #define CHECK_SUCCESS 0 + + void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp) + { + res[tid] = BLOCK_STARTED; + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + if (res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED; + } + + kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + 
ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^(__local void* buf) { + block_fn_local_arg(tid, multiplier, res, (__local int*)buf); + }, LOCAL_MEM_SIZE*sizeof(int)); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_get_kernel_work_group_size[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_work_group_size(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_work_group_size(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void 
(^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_after_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const clk_event_t evt, __global int* res) + { + capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + release_event(evt); + } + + kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res) + { + size_t tid = get_global_id(0); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_before_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void 
block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const ulong *value, __global int* res) + { + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + } + + kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + clk_event_t block_evt2; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + set_user_event_status(user_evt, CL_COMPLETE); + + void (^checkBlock) (void) = ^{ check_res(tid, &value, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + release_event(user_evt); + release_event(block_evt1); + release_event(block_evt2); + } +)" }; + +static const char* enqueue_block_with_barrier[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = mul * 7 -21; + } + + void loop_fn(size_t tid, int n, __global int* res) + { + while (n > 0) + { + barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = 0; + --n; + } + } + + kernel void enqueue_block_with_barrier(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + queue_t def_q = get_default_queue(); + res[tid] = -1; + size_t n = 256; + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + ndrange_t ndrange = ndrange_1D(n); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, 
ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_marker_with_block_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_block_event(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + clk_event_t block_evt1; + clk_event_t marker_evt; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt1); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* enqueue_marker_with_user_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_user_event(__global int* res) + { + size_t tid = get_global_id(0); + uint multiplier = 7; + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t 
ndrange = ndrange_1D(1); + + clk_event_t marker_evt; + clk_event_t block_evt; + + int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, + ^{ + if (res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS; + }); + + //check block is not started + if (res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* enqueue_marker_with_mixed_events[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_mixed_events(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t mix_ev[2]; + mix_ev[0] = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + clk_event_t marker_evt; + + enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(mix_ev[0], CL_COMPLETE); + + release_event(mix_ev[1]); + release_event(marker_evt); + release_event(mix_ev[0]); + } +)" }; + +static const char* 
enqueue_block_with_mixed_events[] = { R"( + kernel void enqueue_block_with_mixed_events(__global int* res) + { + int enq_res; + size_t tid = get_global_id(0); + clk_event_t mix_ev[3]; + mix_ev[0] = create_user_event(); + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + res[tid] = -2; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; }); + if (enq_res != CLK_SUCCESS) { res[tid] = -4; return; } + + set_user_event_status(mix_ev[0], CL_COMPLETE); + + release_event(mix_ev[0]); + release_event(mix_ev[1]); + release_event(mix_ev[2]); + } +)" }; +// clang-format on static const kernel_src sources_enqueue_block[] = { From 845ec694bbc333a563de33e5cce8e541a7b8b910 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 29 Jun 2023 12:20:15 +0100 Subject: [PATCH 18/20] workgroups: fix -Wsign-compare warnings (#1774) In preparation of re-enabling -Wsign-compare globally, fix some instances of this warning. 
Signed-off-by: Sven van Haastregt --- test_conformance/workgroups/test_wg_all.cpp | 3 +-- test_conformance/workgroups/test_wg_any.cpp | 3 +-- test_conformance/workgroups/test_wg_broadcast.cpp | 11 ++++------- .../workgroups/test_wg_suggested_local_work_size.cpp | 4 ++-- 4 files changed, 8 insertions(+), 13 deletions(-) diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp index 41abd1249f..f9b574e454 100644 --- a/test_conformance/workgroups/test_wg_all.cpp +++ b/test_conformance/workgroups/test_wg_all.cpp @@ -75,7 +75,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp index e0242cfb44..f7ff899a33 100644 --- a/test_conformance/workgroups/test_wg_any.cpp +++ b/test_conformance/workgroups/test_wg_any.cpp @@ -75,7 +75,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp 
b/test_conformance/workgroups/test_wg_broadcast.cpp index e24ac7b986..a4cb0c6fe2 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -70,7 +70,7 @@ verify_wg_broadcast_1D(float *inptr, float *outptr, size_t n, size_t wg_size) for (i=0,group_id=0; i wg_size ? wg_size : (n-i); + size_t local_size = (n - i) > wg_size ? wg_size : (n - i); float broadcast_result = inptr[i + (group_id % local_size)]; for (j=0; j Date: Fri, 30 Jun 2023 11:22:43 +0100 Subject: [PATCH 19/20] workgroups: fix program/kernel object leak (#1775) `create_single_kernel_helper` is called in a loop, overwriting the objects contained in the wrapper classes. The wrapper class is not aware of this, as the overwriting happens through its `operator&`. Move the wrapper objects into the loop, so that the contained objects get released as soon as the program and kernel objects are no longer needed. Signed-off-by: Sven van Haastregt --- .../workgroups/test_wg_suggested_local_work_size.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp index 989f1dfd9f..a31fca63f8 100644 --- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -221,8 +221,6 @@ int do_test_work_group_suggested_local_size( bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr, cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim) { - clProgramWrapper scan_program; - clKernelWrapper scan_kernel; int err; size_t test_values[] = { 1, 1, 1 }; std::string kernel_names[6] = { @@ -244,6 +242,8 @@ int do_test_work_group_suggested_local_size( for (int kernel_num = 0; kernel_num < 6; kernel_num++) { if (max_local_mem_size < local_mem_size[kernel_num]) continue; + clProgramWrapper scan_program; + clKernelWrapper 
scan_kernel; // Create the kernel err = create_single_kernel_helper( context, &scan_program, &scan_kernel, 1, From 9e8430a6a69b4c2f2c714137a68e460ae8f14515 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 3 Jul 2023 10:07:32 +0100 Subject: [PATCH 20/20] [NFC] clang-format basic/test_enqueue_map.cpp (#1777) Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_enqueue_map.cpp | 308 +++++++++++--------- 1 file changed, 168 insertions(+), 140 deletions(-) diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index d28f7e41a3..6b650c0d82 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -26,6 +26,7 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" +// clang-format off const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR, CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, @@ -33,93 +34,104 @@ const cl_mem_flags flag_set[] = { CL_MEM_COPY_HOST_PTR, 0 }; -const char* flag_set_names[] = { + +const char *flag_set_names[] = { "CL_MEM_ALLOC_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", "CL_MEM_USE_HOST_PTR", "CL_MEM_COPY_HOST_PTR", "0" }; +// clang-format on -int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; - const size_t bufferSize = 256*256; - MTdataHolder d{gRandomSeed}; + const size_t bufferSize = 256 * 256; + MTdataHolder d{ gRandomSeed }; BufferOwningPtr hostPtrData{ malloc(bufferSize) }; BufferOwningPtr referenceData{ malloc(bufferSize) }; - BufferOwningPtr 
finalData{malloc(bufferSize)}; + BufferOwningPtr finalData{ malloc(bufferSize) }; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) + for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); generate_random_data(kChar, (unsigned int)bufferSize, d, hostPtrData); memcpy(referenceData, hostPtrData, bufferSize); void *hostPtr = nullptr; cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); if (hasHostPtr) hostPtr = hostPtrData; - memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); - test_error( error, "Unable to create testing buffer" ); + memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); if (!hasHostPtr) { error = - clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, - hostPtrData, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); + clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + hostPtrData, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); } - for( int i = 0; i < 128; i++ ) + for (int i = 0; i < 128; i++) { - size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); - size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d ); - - cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, length, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapBuffer call failed" ); - log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); - return -1; - } - - // Write into the region - 
for( size_t j = 0; j < length; j++ ) - { - cl_char spin = (cl_char)genrand_int32( d ); - - // Test read AND write in one swipe - cl_char value = mappedRegion[ j ]; - value = spin - value; - mappedRegion[ j ] = value; - - // Also update the initial data array - value = referenceData[offset + j]; - value = spin - value; - referenceData[offset + j] = value; - } - - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); + size_t offset = (size_t)random_in_range(0, (int)bufferSize - 1, d); + size_t length = + (size_t)random_in_range(1, (int)(bufferSize - offset), d); + + cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + length, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapBuffer call failed"); + log_error("\tOffset: %d Length: %d\n", (int)offset, + (int)length); + return -1; + } + + // Write into the region + for (size_t j = 0; j < length; j++) + { + cl_char spin = (cl_char)genrand_int32(d); + + // Test read AND write in one swipe + cl_char value = mappedRegion[j]; + value = spin - value; + mappedRegion[j] = value; + + // Also update the initial data array + value = referenceData[offset + j]; + value = spin - value; + referenceData[offset + j] = value; + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); } - // Final validation: read actual values of buffer and compare against our reference - error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, bufferSize, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + // Final validation: read actual values of buffer and compare against + // our reference + error = clEnqueueReadBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); - 
for( size_t q = 0; q < bufferSize; q++ ) + for (size_t q = 0; q < bufferSize; q++) { if (referenceData[q] != finalData[q]) { log_error( - "ERROR: Sample %d did not validate! Got %d, expected %d\n", - (int)q, (int)finalData[q], (int)referenceData[q]); + "ERROR: Sample %d did not validate! Got %d, expected %d\n", + (int)q, (int)finalData[q], (int)referenceData[q]); return -1; } } @@ -128,112 +140,128 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman return 0; } -int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; const size_t imageSize = 256; const size_t imageDataSize = imageSize * imageSize * 4 * sizeof(cl_uint); - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) BufferOwningPtr hostPtrData{ malloc(imageDataSize) }; BufferOwningPtr referenceData{ malloc(imageDataSize) }; - BufferOwningPtr finalData{malloc(imageDataSize)}; - - MTdataHolder d{gRandomSeed}; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { - clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - - generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d, - hostPtrData); - memcpy(referenceData, hostPtrData, imageDataSize); - - cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); - void *hostPtr = nullptr; - if (hasHostPtr) hostPtr = hostPtrData; - memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, - imageSize, imageSize, 0, hostPtr, &error ); - test_error( error, "Unable to create testing buffer" ); - - if (!hasHostPtr) { - size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1}; - 
error = - clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, - 0, 0, hostPtrData, 0, NULL, NULL); - test_error( error, "Unable to write to testing buffer" ); - } - - for( int i = 0; i < 128; i++ ) + BufferOwningPtr finalData{ malloc(imageDataSize) }; + + MTdataHolder d{ gRandomSeed }; + for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); + + generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), + d, hostPtrData); + memcpy(referenceData, hostPtrData, imageDataSize); + + cl_mem_flags flags = flag_set[src_flag_id]; + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + void *hostPtr = nullptr; + if (hasHostPtr) hostPtr = hostPtrData; + memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, + imageSize, imageSize, 0, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); - size_t offset[3], region[3]; - size_t rowPitch; - - offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); - offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); - offset[ 2 ] = 0; - region[ 2 ] = 1; - cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapImage call failed" ); - log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); - return -1; - } - - // Write into the region - cl_uint *mappedPtr = mappedRegion; - for( size_t y = 0; y < region[ 1 ]; y++ ) - { - for( size_t x = 0; x < region[ 0 ] * 4; x++ ) + if (!hasHostPtr) { 
- cl_int spin = (cl_int)random_in_range( 16, 1024, d ); - - cl_int value; - // Test read AND write in one swipe - value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; - value = spin - value; - mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; - - // Also update the initial data array - value = - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x]; - value = spin - value; - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x] = - value; + size_t write_origin[3] = { 0, 0, 0 }, + write_region[3] = { imageSize, imageSize, 1 }; + error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, + write_region, 0, 0, hostPtrData, 0, + NULL, NULL); + test_error(error, "Unable to write to testing buffer"); } - } - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); - } + for (int i = 0; i < 128; i++) + { - // Final validation: read actual values of buffer and compare against our reference - size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; - error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + size_t offset[3], region[3]; + size_t rowPitch; + + offset[0] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[0] = + (size_t)random_in_range(1, (int)(imageSize - offset[0] - 1), d); + offset[1] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[1] = + (size_t)random_in_range(1, (int)(imageSize - offset[1] - 1), d); + offset[2] = 0; + region[2] = 1; + cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + region, &rowPitch, NULL, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapImage call failed"); + log_error("\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], + (int)offset[1], 
(int)region[0], (int)region[1]); + return -1; + } - for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) - { - if (referenceData[q] != finalData[q]) + // Write into the region + cl_uint *mappedPtr = mappedRegion; + for (size_t y = 0; y < region[1]; y++) + { + for (size_t x = 0; x < region[0] * 4; x++) + { + cl_int spin = (cl_int)random_in_range(16, 1024, d); + + cl_int value; + // Test read AND write in one swipe + value = mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x]; + value = spin - value; + mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x] = value; + + // Also update the initial data array + value = + referenceData[((offset[1] + y) * imageSize + offset[0]) + * 4 + + x]; + value = spin - value; + referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + + x] = value; + } + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); + } + + // Final validation: read actual values of buffer and compare against + // our reference + size_t finalOrigin[3] = { 0, 0, 0 }, + finalRegion[3] = { imageSize, imageSize, 1 }; + error = clEnqueueReadImage(queue, memObject, CL_TRUE, finalOrigin, + finalRegion, 0, 0, finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); + + for (size_t q = 0; q < imageSize * imageSize * 4; q++) { - log_error("ERROR: Sample %d (coord %d,%d) did not validate! Got " - "%d, expected %d\n", - (int)q, (int)((q / 4) % imageSize), - (int)((q / 4) / imageSize), (int)finalData[q], - (int)referenceData[q]); - return -1; + if (referenceData[q] != finalData[q]) + { + log_error( + "ERROR: Sample %d (coord %d,%d) did not validate! Got " + "%d, expected %d\n", + (int)q, (int)((q / 4) % imageSize), + (int)((q / 4) / imageSize), (int)finalData[q], + (int)referenceData[q]); + return -1; + } } - } - } // cl_mem_flags + } // cl_mem_flags return 0; } -