diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 1ba63abdd..635e4a7eb 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -13,16 +13,16 @@ jobs: fail-fast: false matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest] include: - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: true gl: 1 extra: " gl" - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: false arch: arm - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: false arch: aarch64 debug: 1 @@ -55,10 +55,10 @@ jobs: run: ./presubmit.sh formatcheck: name: Check code format - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Install packages - run: sudo apt install -y clang-format clang-format-9 + run: sudo apt install -y clang-format clang-format-11 - uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/check-format.sh b/check-format.sh index be8f9d785..b5dc0a72c 100755 --- a/check-format.sh +++ b/check-format.sh @@ -2,7 +2,7 @@ # Arg used to specify non-'origin/main' comparison branch ORIGIN_BRANCH=${1:-"origin/main"} -CLANG_BINARY=${2:-"`which clang-format-9`"} +CLANG_BINARY=${2:-"`which clang-format-11`"} # Run git-clang-format to check for violations CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index b9f95a94a..1fb85035e 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -1715,7 +1715,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < (size_t)width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; @@ -1733,7 +1733,7 @@ void * 
CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp index f5665deb2..2d503eb5f 100644 --- a/test_common/harness/mt19937.cpp +++ b/test_common/harness/mt19937.cpp @@ -51,6 +51,7 @@ #include "harness/alloc.h" #ifdef __SSE2__ +#include #include #endif @@ -107,7 +108,7 @@ cl_uint genrand_int32(MTdata d) /* mag01[x] = x * MATRIX_A for x=0,1 */ static const cl_uint mag01[2] = { 0x0UL, MATRIX_A }; #ifdef __SSE2__ - static volatile int init = 0; + static std::once_flag init_flag; static union { __m128i v; cl_uint s[4]; @@ -123,8 +124,7 @@ cl_uint genrand_int32(MTdata d) int kk; #ifdef __SSE2__ - if (0 == init) - { + auto init_fn = []() { upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK; lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = @@ -134,8 +134,8 @@ cl_uint genrand_int32(MTdata d) MATRIX_A; c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL; c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL; - init = 1; - } + }; + std::call_once(init_flag, init_fn); #endif kk = 0; diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h index 3f6bf64db..e1275f103 100644 --- a/test_common/harness/stringHelpers.h +++ b/test_common/harness/stringHelpers.h @@ -14,10 +14,11 @@ // limitations under the License. // -#ifndef BASIC_UTILS_H -#define BASIC_UTILS_H +#ifndef STRING_HELPERS_H +#define STRING_HELPERS_H #include +#include #include inline std::string concat_kernel(const char *sstr[], int num) @@ -38,4 +39,4 @@ inline std::string str_sprintf(const std::string &str, Args... 
args) return std::string(buffer.get(), buffer.get() + s - 1); } -#endif // BASIC_UTIL_H +#endif // STRING_HELPERS_H diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 95ea81631..3d743e717 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -835,9 +835,9 @@ void callTestFunctions(test_definition testList[], std::vector threads; test_harness_state state = { testList, resultTestList, deviceToUse, config }; - for (int i = 0; i < config.numWorkerThreads; i++) + for (unsigned i = 0; i < config.numWorkerThreads; i++) { - log_info("Spawning worker thread %i\n", i); + log_info("Spawning worker thread %u\n", i); threads.push_back(new std::thread(test_function_runner, &state)); } diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp index 43e81277e..827072fc7 100644 --- a/test_conformance/allocations/main.cpp +++ b/test_conformance/allocations/main.cpp @@ -326,6 +326,7 @@ int main(int argc, const char *argv[]) else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 ) { printUsage( argv[0] ); + free(argList); return -1; } diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index fa5c227fa..92ae1d7b1 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -799,8 +799,8 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c test_error(error, "clFinish failed"); if (max_dimensions == 2) { - return 0; free(source); + return 0; } local[1]--; local[2]++; diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index 7da2dfa79..ce0410bcf 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// + +#include + #include "testBase.h" #include "harness/conversions.h" @@ -226,13 +229,13 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, (int)global_threads[0], (int)local_threads[0]); // Allocate our storage - cl_mem bin_counters = + clMemWrapper bin_counters = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * number_of_bins, NULL, NULL); - cl_mem bins = clCreateBuffer( + clMemWrapper bins = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL); - cl_mem bin_assignments = + clMemWrapper bin_assignments = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * number_of_items, NULL, NULL); @@ -253,7 +256,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, } // Initialize our storage - cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + std::unique_ptr l_bin_counts(new cl_int[number_of_bins]); if (!l_bin_counts) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -263,8 +266,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, int i; for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0; err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, - sizeof(cl_int) * number_of_bins, l_bin_counts, 0, - NULL, NULL); + sizeof(cl_int) * number_of_bins, + l_bin_counts.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to set initial values for " @@ -273,8 +276,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } - cl_int *values = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + std::unique_ptr values( + new cl_int[number_of_bins * max_counts_per_bin]); if (!values) { log_error( @@ -285,7 +288,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, - values, 0, NULL, NULL); + 
values.get(), 0, NULL, NULL); if (err) { log_error( @@ -293,10 +296,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err); return -1; } - free(values); - cl_int *l_bin_assignments = - (cl_int *)malloc(sizeof(cl_int) * number_of_items); + std::unique_ptr l_bin_assignments(new cl_int[number_of_items]); if (!l_bin_assignments) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -326,7 +327,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, } err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int) * number_of_items, - l_bin_assignments, 0, NULL, NULL); + l_bin_assignments.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to set initial values for " @@ -355,8 +356,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } - cl_int *final_bin_assignments = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + std::unique_ptr final_bin_assignments( + new cl_int[number_of_bins * max_counts_per_bin]); if (!final_bin_assignments) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -366,15 +367,14 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err = clEnqueueReadBuffer(queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, - final_bin_assignments, 0, NULL, NULL); + final_bin_assignments.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); return -1; } - cl_int *final_bin_counts = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + std::unique_ptr final_bin_counts(new cl_int[number_of_bins]); if (!final_bin_counts) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -382,8 +382,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } err = clEnqueueReadBuffer(queue, bin_counters, true, 0, - sizeof(cl_int) * number_of_bins, 
final_bin_counts, - 0, NULL, NULL); + sizeof(cl_int) * number_of_bins, + final_bin_counts.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", @@ -460,13 +460,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, errors++; } } - free(l_bin_counts); - free(l_bin_assignments); - free(final_bin_assignments); - free(final_bin_counts); - clReleaseMemObject(bin_counters); - clReleaseMemObject(bins); - clReleaseMemObject(bin_assignments); + if (errors == 0) { log_info("add_index_bin_test passed. Each item was put in the correct " diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index 47c1c980f..9dcf1d5a6 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -52,14 +52,12 @@ set(${MODULE_NAME}_SOURCES test_kernel_call_kernel_function.cpp test_local_kernel_scope.cpp test_progvar.cpp - test_wg_barrier.cpp test_global_linear_id.cpp test_local_linear_id.cpp test_enqueued_local_size.cpp test_simple_image_pitch.cpp test_get_linear_ids.cpp test_rw_image_access_qualifier.cpp - test_wg_barrier.cpp test_enqueued_local_size.cpp test_global_linear_id.cpp test_local_linear_id.cpp diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp index d20af14a4..6352b42fa 100644 --- a/test_conformance/basic/test_barrier.cpp +++ b/test_conformance/basic/test_barrier.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -21,143 +21,136 @@ #include #include +#include +#include +#include #include "procs.h" -const char *barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *outptr, int n) +namespace { +const char *barrier_kernel_code = R"( +__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, + __global int *sum) { - int r = 0; - int i; + int tid = get_local_id(0); + int lsize = get_local_size(0); + int i; - for (i=0; i 1; i = hadd(i, 1)) { - log_error("BARRIER test failed\n"); - return -1; + BARRIER(CLK_GLOBAL_MEM_FENCE); + if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i]; + lsize = i; } - log_info("BARRIER test passed\n"); - return 0; + // no barrier is required here because last person to write to tmp_sum[0] + // was tid 0 + if (tid == 0) *sum = tmp_sum[0]; } +)"; -int -test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +void generate_random_inputs(std::vector &v) { - cl_mem streams[3]; - cl_int *input_ptr = NULL, *output_ptr = NULL; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - size_t local_threads[3]; - int err; - int i; - size_t max_local_workgroup_size[3]; - size_t max_threadgroup_size = 0; - MTdata d; + RandomSeed seed(gRandomSeed); - err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" ); - test_error(err, "Failed to build kernel/program."); + 
auto random_generator = [&seed]() { + return static_cast( + get_random_float(-0x01000000, 0x01000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); +} + +int test_barrier_common(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + std::string barrier_str) +{ + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_threadgroup_size), &max_threadgroup_size, NULL); - test_error(err, "clGetKernelWorkgroupInfo failed."); + cl_int output; + int err; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); - test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + size_t max_threadgroup_size = 0; + std::string build_options = std::string("-DBARRIER=") + barrier_str; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &barrier_kernel_code, "compute_sum", + build_options.c_str()); + test_error(err, "Failed to build kernel/program."); - // Pick the minimum of the device and the kernel - if (max_threadgroup_size > max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, + &max_threadgroup_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed."); // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; + while (num_elements % max_threadgroup_size) max_threadgroup_size--; - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); + std::vector input(num_elements); streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); + sizeof(cl_int) * num_elements, nullptr, &err); test_error(err, "clCreateBuffer failed."); - streams[1] = - 
clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), + nullptr, &err); test_error(err, "clCreateBuffer failed."); streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); + sizeof(cl_int) * max_threadgroup_size, nullptr, &err); test_error(err, "clCreateBuffer failed."); - d = init_genrand( gRandomSeed ); - for (i=0; i #include +#include +#include #include "procs.h" -const char *constant_kernel_code = -"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" float ftmp = tmpF[tid]; \n" -" float Itmp = tmpI[tid]; \n" -" out[tid] = ftmp * Itmp; \n" -"}\n"; - -const char *loop_constant_kernel_code = -"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n" -"{\n" -" int tid = get_global_id(0);\n" -" float sum = 0;\n" -" for (int i = 0; i < num; i++) {\n" -" float pos = i_pos[i*3];\n" -" sum += pos;\n" -" }\n" -" out[tid] = sum;\n" -"}\n"; - - -static int -verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) +namespace { +const char* constant_kernel_code = R"( +__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI) +{ + int tid = get_global_id(0); + + float ftmp = tmpF[tid]; + float Itmp = tmpI[tid]; + out[tid] = ftmp * Itmp; +} +)"; + +const char* loop_constant_kernel_code = R"( +kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num) { - int i; + int tid = get_global_id(0); + float sum = 0; + for (int i = 0; i < num; i++) { + float pos = i_pos[i*3]; + sum += pos; + } + out[tid] = sum; +} +)"; + - for (i=0; i < n; i++) +int verify(std::vector& tmpF, std::vector& tmpI, + std::vector& out) +{ + for (int i = 0; i < out.size(); i++) { float f = tmpF[i] * tmpI[i]; - if( out[i] != f ) + if (out[i] != f) { log_error("CONSTANT test 
failed\n"); return -1; @@ -66,214 +69,172 @@ verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) return 0; } - -static int -verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n) +int verify_loop_constant(const std::vector& tmp, + std::vector& out, cl_int l) { - int i; - cl_int j; - for (i=0; i < n; i++) - { - float sum = 0; - for (j=0; j < l; ++j) - sum += tmp[j*3]; + float sum = 0; + for (int j = 0; j < l; ++j) sum += tmp[j * 3]; - if( out[i] != sum ) - { - log_error("loop CONSTANT test failed\n"); - return -1; - } + auto predicate = [&sum](cl_float elem) { return sum != elem; }; + + if (std::any_of(out.cbegin(), out.cend(), predicate)) + { + log_error("loop CONSTANT test failed\n"); + return -1; } log_info("loop CONSTANT test passed\n"); return 0; } -int -test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +template void generate_random_inputs(std::vector& v) +{ + RandomSeed seed(gRandomSeed); + + auto random_generator = [&seed]() { + return static_cast(get_random_float(-0x02000000, 0x02000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); +} +} + +int test_constant(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - cl_mem streams[3]; - cl_int *tmpI; - cl_float *tmpF, *out; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - int err; - unsigned int i; + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t global_threads[3]; + int err; cl_ulong maxSize, maxGlobalSize, maxAllocSize; size_t num_floats, num_ints, constant_values; - MTdata d; - RoundingMode oldRoundMode; + RoundingMode oldRoundMode; int isRTZ = 0; - /* Verify our test buffer won't be bigger than allowed */ - err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( err, "Unable to get max constant buffer size" ); - - log_info("Device reports 
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize); - - // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE - err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); - test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - - if (maxSize > maxGlobalSize / 4) - maxSize = maxGlobalSize / 4; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); - test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - - maxSize/=4; - num_ints = (size_t)maxSize/sizeof(cl_int); - num_floats = (size_t)maxSize/sizeof(cl_float); - if (num_ints >= num_floats) { - constant_values = num_floats; - } else { - constant_values = num_ints; - } - - log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n", - constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float)); - - tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values); - tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values); - out = (cl_float*)malloc(sizeof(cl_float) * constant_values); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * constant_values, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + /* Verify our test buffer won't be bigger than allowed */ + err = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + 
test_error(err, "Unable to get max constant buffer size"); + log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", + maxSize); - d = init_genrand( gRandomSeed ); - for (i=0; i(maxSize / sizeof(cl_int)); + num_floats = static_cast(maxSize / sizeof(cl_float)); + constant_values = std::min(num_floats, num_ints); + + + log_info( + "Test will attempt to use %lu bytes with one %lu byte constant int " + "buffer and one %lu byte constant float buffer.\n", + constant_values * sizeof(cl_int) + constant_values * sizeof(cl_float), + constant_values * sizeof(cl_int), constant_values * sizeof(cl_float)); + + std::vector tmpI(constant_values); + std::vector tmpF(constant_values); + std::vector out(constant_values); + + + streams[0] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); - err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" ); - if (err) { - log_error("Failed to create kernel and program: %d\n", err); - return -1; - } + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); + + streams[2] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); + + generate_random_inputs(tmpI); + generate_random_inputs(tmpF); + + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(cl_float) * constant_values, tmpF.data(), + 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed"); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, + sizeof(cl_int) * constant_values, tmpI.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer faile."); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &constant_kernel_code, "constant_kernel"); + test_error(err, "Failed to create 
kernel and program"); err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + test_error(err, "clSetKernelArgs failed"); global_threads[0] = constant_values; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed: %d\n", err); - return -1; - } - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); + + err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_float) * constant_values, out.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); - //If we only support rtz mode - if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded) + // If we only support rtz mode + if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded) { oldRoundMode = set_round(kRoundTowardZero, kfloat); isRTZ = 1; } - err = verify(tmpF, tmpI, out, (int)constant_values); + err = verify(tmpF, tmpI, out); - if (isRTZ) - (void)set_round(oldRoundMode, kfloat); + if (isRTZ) (void)set_round(oldRoundMode, kfloat); // Loop constant buffer test - cl_program loop_program; - cl_kernel loop_kernel; + clProgramWrapper loop_program; + clKernelWrapper loop_kernel; cl_int limit = 2; - memset(out, 0, sizeof(cl_float) * constant_values); + memset(out.data(), 0, sizeof(cl_float) * constant_values); err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1, - 
&loop_constant_kernel_code, "loop_constant_kernel" ); - if (err) { - log_error("Failed to create loop kernel and program: %d\n", err); - return -1; - } + &loop_constant_kernel_code, + "loop_constant_kernel"); + test_error(err, "Failed to create kernel and program"); err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]); err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit); - if (err != CL_SUCCESS) { - log_error("clSetKernelArgs for loop kernel failed\n"); - return -1; - } + test_error(err, "clSetKernelArgs failed"); - err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed: %d\n", err); - return -1; - } - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL ); - if (err != CL_SUCCESS) { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueNDRangeKernel(queue, loop_kernel, 1, nullptr, global_threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); - err = verify_loop_constant(tmpF, out, limit, (int)constant_values); + err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_float) * constant_values, out.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + err = verify_loop_constant(tmpF, out, limit); - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseKernel(loop_kernel); - clReleaseProgram(loop_program); - free(tmpI); - free(tmpF); - free(out); return err; } - - - - - diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index d28f7e41a..6b650c0d8 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ 
b/test_conformance/basic/test_enqueue_map.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -26,6 +26,7 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" +// clang-format off const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR, CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, @@ -33,93 +34,104 @@ const cl_mem_flags flag_set[] = { CL_MEM_COPY_HOST_PTR, 0 }; -const char* flag_set_names[] = { + +const char *flag_set_names[] = { "CL_MEM_ALLOC_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", "CL_MEM_USE_HOST_PTR", "CL_MEM_COPY_HOST_PTR", "0" }; +// clang-format on -int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; - const size_t bufferSize = 256*256; - MTdataHolder d{gRandomSeed}; + const size_t bufferSize = 256 * 256; + MTdataHolder d{ gRandomSeed }; BufferOwningPtr hostPtrData{ malloc(bufferSize) }; BufferOwningPtr referenceData{ malloc(bufferSize) }; - BufferOwningPtr finalData{malloc(bufferSize)}; + BufferOwningPtr finalData{ malloc(bufferSize) }; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) + for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); generate_random_data(kChar, (unsigned int)bufferSize, d, hostPtrData); memcpy(referenceData, hostPtrData, bufferSize); void *hostPtr = nullptr; cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & 
CL_MEM_COPY_HOST_PTR); + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); if (hasHostPtr) hostPtr = hostPtrData; - memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); - test_error( error, "Unable to create testing buffer" ); + memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); if (!hasHostPtr) { error = - clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, - hostPtrData, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); + clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + hostPtrData, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); } - for( int i = 0; i < 128; i++ ) + for (int i = 0; i < 128; i++) { - size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); - size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d ); - - cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, length, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapBuffer call failed" ); - log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); - return -1; - } - - // Write into the region - for( size_t j = 0; j < length; j++ ) - { - cl_char spin = (cl_char)genrand_int32( d ); - - // Test read AND write in one swipe - cl_char value = mappedRegion[ j ]; - value = spin - value; - mappedRegion[ j ] = value; - - // Also update the initial data array - value = referenceData[offset + j]; - value = spin - value; - referenceData[offset + j] = value; - } - - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); + size_t offset = (size_t)random_in_range(0, (int)bufferSize - 1, d); + size_t length = + (size_t)random_in_range(1, (int)(bufferSize - offset), d); + + cl_char 
*mappedRegion = (cl_char *)clEnqueueMapBuffer( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + length, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapBuffer call failed"); + log_error("\tOffset: %d Length: %d\n", (int)offset, + (int)length); + return -1; + } + + // Write into the region + for (size_t j = 0; j < length; j++) + { + cl_char spin = (cl_char)genrand_int32(d); + + // Test read AND write in one swipe + cl_char value = mappedRegion[j]; + value = spin - value; + mappedRegion[j] = value; + + // Also update the initial data array + value = referenceData[offset + j]; + value = spin - value; + referenceData[offset + j] = value; + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); } - // Final validation: read actual values of buffer and compare against our reference - error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, bufferSize, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + // Final validation: read actual values of buffer and compare against + // our reference + error = clEnqueueReadBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); - for( size_t q = 0; q < bufferSize; q++ ) + for (size_t q = 0; q < bufferSize; q++) { if (referenceData[q] != finalData[q]) { log_error( - "ERROR: Sample %d did not validate! Got %d, expected %d\n", - (int)q, (int)finalData[q], (int)referenceData[q]); + "ERROR: Sample %d did not validate! 
Got %d, expected %d\n", + (int)q, (int)finalData[q], (int)referenceData[q]); return -1; } } @@ -128,112 +140,128 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman return 0; } -int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; const size_t imageSize = 256; const size_t imageDataSize = imageSize * imageSize * 4 * sizeof(cl_uint); - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) BufferOwningPtr hostPtrData{ malloc(imageDataSize) }; BufferOwningPtr referenceData{ malloc(imageDataSize) }; - BufferOwningPtr finalData{malloc(imageDataSize)}; - - MTdataHolder d{gRandomSeed}; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { - clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - - generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d, - hostPtrData); - memcpy(referenceData, hostPtrData, imageDataSize); - - cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); - void *hostPtr = nullptr; - if (hasHostPtr) hostPtr = hostPtrData; - memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, - imageSize, imageSize, 0, hostPtr, &error ); - test_error( error, "Unable to create testing buffer" ); - - if (!hasHostPtr) { - size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1}; - error = - clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, - 0, 0, hostPtrData, 0, NULL, NULL); - test_error( error, "Unable to write to testing buffer" ); - } - - for( int i = 0; i < 128; i++ ) + BufferOwningPtr finalData{ malloc(imageDataSize) }; + + MTdataHolder 
d{ gRandomSeed }; + for (int src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); + + generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), + d, hostPtrData); + memcpy(referenceData, hostPtrData, imageDataSize); + + cl_mem_flags flags = flag_set[src_flag_id]; + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + void *hostPtr = nullptr; + if (hasHostPtr) hostPtr = hostPtrData; + memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, + imageSize, imageSize, 0, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); - size_t offset[3], region[3]; - size_t rowPitch; - - offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); - offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); - offset[ 2 ] = 0; - region[ 2 ] = 1; - cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapImage call failed" ); - log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); - return -1; - } - - // Write into the region - cl_uint *mappedPtr = mappedRegion; - for( size_t y = 0; y < region[ 1 ]; y++ ) - { - for( size_t x = 0; x < region[ 0 ] * 4; x++ ) + if (!hasHostPtr) { - cl_int spin = (cl_int)random_in_range( 16, 1024, d ); - - cl_int value; - // Test read AND write in one swipe - value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; - value = spin - value; - mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; - - // Also update the initial 
data array - value = - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x]; - value = spin - value; - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x] = - value; + size_t write_origin[3] = { 0, 0, 0 }, + write_region[3] = { imageSize, imageSize, 1 }; + error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, + write_region, 0, 0, hostPtrData, 0, + NULL, NULL); + test_error(error, "Unable to write to testing buffer"); } - } - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); - } + for (int i = 0; i < 128; i++) + { - // Final validation: read actual values of buffer and compare against our reference - size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; - error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + size_t offset[3], region[3]; + size_t rowPitch; + + offset[0] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[0] = + (size_t)random_in_range(1, (int)(imageSize - offset[0] - 1), d); + offset[1] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[1] = + (size_t)random_in_range(1, (int)(imageSize - offset[1] - 1), d); + offset[2] = 0; + region[2] = 1; + cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + region, &rowPitch, NULL, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapImage call failed"); + log_error("\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], + (int)offset[1], (int)region[0], (int)region[1]); + return -1; + } - for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) - { - if (referenceData[q] != finalData[q]) + // Write into the region + cl_uint *mappedPtr = mappedRegion; + for (size_t y = 0; y < region[1]; y++) + { + for (size_t x = 0; x < 
region[0] * 4; x++) + { + cl_int spin = (cl_int)random_in_range(16, 1024, d); + + cl_int value; + // Test read AND write in one swipe + value = mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x]; + value = spin - value; + mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x] = value; + + // Also update the initial data array + value = + referenceData[((offset[1] + y) * imageSize + offset[0]) + * 4 + + x]; + value = spin - value; + referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + + x] = value; + } + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); + } + + // Final validation: read actual values of buffer and compare against + // our reference + size_t finalOrigin[3] = { 0, 0, 0 }, + finalRegion[3] = { imageSize, imageSize, 1 }; + error = clEnqueueReadImage(queue, memObject, CL_TRUE, finalOrigin, + finalRegion, 0, 0, finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); + + for (size_t q = 0; q < imageSize * imageSize * 4; q++) { - log_error("ERROR: Sample %d (coord %d,%d) did not validate! Got " - "%d, expected %d\n", - (int)q, (int)((q / 4) % imageSize), - (int)((q / 4) / imageSize), (int)finalData[q], - (int)referenceData[q]); - return -1; + if (referenceData[q] != finalData[q]) + { + log_error( + "ERROR: Sample %d (coord %d,%d) did not validate! Got " + "%d, expected %d\n", + (int)q, (int)((q / 4) % imageSize), + (int)((q / 4) / imageSize), (int)finalData[q], + (int)referenceData[q]); + return -1; + } } - } - } // cl_mem_flags + } // cl_mem_flags return 0; } - diff --git a/test_conformance/basic/test_image_r8.cpp b/test_conformance/basic/test_image_r8.cpp index b633d6abb..2dca1611e 100644 --- a/test_conformance/basic/test_image_r8.cpp +++ b/test_conformance/basic/test_image_r8.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,163 +21,111 @@ #include #include +#include +#include #include "procs.h" -static const char *r_uint8_kernel_code = -"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" int tid_y = get_global_id(1);\n" -" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" -" uint4 color;\n" -"\n" -" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" -" dst[indx] = (unsigned char)(color.x);\n" -"\n" -"}\n"; - - -static unsigned char * -generate_8bit_image(int w, int h, MTdata d) +namespace { +const char *r_uint8_kernel_code = R"( +__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler) { - unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char)); - int i; + int tid_x = get_global_id(0); + int tid_y = get_global_id(1); + int indx = tid_y * get_image_width(srcimg) + tid_x; + uint4 color; - for (i=0; i &v) { - int i; + RandomSeed seed(gRandomSeed); - for (i=0; i(genrand_int32(seed)); + }; - log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n"); - return 0; + std::generate(v.begin(), v.end(), random_generator); } -int -test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +} +int test_image_r8(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - cl_mem streams[2]; - cl_image_format img_format; - cl_uchar *input_ptr, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[3]; - int img_width = 512; - int img_height = 512; - int err; - MTdata d; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - - img_format.image_channel_order = CL_R; - img_format.image_channel_data_type = CL_UNSIGNED_INT8; + clMemWrapper streams[2]; + 
clProgramWrapper program; + clKernelWrapper kernel; + const size_t img_width = 512; + const size_t img_height = 512; + const size_t length = img_width * img_height; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT(device) + + const cl_image_format img_format = { CL_R, CL_UNSIGNED_INT8 }; // early out if this image type is not supported if (!is_image_format_supported(context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &img_format)) { log_info("WARNING: Image type not supported; skipping test.\n"); - return 0; + return TEST_SKIPPED_ITSELF; } - d = init_genrand( gRandomSeed ); - input_ptr = generate_8bit_image(img_width, img_height, d); - free_mtdata(d); d = NULL; + std::vector input(length); + std::vector output(length); + + generate_random_inputs(input); - output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height); streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &img_format, - img_width, img_height, 0, NULL, NULL); - if (!streams[0]) - { - log_error("create_image_2d failed\n"); - return -1; - } + img_width, img_height, 0, nullptr, &err); + test_error(err, "create_image_2d failed."); streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uchar) * img_width * img_height, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err); + test_error(err, "clCreateBuffer failed."); - size_t origin[3] = {0,0,0}, region[3]={img_width, img_height, 1}; - err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, - origin, region, 0, 0, - input_ptr, - 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clWriteImage failed: %d\n", err); - return -1; - } + const size_t origin[3] = { 0, 0, 0 }, + region[3] = { img_width, img_height, 1 }; + err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, + input.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteImage failed."); - err = create_single_kernel_helper(context, 
&program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" ); - if (err) { - log_error("Failed to create kernel and program: %d\n", err); - return -1; - } + err = create_single_kernel_helper(context, &program, &kernel, 1, + &r_uint8_kernel_code, "test_r_uint8"); + test_error(err, "create_single_kernel_helper failed."); - cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); - test_error(err, "clCreateSampler failed"); + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + test_error(err, "clSetKernelArgs failed\n"); + + size_t threads[] = { img_width, img_height }; + err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed\n"); - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed: %d\n", err); - return -1; - } - threads[0] = (size_t)img_width; - threads[1] = (size_t)img_height; - err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, + output.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed\n"); + + if (0 != memcmp(input.data(), output.data(), length)) { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; + log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n"); + err = -1; } - - err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, 
sizeof(cl_uchar)*img_width*img_height, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) + else { - log_error("clEnqueueReadBuffer failed\n"); - return -1; + log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n"); } - err = verify_8bit_image(input_ptr, output_ptr, img_width, img_height); - - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseSampler(sampler); - free(input_ptr); - free(output_ptr); - return err; } - - - - - diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp index 8b1203a71..dd5cc9a18 100644 --- a/test_conformance/basic/test_int2fp.cpp +++ b/test_conformance/basic/test_int2fp.cpp @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/test_conformance/basic/test_loop.cpp b/test_conformance/basic/test_loop.cpp index 1a91d9e4d..1c9acd1ad 100644 --- a/test_conformance/basic/test_loop.cpp +++ b/test_conformance/basic/test_loop.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -21,45 +21,45 @@ #include #include +#include #include "procs.h" -const char *loop_kernel_code = -"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -" int n = get_global_size(0);\n" -" int i, j;\n" -"\n" -" dst[tid] = 0;\n" -" for (i=0,j=loopindx[tid]; i= n)\n" -" j = 0;\n" -" dst[tid] += src[j];\n" -" }\n" -"\n" -"}\n"; - - -int -verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) +namespace { +const char *loop_kernel_code = R"( +__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst) { - int r, i, j, k; + int tid = get_global_id(0); + int n = get_global_size(0); + int i, j; - for (i=0; i= n) + j = 0; + dst[tid] += src[j]; + } +} +)"; + + +int verify_loop(std::vector inptr, std::vector loopindx, + std::vector loopcnt, std::vector outptr, int n) +{ + for (int i = 0; i < n; i++) + { + int r = 0; + for (int j = 0, k = loopindx[i]; j < loopcnt[i]; j++, k++) { - if (k >= n) - k = 0; + if (k >= n) k = 0; r += inptr[k]; } if (r != outptr[i]) { - log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r); + log_error("LOOP test failed: %d found, expected %d\n", outptr[i], + r); return -1; } } @@ -67,119 +67,69 @@ verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) log_info("LOOP test passed\n"); return 0; } - -int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +} +int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) { - cl_mem streams[4]; - cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - int err, i; + clMemWrapper streams[4]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; size_t length = sizeof(cl_int) * num_elements; - input_ptr = 
(cl_int*)malloc(length); - loop_indx = (cl_int*)malloc(length); - loop_cnt = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); + std::vector input(length); + std::vector loop_indx(length); + std::vector loop_cnt(length); + std::vector output(length); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[1]) + for (auto &stream : streams) { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; + stream = + clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err); + test_error(err, "clCreateBuffer failed."); } - MTdata d = init_genrand( gRandomSeed ); - for (i=0; i(genrand_int32(seed)); + loop_indx[i] = + static_cast(get_random_float(0, num_elements - 1, seed)); + loop_cnt[i] = + static_cast(get_random_float(0, num_elements / 32, seed)); + }; + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, + input.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, + loop_indx.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, + loop_cnt.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &loop_kernel_code, "test_loop"); + test_error(err, "create_single_kernel_helper failed."); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) { - 
log_error("clSetKernelArgs failed\n"); - return -1; + err = clSetKernelArg(kernel, i, sizeof streams[i], &streams[i]); + test_error(err, "clSetKernelArgs failed\n"); } - threads[0] = (unsigned int)num_elements; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clReadArray failed\n"); - return -1; - } - - err = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(loop_indx); - free(loop_cnt); - free(output_ptr); + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed\n"); - return err; -} + err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, + output.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed\n"); + + err = verify_loop(input, loop_indx, loop_cnt, output, num_elements); + return err; +} diff --git a/test_conformance/basic/test_vloadstore.cpp b/test_conformance/basic/test_vloadstore.cpp index e137f9e73..d34ecbf90 100644 --- a/test_conformance/basic/test_vloadstore.cpp +++ b/test_conformance/basic/test_vloadstore.cpp @@ -13,52 +13,129 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" - +#include #include #include #include #include #include #include +#include +#include #include "procs.h" #include "harness/conversions.h" -#include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" // Outputs debug information for stores #define DEBUG 0 // Forces stores/loads to be done with offsets = tid #define LINEAR_OFFSETS 0 #define NUM_LOADS 512 - -static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +char pragma_str[128] = { 0 }; +char mem_type[64] = { 0 }; +char store_str[128] = { 0 }; +char load_str[128] = { 0 }; + +extern cl_half_rounding_mode halfRoundingMode; + +// clang-format off +static const char *store_pattern= "results[ tid ] = tmp;\n"; +static const char *store_patternV3 = "results[3*tid] = tmp.s0; results[3*tid+1] = tmp.s1; results[3*tid+2] = tmp.s2;\n"; +static const char *load_pattern = "sSharedStorage[ i ] = src[ i ];\n"; +static const char *load_patternV3 = "sSharedStorage[3*i] = src[ 3*i]; sSharedStorage[3*i+1] = src[3*i+1]; sSharedStorage[3*i+2] = src[3*i+2];\n"; +static const char *kernel_pattern[] = { +pragma_str, +"#define STYPE %s\n" +"__kernel void test_fn( ", mem_type, " STYPE *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (", mem_type, " STYPE *) src ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n" +}; + +const char *pattern_local [] = { +pragma_str, +"__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" int lid = get_local_id( 0 );\n" +"\n" +" if( lid == 0 )\n" +" {\n" +" for( int i = 0; i < %d; i++ ) {\n" 
+" ", load_str, +" }\n" +" }\n" +// Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all +// threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be +// updated on all threads at that point +" barrier( CLK_LOCAL_MEM_FENCE );\n" +"\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n" }; + +const char *pattern_priv [] = { +pragma_str, +// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means +// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test +"#define PRIV_TYPE %s\n" +"#define PRIV_SIZE %d\n" +"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" +" int tid = get_global_id( 0 );\n" +"\n" +" for( int i = 0; i < PRIV_SIZE; i++ )\n" +" sPrivateStorage[ i ] = src[ i ];\n" +// Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for +// anybody else to sync up +"\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n"}; +// clang-format on #pragma mark -------------------- vload harness -------------------------- -typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ); +typedef void (*create_program_fn)(std::string &, size_t, ExplicitType, size_t, + size_t); +typedef int (*test_fn)(cl_device_id, cl_context, cl_command_queue, ExplicitType, + unsigned int, create_program_fn, size_t); -int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, - 
create_vload_program_fn createFn, size_t bufferSize, MTdata d ) +int test_vload(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType type, unsigned int vecSize, + create_program_fn createFn, size_t bufferSize) { - int error; - clProgramWrapper program; clKernelWrapper kernel; clMemWrapper streams[ 4 ]; + MTdataHolder d(gRandomSeed); const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS; if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; size_t threads[ 1 ], localThreads[ 1 ]; clProtectedArray inBuffer( bufferSize ); - char programSrc[ 10240 ]; cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ]; size_t numElements, typeSize, i; unsigned int outVectorSize; + pragma_str[0] = '\0'; + if (type == kDouble) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + else if (type == kHalf) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); typeSize = get_explicit_type_size( type ); numElements = bufferSize / ( typeSize * vecSize ); @@ -83,25 +160,19 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, outVectorSize = vecSize; // Declare output buffers now -#if !(defined(_WIN32) && defined(_MSC_VER)) - char outBuffer[ numLoads * typeSize * outVectorSize ]; - char referenceBuffer[ numLoads * typeSize * vecSize ]; -#else - char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char)); - char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char)); -#endif + std::vector outBuffer(numLoads * typeSize * outVectorSize); + std::vector referenceBuffer(numLoads * typeSize * vecSize); // Create the program - - + std::string programSrc; createFn( programSrc, numElements, type, vecSize, outVectorSize); // Create our kernel - const char *ptr = programSrc; - - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + const char *ptr = 
programSrc.c_str(); + cl_int error = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "test_fn"); test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str()); // Get the number of args to differentiate the kernels with local storage. (They have 5) cl_uint numArgs; @@ -115,7 +186,9 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, test_error( error, "Unable to create kernel stream" ); streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error ); test_error( error, "Unable to create kernel stream" ); - streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error ); + streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numLoads * typeSize * outVectorSize, + (void *)outBuffer.data(), &error); test_error( error, "Unable to create kernel stream" ); // Set parameters and run @@ -145,28 +218,32 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, test_error( error, "Unable to exec kernel" ); // Get the results - error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, + numLoads * typeSize * outVectorSize + * sizeof(cl_char), + (void *)outBuffer.data(), 0, NULL, NULL); test_error( error, "Unable to read results" ); - // Create the reference results - memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char)); + referenceBuffer.assign(numLoads * typeSize * vecSize, 0); for( i = 0; i < numLoads; i++ ) { - memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize, - typeSize * vecSize ); + 
memcpy(&referenceBuffer[i * typeSize * vecSize], + ((char *)(void *)inBuffer) + + ((offsets[i] * vecSize) + alignmentOffsets[i]) * typeSize, + typeSize * vecSize); } // Validate the results now - char *expected = referenceBuffer; - char *actual = outBuffer; + char *expected = referenceBuffer.data(); + char *actual = outBuffer.data(); char *in = (char *)(void *)inBuffer; if (DEBUG) { log_info("Memory contents:\n"); + char inString[1024]; + char expectedString[1024], actualString[1024]; for (i=0; i +int test_vset(cl_device_id device, cl_context context, cl_command_queue queue, + create_program_fn createFn, size_t bufferSize) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 }; const char *size_names[] = { "2", "3", "4", "8", "16"}; - unsigned int typeIdx, sizeIdx; int error = 0; - MTdata mtData = init_genrand( gRandomSeed ); log_info("Testing with buffer size of %d.\n", (int)bufferSize); - for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ ) - { + bool hasDouble = is_extension_available(device, "cl_khr_fp64"); + bool hasHalf = is_extension_available(device, "cl_khr_fp16"); - if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) + for (unsigned typeIdx = 0; typeIdx < vecType.size(); typeIdx++) + { + if (vecType[typeIdx] == kDouble && !hasDouble) continue; - - if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong ) + else if (vecType[typeIdx] == kHalf && !hasHalf) + continue; + else if ((vecType[typeIdx] == kLong || vecType[typeIdx] == kULong) + && !gHasLong) continue; - for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) + for (unsigned sizeIdx = 0; vecSizes[sizeIdx] != 0; sizeIdx++) { log_info("Testing %s%s...\n", 
get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]); - int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData ); + int error_this_type = + test_func_ptr(device, context, queue, vecType[typeIdx], + vecSizes[sizeIdx], createFn, bufferSize); if (error_this_type) { error += error_this_type; log_error("Failure; skipping further sizes for this type."); @@ -233,125 +317,59 @@ int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queu } } } - - free_mtdata(mtData); - return error; } #pragma mark -------------------- vload test cases -------------------------- -void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_global_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(mem_type, sizeof(mem_type), "__global"); + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? 
doubleExtensionPragma : "", - "", - typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(), + typeName, (int)inVectorSize, (int)inVectorSize); } int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { - return test_vloadset( device, context, queue, create_global_load_code, 10240 ); + return test_vset(device, context, queue, + create_global_load_code, 10240); } - -void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_local_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - //" __local %s%d sSharedStorage[ %d ];\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " int lid = get_local_id( 0 );\n" - "\n" - " if( lid == 0 )\n" - " {\n" - " for( int i = 0; i < %d; i++ )\n" - " sSharedStorage[ i ] = src[ i ];\n" - " }\n" - // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all - // threads are caught up (including the first one with the copy) before any proceed, i.e. 
the shared storage should be - // updated on all threads at that point - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - //" __local %s%d sSharedStorage[ %d ];\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " int lid = get_local_id( 0 );\n" - "\n" - " if( lid == 0 )\n" - " {\n" - " for( int i = 0; i < %d; i++ ) {\n" - " sSharedStorage[ 3*i ] = src[ 3*i ];\n" - " sSharedStorage[ 3*i +1] = src[ 3*i +1];\n" - " sSharedStorage[ 3*i +2] = src[ 3*i +2];\n" - " }\n" - " }\n" - // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all - // threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be - // updated on all threads at that point - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid +1] = tmp.s1;\n" - " results[ 3*tid +2] = tmp.s2;\n" - "}\n"; - + std::snprintf(store_str, sizeof(store_str), store_patternV3); + std::snprintf(load_str, sizeof(load_str), load_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, /*(int)inBufferSize,*/ - typeName, typeName, - (int)inBufferSize, - typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? 
doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, /*(int)inBufferSize,*/ - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + std::string inTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); + std::snprintf(load_str, sizeof(load_str), load_pattern); } + + std::string kernel_src = concat_kernel( + pattern_local, sizeof(pattern_local) / sizeof(pattern_local[0])); + destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), inTypeName.c_str(), + outTypeName.c_str(), (int)inBufferSize, typeName, + (int)inVectorSize, (int)inVectorSize, typeName); } int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -360,53 +378,34 @@ int test_vload_local(cl_device_id device, cl_context context, cl_command_queue q cl_ulong localSize; int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; + if (localSize > 10240) localSize = 10240; if (localSize > 4096) localSize -= 2048; else localSize /= 2; - return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize ); + return test_vset(device, context, queue, create_local_load_code, + (size_t)localSize); } - -void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_constant_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint 
*offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(mem_type, sizeof(mem_type), "__constant"); + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, typeName, - typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? 
doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(), + typeName, (int)inVectorSize, (int)inVectorSize); } int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -415,109 +414,71 @@ int test_vload_constant(cl_device_id device, cl_context context, cl_command_queu cl_ulong maxSize; int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL ); test_error( error, "Unable to get max size of constant memory buffer" ); - if( maxSize > 10240 ) - maxSize = 10240; + if (maxSize > 10240) maxSize = 10240; if (maxSize > 4096) maxSize -= 2048; else maxSize /= 2; - return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize ); + return test_vset(device, context, queue, + create_constant_load_code, (size_t)maxSize); } - -void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_private_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s%d\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < %d; i++ )\n" - " sPrivateStorage[ i ] = src[ i ];\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < PRIV_SIZE; i++ )\n" - " {\n" - " sPrivateStorage[ i ] = src[ i ];\n" - " }\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize ==3) { - sprintf( destBuffer, patternV3, - 
type == kDouble ? doubleExtensionPragma : "", - "", - typeName, 3*((int)inBufferSize), - typeName, typeName, - typeName ); - // log_info("Src is \"\n%s\n\"\n", destBuffer); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + std::string inTypeName = typeName; + int bufSize = (int)inBufferSize * 3; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize); + bufSize = (int)inBufferSize; + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + pattern_priv, sizeof(pattern_priv) / sizeof(pattern_priv[0])); + destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), bufSize, + inTypeName.c_str(), outTypeName.c_str(), typeName, + (int)inVectorSize, (int)inVectorSize, typeName); } int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { // We have no idea how much actual private storage is available, so just pick a reasonable value, // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vloadset( device, context, queue, create_private_load_code, 256 ); + return test_vset(device, context, queue, + create_private_load_code, 256); } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #pragma mark -------------------- vstore harness -------------------------- -typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ); - -int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, 
unsigned int vecSize, - create_vstore_program_fn createFn, size_t bufferSize, MTdata d ) +int test_vstore(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType type, unsigned int vecSize, + create_program_fn createFn, size_t bufferSize) { - int error; - clProgramWrapper program; clKernelWrapper kernel; clMemWrapper streams[ 3 ]; + MTdataHolder d(gRandomSeed); size_t threads[ 1 ], localThreads[ 1 ]; - size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS; + pragma_str[0] = '\0'; + if (type == kDouble) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + else if (type == kHalf) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; @@ -534,39 +495,22 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue } if (DEBUG) log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); -#if !(defined(_WIN32) && defined(_MSC_VER)) - cl_uint offsets[ numStores ]; -#else - cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint)); -#endif - char programSrc[ 10240 ]; - size_t i; - -#if !(defined(_WIN32) && defined(_MSC_VER)) - char inBuffer[ numStores * typeSize * vecSize ]; -#else - char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char)); -#endif + + std::vector offsets(numStores); + std::vector inBuffer(numStores * typeSize * vecSize); + clProtectedArray outBuffer( numElements * typeSize * vecSize ); -#if !(defined(_WIN32) && defined(_MSC_VER)) - char referenceBuffer[ numElements * typeSize * vecSize ]; -#else - char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char)); -#endif + std::vector referenceBuffer(numElements * typeSize * vecSize); // Create some random input 
data and random offsets to load from - generate_random_data( type, numStores * vecSize, d, (void *)inBuffer ); + generate_random_data(type, numStores * vecSize, d, (void *)inBuffer.data()); // Note: make sure no two offsets are the same, otherwise the output would depend on // the order that threads ran in, and that would be next to impossible to verify -#if !(defined(_WIN32) && defined(_MSC_VER)) - char flags[ numElements ]; -#else - char* flags = (char*)_malloca( numElements * sizeof(char)); -#endif - - memset( flags, 0, numElements * sizeof(char) ); - for( i = 0; i < numStores; i++ ) + std::vector flags(numElements); + flags.assign(flags.size(), 0); + + for (size_t i = 0; i < numStores; i++) { do { @@ -579,13 +523,15 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n"); - createFn( programSrc, numElements, type, vecSize ); + std::string programSrc; + createFn(programSrc, numElements, type, vecSize, vecSize); // Create our kernel - const char *ptr = programSrc; - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + const char *ptr = programSrc.c_str(); + cl_int error = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "test_fn"); test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str()); // Get the number of args to differentiate the kernels with local storage. 
(They have 5) cl_uint numArgs; @@ -593,9 +539,14 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue test_error( error, "clGetKernelInfo failed"); // Set up parameters - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error ); + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numStores * typeSize * vecSize * sizeof(cl_char), + (void *)inBuffer.data(), &error); test_error( error, "Unable to create kernel stream" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error ); + streams[1] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numStores * sizeof(cl_uint), offsets.data(), &error); test_error( error, "Unable to create kernel stream" ); streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error ); test_error( error, "Unable to create kernel stream" ); @@ -606,7 +557,7 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue // We need to set the size of the local storage error = clSetKernelArg(kernel, 0, bufferSize, NULL); test_error( error, "clSetKernelArg for buffer failed"); - for( i = 0; i < 3; i++ ) + for (size_t i = 0; i < 3; i++) { error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); test_error( error, "Unable to set kernel argument" ); @@ -615,11 +566,10 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue else { // No local storage - for( i = 0; i < 3; i++ ) + for (size_t i = 0; i < 3; i++) { error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); - if (error) - log_info("%s\n", programSrc); + if (error) log_info("%s\n", programSrc.c_str()); test_error( error, "Unable to set kernel argument" ); } } @@ -654,25 +604,26 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue 
queue error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); test_error( error, "Unable to read results" ); - // Create the reference results - memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) ); - for( i = 0; i < numStores; i++ ) + referenceBuffer.assign(referenceBuffer.size(), 0); + for (size_t i = 0; i < numStores; i++) { - memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize ); + memcpy(&referenceBuffer[((offsets[i] * vecSize) + addressOffset) + * typeSize], + &inBuffer[i * typeSize * vecSize], typeSize * vecSize); } // Validate the results now - char *expected = referenceBuffer; + char *expected = referenceBuffer.data(); char *actual = (char *)(void *)outBuffer; if (DEBUG) { log_info("Memory contents:\n"); - for (i=0; i(device, context, queue, + create_global_store_code, 10240); } - -void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_local_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" - "\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + // clang-format off + const char *pattern[] = { + pragma_str, + "#define LOC_TYPE %s\n" + "#define LOC_VTYPE %s%d\n" + "__kernel void test_fn(__local LOC_VTYPE *sSharedStorage, __global LOC_VTYPE *srcValues, __global uint *offsets, __global LOC_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. 
- " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n" + " sSharedStorage[ offsets[tid] ] = (LOC_VTYPE)(LOC_TYPE)0;\n" " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n" " barrier( CLK_LOCAL_MEM_FENCE );\n" "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local LOC_TYPE *)sSharedStorage ) + alignmentOffset );\n" "\n" // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy @@ -830,20 +748,20 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " int i;\n" - " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " __local LOC_TYPE *sp = (__local LOC_TYPE*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" + " __global LOC_TYPE *dp = (__global LOC_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n" " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n" }; - const char *patternV3 = - "%s" - "\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" + const char *patternV3 [] = { + pragma_str, + "#define LOC_TYPE %s\n" + "__kernel void test_fn(__local LOC_TYPE *sSharedStorage, __global LOC_TYPE *srcValues, __global uint *offsets, __global LOC_TYPE *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have 
garbage otherwise. - " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n" + " sSharedStorage[ 3*offsets[tid] ] = (LOC_TYPE)0;\n" " sSharedStorage[ 3*offsets[tid] +1 ] = \n" " sSharedStorage[ 3*offsets[tid] ];\n" " sSharedStorage[ 3*offsets[tid] +2 ] = \n" @@ -865,30 +783,26 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " int i;\n" - " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" + " __local LOC_TYPE *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" + " __global LOC_TYPE *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" " for( i = 0; i < 3; i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n" }; + // clang-format on const char *typeName = get_explicit_type_name(type); if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, - typeName, - typeName, typeName, - typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? 
doubleExtensionPragma : "", - typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, - typeName, typeName, typeName ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = str_sprintf(kernel_src, typeName); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, typeName, + (int)inVectorSize, (int)inVectorSize); } - // log_info(destBuffer); } int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -897,81 +811,82 @@ int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue cl_ulong localSize; int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; + if (localSize > 10240) localSize = 10240; if (localSize > 4096) localSize -= 2048; else localSize /= 2; - return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize ); + return test_vset(device, context, queue, + create_local_store_code, (size_t)localSize); } - -void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_private_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" + // clang-format off + const char *pattern [] = { + pragma_str, + "#define PRIV_TYPE %s\n" + "#define PRIV_VTYPE %s%d\n" // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test "\n" - "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + "__kernel void test_fn( __global PRIV_VTYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" - " __private %s%d sPrivateStorage[ %d ];\n" - " int tid = get_global_id( 0 );\n" + " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" + " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s%d)(%s)0;\n" + " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n" "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n" "\n" // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " uint i;\n" - " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " __private PRIV_TYPE *sp = (__private PRIV_TYPE*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" + " __global PRIV_TYPE *dp = (__global PRIV_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n" " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; - + "}\n"}; - const char *patternV3 = - "%s" + const char *patternV3 [] = { + pragma_str, + "#define PRIV_TYPE %s\n" + "#define PRIV_VTYPE %s3\n" // Private memory is unique per thread, unlike local storage which is unique per local work group. 
Which means // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test "\n" - "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n" + "__kernel void test_fn( __global PRIV_TYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" - " __private %s3 sPrivateStorage[ %d ];\n" // keep this %d - " int tid = get_global_id( 0 );\n" + " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" // keep this %d + " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s3)(%s)0;\n" + " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n" "\n" - - " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads + " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n" " uint i;\n" - " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" - " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" + " __private PRIV_TYPE *sp = ((__private PRIV_TYPE*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" + " __global PRIV_TYPE *dp = ((__global PRIV_TYPE*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" " for( i = 0; i < 3; i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n"}; + // clang-format on const char *typeName = get_explicit_type_name(type); if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? 
doubleExtensionPragma : "", - typeName, typeName, - typeName, (int)inBufferSize, - typeName, typeName, - typeName, typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, typeName, typeName, typeName ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = + str_sprintf(kernel_src, typeName, typeName, (int)inBufferSize); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = + str_sprintf(kernel_src, typeName, typeName, (int)inVectorSize, + (int)inBufferSize, (int)inVectorSize); } } @@ -979,7 +894,8 @@ int test_vstore_private(cl_device_id device, cl_context context, cl_command_queu { // We have no idea how much actual private storage is available, so just pick a reasonable value, // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vstoreset( device, context, queue, create_private_store_code, 256 ); + return test_vset(device, context, queue, + create_private_store_code, 256); } diff --git a/test_conformance/basic/test_wg_barrier.cpp b/test_conformance/basic/test_wg_barrier.cpp deleted file mode 100644 index a237d80b9..000000000 --- a/test_conformance/basic/test_wg_barrier.cpp +++ /dev/null @@ -1,159 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include - - -#include "procs.h" - -const char *wg_barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i1; i = hadd(i,1))\n" -" {\n" -" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *tmpptr, int *outptr, int n) -{ - int i; - int reference = 0; - - for (i=0; i max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; - - // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; - - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[2] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); - test_error(err, "clCreateBuffer 
failed."); - - d = init_genrand( gRandomSeed ); - for (i=0; i sema_props{ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, @@ -803,6 +824,16 @@ clExternalSemaphore::clExternalSemaphore( sema_props.push_back((cl_semaphore_properties_khr)handle); #endif break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_SYNC_FD_KHR); + sema_props.push_back(static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR)); + sema_props.push_back(static_cast( + CL_SEMAPHORE_HANDLE_SYNC_FD_KHR)); + sema_props.push_back(static_cast( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR)); + break; default: ASSERT(0); log_error("Unsupported external memory handle type\n"); diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp index 1a313cce4..96c5adbc7 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp @@ -248,6 +248,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList() } externalSemaphoreHandleTypeList.push_back( VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#elif defined(__ANDROID__) + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD); #else externalSemaphoreHandleTypeList.push_back( VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD); @@ -480,6 +483,33 @@ const std::vector getSupportedVulkanFormatList() return formatList; } +cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType( + VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType) +{ + cl_external_semaphore_handle_type_khr clExternalSemaphoreHandleTypeKhr = 0; + switch (vulkanExternalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + 
clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + clExternalSemaphoreHandleTypeKhr = CL_SEMAPHORE_HANDLE_SYNC_FD_KHR; + break; + default: break; + } + return clExternalSemaphoreHandleTypeKhr; +} + uint32_t getVulkanFormatElementSize(VulkanFormat format) { switch (format) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp index 04f5a5940..989132570 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp @@ -51,6 +51,8 @@ const std::vector getSupportedVulkanFormatList(); uint32_t getVulkanFormatElementSize(VulkanFormat format); const char* getVulkanFormatGLSLFormat(VulkanFormat format); const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format); +cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType( + VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType); std::string prepareVulkanShader( std::string shaderCode, diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp index 3ce4af6b0..4d803be48 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp @@ -72,6 +72,8 @@ VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) #if defined(_WIN32) || defined(_WIN64) const char *vulkanLoaderLibraryName = "vulkan-1.dll"; +#elif defined(__ANDROID__) + const char *vulkanLoaderLibraryName = "libvulkan.so"; #elif defined(__linux__) const char *vulkanLoaderLibraryName = "libvulkan.so.1"; #endif @@ -604,6 +606,37 @@ VulkanQueue 
&VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily, VulkanDevice::operator VkDevice() const { return m_vkDevice; } +//////////////////////////////// +// VulkanFence implementation // +//////////////////////////////// + +VulkanFence::VulkanFence(const VulkanDevice &vkDevice) +{ + + device = vkDevice; + + VkFenceCreateInfo fenceInfo{}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceInfo.pNext = nullptr; + fenceInfo.flags = 0; + + VkResult vkStatus = vkCreateFence(device, &fenceInfo, nullptr, &fence); + + if (vkStatus != VK_SUCCESS) + { + throw std::runtime_error("Error: Failed create fence."); + } +} + +VulkanFence::~VulkanFence() { vkDestroyFence(device, fence, nullptr); } + +void VulkanFence::reset() { vkResetFences(device, 1, &fence); } + +void VulkanFence::wait() +{ + vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX); +} + //////////////////////////////// // VulkanQueue implementation // //////////////////////////////// @@ -615,6 +648,22 @@ VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {} VulkanQueue::~VulkanQueue() {} +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer, + const std::shared_ptr &vkFence) +{ + VulkanCommandBufferList commandBufferList; + commandBufferList.add(commandBuffer); + + VkSubmitInfo vkSubmitInfo = {}; + vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + vkSubmitInfo.pNext = NULL; + vkSubmitInfo.waitSemaphoreCount = (uint32_t)0; + vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size(); + vkSubmitInfo.pCommandBuffers = commandBufferList(); + + vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, vkFence->fence); +} + void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, const VulkanCommandBufferList &commandBufferList, const VulkanSemaphoreList &signalSemaphoreList) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp index 37925ee4a..af4782191 100644 --- 
a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp @@ -21,6 +21,7 @@ #include "vulkan_wrapper_types.hpp" #include "vulkan_list_map.hpp" #include "vulkan_api_list.hpp" +#include class VulkanInstance { friend const VulkanInstance &getVulkanInstance(); @@ -145,6 +146,20 @@ class VulkanDevice { operator VkDevice() const; }; +class VulkanFence { + friend class VulkanQueue; + +protected: + VkFence fence; + VkDevice device; + +public: + VulkanFence(const VulkanDevice &device); + virtual ~VulkanFence(); + void reset(); + void wait(); +}; + class VulkanQueue { friend class VulkanDevice; @@ -157,6 +172,8 @@ class VulkanQueue { public: const VulkanQueueFamily &getQueueFamily(); + void submit(const VulkanCommandBuffer &commandBuffer, + const std::shared_ptr &fence); void submit(const VulkanSemaphoreList &waitSemaphoreList, const VulkanCommandBufferList &commandBufferList, const VulkanSemaphoreList &signalSemaphoreList); @@ -569,7 +586,6 @@ class VulkanSemaphore { operator VkSemaphore() const; }; - #define VK_FUNC_DECL(name) extern "C" PFN_##name _##name; VK_FUNC_LIST #if defined(_WIN32) || defined(_WIN64) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp index 2473a1d7b..fcd193732 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp @@ -169,7 +169,9 @@ enum VulkanExternalSemaphoreHandleType VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR - | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR + | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR }; enum VulkanBufferUsage diff 
--git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index 3e4b0b8e7..645d3f703 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,8 +18,10 @@ #include #include "procs.h" #include "test_base.h" +#include "harness/kernelHelpers.h" std::map BaseFunctionTest::type2name; +cl_half_rounding_mode BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; @@ -45,17 +47,38 @@ test_definition test_list[] = { const int test_num = ARRAY_SIZE( test_list ); -int main(int argc, const char *argv[]) +test_status InitCL(cl_device_id device) { - initVecSizes(); - - if (BaseFunctionTest::type2name.empty()) + if (is_extension_available(device, "cl_khr_fp16")) { - BaseFunctionTest::type2name[sizeof(half)] = "half"; - BaseFunctionTest::type2name[sizeof(float)] = "float"; - BaseFunctionTest::type2name[sizeof(double)] = "double"; + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } } - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return TEST_PASS; } +int main(int argc, const char *argv[]) +{ + initVecSizes(); + + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + 
BaseFunctionTest::type2name[sizeof(double)] = "double"; + + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); +} diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 442910426..be36ed264 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -19,27 +19,23 @@ #include #include #include +#include #include #include -#include "harness/deviceInfo.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" - template using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const, const int num, const int vs, const int vp); - template using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num); - using half = cl_half; - struct BaseFunctionTest { BaseFunctionTest(cl_device_id device, cl_context context, @@ -61,9 +57,9 @@ struct BaseFunctionTest bool vecParam; static std::map type2name; + static cl_half_rounding_mode halfRoundingMode; }; - struct MinTest : BaseFunctionTest { MinTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -74,7 +70,6 @@ struct MinTest : BaseFunctionTest cl_int Run() override; }; - struct MaxTest : BaseFunctionTest { MaxTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -85,7 +80,6 @@ struct MaxTest : BaseFunctionTest cl_int Run() override; }; - struct ClampTest : BaseFunctionTest { ClampTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -96,7 +90,6 @@ struct ClampTest : BaseFunctionTest cl_int Run() override; }; - struct DegreesTest : BaseFunctionTest { DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -107,7 +100,6 @@ struct DegreesTest : BaseFunctionTest cl_int Run() override; }; - struct RadiansTest : BaseFunctionTest { RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -118,7 +110,6 @@ struct RadiansTest : BaseFunctionTest cl_int Run() override; }; - 
struct SignTest : BaseFunctionTest { SignTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -129,7 +120,6 @@ struct SignTest : BaseFunctionTest cl_int Run() override; }; - struct SmoothstepTest : BaseFunctionTest { SmoothstepTest(cl_device_id device, cl_context context, @@ -141,7 +131,6 @@ struct SmoothstepTest : BaseFunctionTest cl_int Run() override; }; - struct StepTest : BaseFunctionTest { StepTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -152,7 +141,6 @@ struct StepTest : BaseFunctionTest cl_int Run() override; }; - struct MixTest : BaseFunctionTest { MixTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -163,19 +151,71 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; +template float UlpFn(const T &val, const double &r) +{ + if (std::is_same::value) + { + return Ulp_Error_Half(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("UlpFn: unsupported data type\n"); + } + + return -1.f; // wrong val +} + +template inline double conv_to_dbl(const T &val) +{ + if (std::is_same::value) + return (double)cl_half_to_float(val); + else + return (double)val; +} -template -std::string string_format(const std::string &format, Args... args) +template inline double conv_to_flt(const T &val) { - int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) 
+ 1; - if (sformat <= 0) - throw std::runtime_error("string_format: string processing error."); - auto format_size = static_cast(sformat); - std::unique_ptr buffer(new char[format_size]); - std::snprintf(buffer.get(), format_size, format.c_str(), args...); - return std::string(buffer.get(), buffer.get() + format_size - 1); + if (std::is_same::value) + return (float)cl_half_to_float(val); + else + return (float)val; } +template inline half conv_to_half(const T &val) +{ + if (std::is_floating_point::value) + return cl_half_from_float(val, BaseFunctionTest::halfRoundingMode); + return 0; +} + +template bool isfinite_fp(const T &v) +{ + if (std::is_same::value) + { + // Extract FP16 exponent and mantissa + uint16_t h_exp = (((half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((half)v) & 0x3FF; + + // !Inf test + return !(h_exp == 0x1F && h_mant == 0); + } + else + { +#if !defined(_WIN32) + return std::isfinite(v); +#else + return isfinite(v); +#endif + } +} template int MakeAndRunTest(cl_device_id device, cl_context context, diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index 1eb12f730..a6c75647d 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -22,6 +22,7 @@ #include "harness/deviceInfo.h" #include "harness/typeWrappers.h" +#include "harness/stringHelpers.h" #include "procs.h" #include "test_base.h" @@ -53,7 +54,6 @@ const char *binary_fn_code_pattern_v3_scalar = " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" "}\n"; - template int test_binary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -105,6 +105,16 @@ int test_binary_fn(cl_device_id device, cl_context context, input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -125,22 +135,22 @@ int test_binary_fn(cl_device_id device, cl_context context, { std::string str = binary_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = binary_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } } else { // do regular std::string str = binary_fn_code_pattern; - kernelSource = string_format( + kernelSource = str_sprintf( str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSecParam ? 
vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], fnName.c_str()); @@ -203,13 +213,20 @@ int max_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] < y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -227,13 +244,20 @@ int min_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] > y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. 
" + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -246,6 +270,13 @@ int min_verify(const T* const x, const T* const y, const T* const out, cl_int MaxTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify); + test_error(error, "MaxTest::Run failed"); + } error = test_binary_fn(device, context, queue, num_elems, fnName.c_str(), vecParam, max_verify); @@ -265,6 +296,13 @@ cl_int MaxTest::Run() cl_int MinTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify); + test_error(error, "MinTest::Run failed"); + } error = test_binary_fn(device, context, queue, num_elems, fnName.c_str(), vecParam, min_verify); diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index 0e96fb602..1bf406770 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -26,12 +26,10 @@ #include "procs.h" #include "test_base.h" - #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif - #define CLAMP_KERNEL(type) \ const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ "__kernel void test_clamp(__global " #type " *x, __global " #type \ @@ -64,6 +62,14 @@ "vload3(tid,maxval)), tid, dst);\n" \ "}\n"; +#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" +CLAMP_KERNEL(half) +CLAMP_KERNEL_V(half, 2) +CLAMP_KERNEL_V(half, 4) +CLAMP_KERNEL_V(half, 8) +CLAMP_KERNEL_V(half, 16) +CLAMP_KERNEL_V3(half, 3) +#undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE " " CLAMP_KERNEL(float) @@ -83,6 +89,10 @@ CLAMP_KERNEL_V(double, 16) CLAMP_KERNEL_V3(double, 3) #undef EMIT_PRAGMA_DIRECTIVE +const 
char *clamp_half_codes[] = { + clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code, + clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code +}; const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, @@ -96,21 +106,42 @@ const char *clamp_double_codes[] = { namespace { - template int verify_clamp(const T *const x, const T *const minval, const T *const maxval, const T *const outptr, int n) { - T t; - for (int i = 0; i < n; i++) + if (std::is_same::value) + { + float t; + for (int i = 0; i < n; i++) + { + t = std::min( + std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])), + cl_half_to_float(maxval[i])); + if (t != cl_half_to_float(outptr[i])) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", + i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]), + cl_half_to_float(maxval[i]), t, + cl_half_to_float(outptr[i])); + return -1; + } + } + } + else { - t = std::min(std::max(x[i], minval[i]), maxval[i]); - if (t != outptr[i]) + T t; + for (int i = 0; i < n; i++) { - log_error( - "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, - x[i], minval[i], maxval[i], t, outptr[i]); - return -1; + t = std::min(std::max(x[i], minval[i]), maxval[i]); + if (t != outptr[i]) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. 
%a\n", + i, x[i], minval[i], maxval[i], t, outptr[i]); + return -1; + } } } @@ -118,7 +149,6 @@ int verify_clamp(const T *const x, const T *const minval, const T *const maxval, } } - template int test_clamp_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) @@ -169,6 +199,17 @@ int test_clamp_fn(cl_device_id device, cl_context context, input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + for (j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[2][j] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[1][j]), fval, d)); + } + } for (i = 0; i < 3; i++) { @@ -194,9 +235,16 @@ int test_clamp_fn(cl_device_id device, cl_context context, "test_clamp"); test_error(err, "Unable to create kernel"); } + else if (std::is_same::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_half_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } - log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", - i, g_arrVecSizes[i], i); + log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n", + tname.c_str(), i, g_arrVecSizes[i], i); fflush(stdout); for (j = 0; j < 4; j++) @@ -239,10 +287,14 @@ int test_clamp_fn(cl_device_id device, cl_context context, return err; } - cl_int ClampTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_clamp_fn(device, context, queue, num_elems); + test_error(error, "ClampTest::Run failed"); + } error = test_clamp_fn(device, context, queue, num_elems); test_error(error, "ClampTest::Run failed"); @@ -256,7 +308,6 @@ cl_int ClampTest::Run() return error; } - int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git 
a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 92c101005..2a06e43df 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -18,6 +18,8 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -52,33 +54,42 @@ const char *mix_fn_code_pattern_v3_scalar = " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n" "}\n"; - #define MAX_ERR 1e-3 namespace { - template int verify_mix(const T *const inptrX, const T *const inptrY, const T *const inptrA, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r; - float delta = 0.0f; + double r, o; + float delta = 0.f, max_delta = 0.f; int i; if (vecParam) { for (i = 0; i < n * veclen; i++) { - r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]); - delta = fabs(double(r - outptr[i])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[i]) + + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i])) + * conv_to_dbl(inptrA[i])); + + o = conv_to_dbl(outptr[i]); + delta = fabs(double(r - o)) / r; + if (!std::is_same::value) + { + if (delta > MAX_ERR) + { + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } + else { - log_error( - "%d) verification error: mix(%a, %a, %a) = *%a vs. 
%a\n", i, - inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); - return -1; + max_delta = std::max(max_delta, delta); } } } @@ -90,25 +101,40 @@ int verify_mix(const T *const inptrX, const T *const inptrY, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]); - delta = fabs(double(r - outptr[vi])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[vi]) + + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi])) + * conv_to_dbl(inptrA[i])); + delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r; + if (!std::is_same::value) { - log_error("{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, - outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + else + { + max_delta = std::max(max_delta, delta); } } } } + // due to the fact that accuracy of mix for cl_khr_fp16 is implementation + // defined this test only reports maximum error without testing maximum + // error threshold + if (std::is_same::value) + log_error("mix half verification result, max delta: %a\n", max_delta); + return 0; } } // namespace - template int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -120,7 +146,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::vector kernels; int err, i; - MTdataHolder d = MTdataHolder(gRandomSeed); + MTdataHolder d(gRandomSeed); assert(BaseFunctionTest::type2name.find(sizeof(T)) != BaseFunctionTest::type2name.end()); @@ -142,19 +168,32 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, test_error(err, "clCreateBuffer failed"); } - for (i = 0; i < num_elements; i++) - { - input_ptr[0][i] = (T)genrand_real1(d); - input_ptr[1][i] = 
(T)genrand_real1(d); - input_ptr[2][i] = (T)genrand_real1(d); - } - std::string pragma_str; if (std::is_same::value) { pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; } + if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[1][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[2][i] = conv_to_half((float)genrand_real1(d)); + } + } + else + { + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = (T)genrand_real1(d); + input_ptr[1][i] = (T)genrand_real1(d); + input_ptr[2][i] = (T)genrand_real1(d); + } + } + for (i = 0; i < 3; i++) { err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, @@ -164,7 +203,6 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, } char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; @@ -174,15 +212,15 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, { std::string str = mix_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = mix_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -190,10 +228,10 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, // regular path std::string str = mix_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i], - tname.c_str(), vecParam ? 
vecSizeNames[i] : "", - tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -242,10 +280,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, return err; } - cl_int MixTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_mix_fn(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run failed"); + } error = test_mix_fn(device, context, queue, num_elems, vecParam); test_error(error, "MixTest::Run failed"); @@ -260,7 +302,6 @@ cl_int MixTest::Run() return error; } - int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -268,7 +309,6 @@ int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 31948d3fe..5afc2d0f2 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -18,10 +18,11 @@ #include #include +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *smoothstep_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s " @@ -53,38 +54,43 @@ const char *smoothstep_fn_code_pattern_v3_scalar = " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - #define MAX_ERR (1e-5f) namespace { - template int verify_smoothstep(const T *const edge0, const T *const edge1, const T *const x, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r, t; - float 
delta = 0; + double r, t; + float delta = 0, max_delta = 0; if (vecParam) { for (int i = 0; i < n * veclen; i++) { - t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[i]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[i])); + if (!std::is_same::value) { - log_error("%d) verification error: smoothstep(%a, %a, %a) = " - "*%a vs. %a\n", - i, x[i], edge0[i], edge1[i], r, outptr[i]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "%d) verification error: smoothstep(%a, %a, %a) = " + "*%a vs. %a\n", + i, x[i], edge0[i], edge1[i], r, outptr[i]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } else @@ -95,32 +101,48 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[vi]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[vi])); + + if (!std::is_same::value) { - log_error("{%d, element %d}) verification error: " - "smoothstep(%a, %a, %a) = *%a vs. %a\n", - ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: " + "smoothstep(%a, %a, %a) = *%a vs. 
%a\n", + ii, j, x[vi], edge0[i], edge1[i], r, + outptr[vi]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } } + + // due to the fact that accuracy of smoothstep for cl_khr_fp16 is + // implementation defined this test only reports maximum error without + // testing maximum error threshold + if (std::is_same::value) + log_error("smoothstep half verification result, max delta: %a\n", + max_delta); + return 0; } } - template int test_smoothstep_fn(cl_device_id device, cl_context context, - cl_command_queue queue, int n_elems, bool vecParam) + cl_command_queue queue, const int n_elems, + const bool vecParam) { clMemWrapper streams[4]; std::vector input_ptr[3], output_ptr; @@ -170,6 +192,17 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d)); + input_ptr[1][i] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d)); + input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d)); + } + } for (i = 0; i < 3; i++) { @@ -179,7 +212,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, test_error(err, "Unable to write input buffer"); } - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; for (i = 0; i < kTotalVecCount; i++) { @@ -190,15 +223,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, { std::string str = smoothstep_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = smoothstep_fn_code_pattern_v3_scalar; 
kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -206,11 +239,12 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, // regular path std::string str = smoothstep_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } + const char *programPtr = kernelSource.c_str(); err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1, @@ -259,10 +293,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, return err; } - cl_int SmoothstepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_smoothstep_fn(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run failed"); + } error = test_smoothstep_fn(device, context, queue, num_elems, vecParam); @@ -278,7 +317,6 @@ cl_int SmoothstepTest::Run() return error; } - int test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -286,7 +324,6 @@ int test_smoothstep(cl_device_id device, cl_context context, "smoothstep", true); } - int test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index dc91766e9..1cfa96eab 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -18,10 +18,11 @@ #include #include +#include 
"harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *step_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *edge, " "__global %s%s *x, __global %s%s *dst)\n" @@ -48,7 +49,6 @@ const char *step_fn_code_pattern_v3_scalar = " vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - namespace { template @@ -62,8 +62,8 @@ int verify_step(const T *const inptrA, const T *const inptrB, { for (int i = 0; i < n * veclen; i++) { - r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0; - if (r != outptr[i]) return -1; + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[i])) ? 0.0 : 1.0; + if (r != conv_to_dbl(outptr[i])) return -1; } } else @@ -73,24 +73,31 @@ int verify_step(const T *const inptrA, const T *const inptrB, int ii = i / veclen; for (int j = 0; j < veclen && i < n; ++j, ++i) { - r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f; - if (r != outptr[i]) + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[ii])) ? 0.0f + : 1.0f; + if (r != conv_to_dbl(outptr[i])) { - log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a " - "vs %a\n", - ii, j, inptrA[ii], inptrB[i], r, outptr[i]); + if (std::is_same::value) + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, conv_to_flt(inptrA[ii]), + conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i])); + else + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, inptrA[ii], inptrB[i], r, outptr[i]); return -1; } } } } - return 0; } } - template int test_step_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -140,6 +147,16 @@ int test_step_fn(cl_device_id device, cl_context context, input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d); } } + else if (std::is_same::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = 
conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -160,15 +177,15 @@ int test_step_fn(cl_device_id device, cl_context context, { std::string str = step_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } else { std::string str = step_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } } else @@ -176,9 +193,9 @@ int test_step_fn(cl_device_id device, cl_context context, // regular path std::string str = step_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? 
vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -229,10 +246,14 @@ int test_step_fn(cl_device_id device, cl_context context, return err; } - cl_int StepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_step_fn(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run failed"); + } error = test_step_fn(device, context, queue, num_elems, vecParam); test_error(error, "StepTest::Run failed"); @@ -247,7 +268,6 @@ cl_int StepTest::Run() return error; } - int test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -255,7 +275,6 @@ int test_step(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index fed4389d9..91b5c215b 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -21,6 +21,7 @@ #include #include "harness/deviceInfo.h" +#include "harness/stringHelpers.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -30,7 +31,6 @@ #define M_PI 3.14159265358979323846264338327950288 #endif - // clang-format off const char *unary_fn_code_pattern = "%s\n" /* optional pragma */ @@ -51,23 +51,10 @@ const char *unary_fn_code_pattern_v3 = "}\n"; // clang-format on - #define MAX_ERR 2.0f namespace { - -template float UlpFn(const T &val, const double &r) -{ - if (std::is_same::value) - return Ulp_Error_Double(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); - else if (std::is_same::value) - return Ulp_Error(val, r); -} - - template int verify_degrees(const T *const inptr, const T *const outptr, int n) { @@ -77,7 +64,11 @@ int verify_degrees(const T *const inptr, 
const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (180.0 / M_PI) * inptr[i]; + r = (180.0 / M_PI) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; error = UlpFn(outptr[i], r); @@ -88,21 +79,32 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same::value) + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template int verify_radians(const T *const inptr, const T *const outptr, int n) { @@ -112,8 +114,14 @@ int verify_radians(const T *const inptr, const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (M_PI / 180.0) * inptr[i]; - error = Ulp_Error(outptr[i], r); + r = (M_PI / 180.0) * conv_to_dbl(inptr[i]); + + if (std::is_same::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; + + error = UlpFn(outptr[i], r); + if (fabsf(error) > max_error) { max_error = error; @@ -121,41 +129,51 @@ int verify_radians(const T *const inptr, const T *const 
outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same::value) + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template int verify_sign(const T *const inptr, const T *const outptr, int n) { - T r = 0; + double r = 0; for (int i = 0; i < n; i++) { - if (inptr[i] > 0.0f) + if (conv_to_dbl(inptr[i]) > 0.0f) r = 1.0; - else if (inptr[i] < 0.0f) + else if (conv_to_dbl(inptr[i]) < 0.0f) r = -1.0; else r = 0.0; - if (r != outptr[i]) return -1; + if (r != conv_to_dbl(outptr[i])) return -1; } return 0; } } - template int test_unary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -207,33 +225,38 @@ int test_unary_fn(cl_device_id device, cl_context context, get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d); } } + else if (std::is_same::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = conv_to_half(get_random_float( + (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d)); + } + } err = clEnqueueWriteBuffer(queue, streams[0], true, 0, 
sizeof(T) * num_elements, &input_ptr.front(), 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + test_error(err, "clEnqueueWriteBuffer failed\n"); for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; if (i >= kVectorSizeCount) { std::string str = unary_fn_code_pattern_v3; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), fnName.c_str()); } else { std::string str = unary_fn_code_pattern; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), - vecSizeNames[i], fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); } /* Create kernels */ @@ -290,11 +313,18 @@ int test_unary_fn(cl_device_id device, cl_context context, return err; } - cl_int DegreesTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_degrees); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); + test_error(error, "DegreesTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_degrees); test_error(error, "DegreesTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -307,11 +337,18 @@ cl_int DegreesTest::Run() return error; } - cl_int RadiansTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_radians); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, 
context, queue, num_elems, + fnName.c_str(), verify_radians); + test_error(error, "RadiansTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_radians); test_error(error, "RadiansTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -324,11 +361,18 @@ cl_int RadiansTest::Run() return error; } - cl_int SignTest::Run() { - cl_int error = test_unary_fn(device, context, queue, num_elems, - fnName.c_str(), verify_sign); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); + test_error(error, "SignTest::Run failed"); + } + + error = test_unary_fn(device, context, queue, num_elems, + fnName.c_str(), verify_sign); test_error(error, "SignTest::Run failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -341,7 +385,6 @@ cl_int SignTest::Run() return error; } - int test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -349,7 +392,6 @@ int test_degrees(cl_device_id device, cl_context context, "degrees"); } - int test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -357,7 +399,6 @@ int test_radians(cl_device_id device, cl_context context, "radians"); } - int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index f3ee43122..d250bdd47 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -462,7 +462,7 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device header_names[i] = _strdup(buffer); sprintf(buffer, composite_kernel_extern_template, i); - const char* line = _strdup(buffer); + const char *line = buffer; error = create_single_kernel_helper_create_program(context, 
&headers[i], 1, &line); if( headers[i] == NULL || error != CL_SUCCESS ) { diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index d53af8dc7..89626b797 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -76,6 +76,7 @@ const char *known_extensions[] = { "cl_khr_device_uuid", "cl_khr_pci_bus_info", "cl_khr_suggested_local_work_size", + "cl_khr_expect_assume", "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index dff9788c7..851696406 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -15,7 +15,6 @@ // #include "harness/testHarness.h" #include "harness/compat.h" -#include "harness/rounding_mode.h" #include "harness/ThreadPool.h" #if defined(__APPLE__) @@ -102,6 +101,7 @@ MTdata gMTdata; const char **argList = NULL; int argCount = 0; + double SubtractTime(uint64_t endTime, uint64_t startTime); cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE; @@ -264,6 +264,7 @@ std::vector DataInitInfo::specialValuesDouble = { }; // clang-format on + // Windows (since long double got deprecated) sets the x87 to 53-bit precision // (that's x87 default state). 
This causes problems with the tests that // convert long and ulong to float and double or otherwise deal with values @@ -351,6 +352,7 @@ int CalcRefValsPat::check_result(void *test, return 0; } + cl_uint RoundUpToNextPowerOfTwo(cl_uint x) { if (0 == (x & (x - 1))) return x; @@ -360,6 +362,7 @@ cl_uint RoundUpToNextPowerOfTwo(cl_uint x) return x + x; } + cl_int CustomConversionsTest::Run() { int startMinVectorSize = gMinVectorSize; @@ -391,8 +394,7 @@ cl_int CustomConversionsTest::Run() continue; } - - // skip double if we don't have it + // skip half if we don't have it if (!gTestHalfs && (inType == khalf || outType == khalf)) { if (gHasHalfs) @@ -400,7 +402,7 @@ cl_int CustomConversionsTest::Run() vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", gTypeNames[outType], gSaturationNames[sat], gRoundingModeNames[round], gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double testing turned " + vlog("\t\tcl_khr_fp16 enabled, but half testing turned " "off.\n"); } continue; @@ -440,6 +442,7 @@ cl_int CustomConversionsTest::Run() return gFailCount; } + ConversionsTest::ConversionsTest(cl_device_id device, cl_context context, cl_command_queue queue) : context(context), device(device), queue(queue), num_elements(0), @@ -448,6 +451,7 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context, cl_double(0), cl_ulong(0), cl_long(0) }) {} + cl_int ConversionsTest::Run() { IterOverTypes iter(typeIterator, *this); @@ -457,6 +461,7 @@ cl_int ConversionsTest::Run() return gFailCount; } + cl_int ConversionsTest::SetUp(int elements) { num_elements = elements; @@ -474,7 +479,7 @@ cl_int ConversionsTest::SetUp(int elements) DataInitInfo::halfRoundingMode = CL_HALF_RTZ; ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTZ; } - else // CL_FP_ROUND_TO_INF ?? 
+ else { log_error("Error while acquiring half rounding mode"); return TEST_FAIL; @@ -542,7 +547,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType, continue; } - // skip double if we don't have it + // skip half if we don't have it if (!gTestHalfs && (inType == khalf || outType == khalf)) { if (gHasHalfs) @@ -550,7 +555,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType, vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", gTypeNames[outType], gSaturationNames[sat], gRoundingModeNames[round], gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double testing turned " + vlog("\t\tcl_khr_fp16 enabled, but half testing turned " "off.\n"); } continue; @@ -587,7 +592,6 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, cl_ulong wall_start = mach_absolute_time(); #endif - uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); cl_uint threads = GetThreadCount(); DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; @@ -655,7 +659,9 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, // Figure out how many elements are in a work block // we handle 64-bit types a bit differently. - if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; + uint64_t lastCase = (8 * gTypeSizes[inType] > 32) + ? 0x100000000ULL + : 1ULL << (8 * gTypeSizes[inType]); if (!gWimpyMode && gIsEmbedded) step = blockCount * EMBEDDED_REDUCTION_FACTOR; @@ -965,6 +971,7 @@ static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); } + void MapResultValuesComplete(const std::unique_ptr &ptr); void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, @@ -1005,6 +1012,7 @@ void MapResultValuesComplete(const std::unique_ptr &info) // destroyed automatically soon after we exit. 
} + void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, void *data) { @@ -1233,7 +1241,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) if (inType == kfloat || outType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count); } - if (gForceHalfFTZ) { if (inType == khalf || outType == khalf) @@ -1499,6 +1506,8 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, return program; } +// + int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount) { // The global dimensions are just the blockCount to execute since we haven't @@ -1524,6 +1533,7 @@ int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount) return 0; } + int GetTestCase(const char *name, Type *outType, Type *inType, SaturationMode *sat, RoundingMode *round) { diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index c4310646a..f5646fce0 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h index 837b16772..2f408cf7e 100644 --- a/test_conformance/conversions/conversions_data_info.h +++ b/test_conformance/conversions/conversions_data_info.h @@ -53,7 +53,6 @@ typedef enum kSaturationModeCount } SaturationMode; - struct DataInitInfo { cl_ulong start; @@ -64,18 +63,15 @@ struct DataInitInfo RoundingMode round; cl_uint threads; - static cl_half_rounding_mode halfRoundingMode; static std::vector specialValuesUInt; static std::vector specialValuesFloat; static std::vector specialValuesDouble; }; - #define HFF(num) cl_half_from_float(num, DataInitInfo::halfRoundingMode) #define HTF(num) cl_half_to_float(num) - struct DataInitBase : public DataInitInfo { virtual ~DataInitBase() = default; @@ -86,7 +82,6 @@ struct DataInitBase : public DataInitInfo virtual void init(const cl_uint &, const cl_uint &) {} }; - template struct DataInfoSpec : public DataInitBase { @@ -110,7 +105,6 @@ struct DataInfoSpec : public DataInitBase std::vector mdv; - constexpr bool is_in_half() const { return (std::is_same::value && InFP); @@ -135,7 +129,6 @@ struct DataInfoSpec : public DataInitBase void init(const cl_uint &, const cl_uint &) override; InType clamp(const InType &); - inline float fclamp(float lo, float v, float hi) { v = v < lo ? lo : v; @@ -175,16 +168,16 @@ DataInfoSpec::DataInfoSpec( else if (std::is_same::value) ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX); - InType outMin = ((InType)ranges.first); - InType outMax = ((InType)ranges.second); - // clang-format off // for readability sake keep this section unformatted if (std::is_floating_point::value) { // from float/double + InType outMin = static_cast(ranges.first); + InType outMax = static_cast(ranges.second); + InType eps = std::is_same::value ? 
(InType) FLT_EPSILON : (InType) DBL_EPSILON; if (std::is_integral::value) - { // to char/uchar/short/ushort/half/int/uint/long/ulong + { // to char/uchar/short/ushort/int/uint/long/ulong/half if (sizeof(OutType)<=sizeof(cl_short)) { // to char/uchar/short/ushort/half clamp_ranges= @@ -449,7 +442,9 @@ void DataInfoSpec::conv(OutType *out, InType *in) // always convert to +0.0 } #else - *out = (*in == 0 ? 0.0 : (OutType)*in); + // Use volatile to prevent optimization by Clang compiler + volatile InType vi = *in; + *out = (vi == 0 ? 0.0 : static_cast(vi)); #endif } else if (std::is_same::value || is_out_half()) @@ -510,14 +505,23 @@ void DataInfoSpec::conv(OutType *out, InType *in) else { if (std::is_same::value) - *out = (*in == 0 ? 0.f : *in); // Per IEEE-754-2008 5.4.1, 0's - // always convert to +0.0 + { + // Use volatile to prevent optimization by Clang compiler + volatile InType vi = *in; + // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 + *out = (vi == 0 ? 0.0f : vi); + } else if (std::is_same::value) + { + // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 *out = (*in == 0 ? 0.0 : *in); + } else if (is_out_half()) *out = static_cast(HFF(*in == 0 ? 0.f : *in)); else + { *out = (OutType)*in; + } } } diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 88dca69fd..b7d6b0715 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/rounding_mode.h" #include "harness/ThreadPool.h" #include "harness/testHarness.h" #include "harness/parseParameters.h" @@ -119,7 +118,6 @@ const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char **argv) { int error; - cl_uint seed = (cl_uint)time(NULL); argc = parseCustomParam(argc, argv); if (argc == -1) @@ -146,8 +144,8 @@ int main(int argc, const char **argv) #endif vlog("===========================================================\n"); - vlog("Random seed: %u\n", seed); - gMTdata = init_genrand(seed); + vlog("Random seed: %u\n", gRandomSeed); + gMTdata = init_genrand(gRandomSeed); const char *arg[] = { argv[0] }; int ret = @@ -523,8 +521,6 @@ test_status InitCL(cl_device_id device) } } - gMTdata = init_genrand(gRandomSeed); - char c[1024]; static const char *no_yes[] = { "NO", "YES" }; vlog("\nCompute Device info:\n"); diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index 29a6cec15..4ddd1db7f 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -27,561 +27,538 @@ #ifdef CL_VERSION_2_0 extern int gWimpyMode; -static const char* enqueue_simple_block[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_simple_block(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; -static const char* enqueue_block_with_local_arg1[] = -{ - NL, "#define LOCAL_MEM_SIZE 
10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg1(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; +// clang-format off +static const char* enqueue_simple_block[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } -static const char* enqueue_block_with_local_arg2[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp1[i] = mul * 7 - 21;" - NL, " tmp2[i].x = (float)(mul * 7 - 21);" - NL, " tmp2[i].y = (float)(mul * 7 - 21);" - NL, " tmp2[i].z = (float)(mul * 7 - 21);" - NL, " tmp2[i].w = (float)(mul * 7 - 21);" - NL, "" - NL, " res[tid] += tmp1[i];" - NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg2(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)" - 
NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + kernel void enqueue_simple_block(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_wait_list[] = -{ - NL, "#define BLOCK_SUBMITTED 1" - NL, "#define BLOCK_COMPLETED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_block_with_wait_list(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + void (^kernelBlock)(void) = ^{ 
block_fn(tid, multiplier, res); }; -static const char* enqueue_block_with_wait_list_and_local_arg[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define BLOCK_STARTED 3" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " res[tid] = BLOCK_STARTED;" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, " - NL, " ^(__local void* buf) {" - NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);" - NL, " }, LOCAL_MEM_SIZE*sizeof(int));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + res[tid] = -1; + queue_t def_q = 
get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_block_get_kernel_work_group_size[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; +static const char* enqueue_block_with_local_arg1[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = 
-1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + res[tid] += 2; + } -static const char* enqueue_block_capture_event_profiling_info_after_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)" - NL, "{" - NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, " release_event(evt);" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "}" 
- NL -}; + kernel void enqueue_block_with_local_arg1(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_capture_event_profiling_info_before_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const ulong *value, __global int* res)" - NL, "{" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t block_evt2;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt1);" - NL, " release_event(block_evt2);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, 
multiplier, res, (local int*)buf); }; -static const char* enqueue_block_with_barrier[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = mul * 7 -21;" - NL, "}" - NL, "" - NL, "void loop_fn(size_t tid, int n, __global int* res)" - NL, "{" - NL, " while(n > 0)" - NL, " {" - NL, " barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = 0;" - NL, " --n;" - NL, " }" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_barrier(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " queue_t def_q = get_default_queue();" - NL, " res[tid] = -1;" - NL, " size_t n = 256;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " ndrange_t ndrange = ndrange_1D(n);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_marker_with_block_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_block_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = 
get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt1);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; +static const char* enqueue_block_with_local_arg2[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_marker_with_user_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_user_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " uint multiplier = 7;" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t marker_evt;" - NL, " clk_event_t block_evt;" - NL, "" - NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { 
res[tid] = -1; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, " - NL, " ^{" - NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp1[i] = mul * 7 - 21; + tmp2[i].x = (float)(mul * 7 - 21); + tmp2[i].y = (float)(mul * 7 - 21); + tmp2[i].z = (float)(mul * 7 - 21); + tmp2[i].w = (float)(mul * 7 - 21); + + res[tid] += tmp1[i]; + res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w); + } + res[tid] += 2; + } -static const char* enqueue_marker_with_mixed_events[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t mix_ev[2];" - NL, " mix_ev[0] = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1]," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, 
"" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[1]);" - NL, " release_event(marker_evt);" - NL, " release_event(mix_ev[0]);" - NL, "}" - NL -}; + kernel void enqueue_block_with_local_arg2(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_mixed_events[] = -{ - NL, "kernel void enqueue_block_with_mixed_events(__global int* res)" - NL, "{" - NL, " int enq_res;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t mix_ev[3];" - NL, " mix_ev[0] = create_user_event();" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " res[tid] = -2;" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[0]);" - NL, " release_event(mix_ev[1]);" - NL, " release_event(mix_ev[2]);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*, __local void*) = ^(__local void* 
buf1, __local void* buf2) + { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); }; + + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_with_wait_list[] = { R"( + #define BLOCK_SUBMITTED 1 + #define BLOCK_COMPLETED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_block_with_wait_list(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"( + #define LOCAL_MEM_SIZE 10 + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define BLOCK_STARTED 3 + #define CHECK_SUCCESS 0 + + void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp) + { + res[tid] = BLOCK_STARTED; + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + if (res[tid] == 
BLOCK_STARTED) res[tid] = BLOCK_COMPLETED; + } + + kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^(__local void* buf) { + block_fn_local_arg(tid, multiplier, res, (__local int*)buf); + }, LOCAL_MEM_SIZE*sizeof(int)); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_get_kernel_work_group_size[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_work_group_size(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_work_group_size(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* 
enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_after_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const clk_event_t evt, __global int* res) + { + capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + release_event(evt); + } + + kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res) + { + size_t tid = get_global_id(0); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); }; + + enq_res = enqueue_kernel(def_q, 
CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_before_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const ulong *value, __global int* res) + { + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + } + + kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + clk_event_t block_evt2; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + set_user_event_status(user_evt, CL_COMPLETE); + + void (^checkBlock) (void) = ^{ check_res(tid, &value, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + release_event(user_evt); + release_event(block_evt1); + release_event(block_evt2); + } +)" }; + +static const char* enqueue_block_with_barrier[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = mul * 7 -21; + } + + void loop_fn(size_t tid, int n, __global int* res) + { + while (n > 0) + { + barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = 0; + --n; + } + } + + kernel void 
enqueue_block_with_barrier(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + queue_t def_q = get_default_queue(); + res[tid] = -1; + size_t n = 256; + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + ndrange_t ndrange = ndrange_1D(n); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_marker_with_block_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_block_event(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + clk_event_t block_evt1; + clk_event_t marker_evt; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt1); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* 
// OpenCL C source for the device-enqueue test
// "enqueue_marker_with_mixed_events".
//
// Each work-item:
//   1. creates a user event in mix_ev[0] and stores BLOCK_SUBMITTED in
//      res[tid];
//   2. enqueues a child block gated on mix_ev[0] whose completion event is
//      returned in mix_ev[1]; the block stores BLOCK_COMPLETED in res[tid]
//      (res[tid] = -2 if the enqueue fails);
//   3. enqueues a marker that waits on both entries of mix_ev, i.e. the user
//      event and the block event (res[tid] = -3 if the enqueue fails);
//   4. if res[tid] is still BLOCK_SUBMITTED, enqueues a second block gated on
//      the marker that turns BLOCK_COMPLETED into CHECK_SUCCESS;
//   5. sets the user event to CL_COMPLETE and releases the events it holds.
//
// The text inside the raw string is the program handed to the OpenCL compiler,
// so it is left untouched.
static const char* enqueue_marker_with_mixed_events[] = { R"(
    #define BLOCK_COMPLETED 1
    #define BLOCK_SUBMITTED 2
    #define CHECK_SUCCESS 0

    kernel void enqueue_marker_with_mixed_events(__global int* res)
    {
      size_t tid = get_global_id(0);

      clk_event_t mix_ev[2];
      mix_ev[0] = create_user_event();

      res[tid] = BLOCK_SUBMITTED;
      queue_t def_q = get_default_queue();
      ndrange_t ndrange = ndrange_1D(1);

      int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],
      ^{
         res[tid] = BLOCK_COMPLETED;
       });
      if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }

      clk_event_t marker_evt;

      enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);
      if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }

      retain_event(marker_evt);
      release_event(marker_evt);

      //check block is not started
      if (res[tid] == BLOCK_SUBMITTED)
      {
        clk_event_t my_evt;
        enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
        ^{
           //check block is completed
           if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
         });
        release_event(my_evt);
      }

      set_user_event_status(mix_ev[0], CL_COMPLETE);

      release_event(mix_ev[1]);
      release_event(marker_evt);
      release_event(mix_ev[0]);
    }
)" };
b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 43734da0a..6c02f9f78 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -201,14 +201,33 @@ struct BasicEnqueueTest : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(num_elements); + std::vector output_data_1(num_elements); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + const cl_int new_pattern = 12; + error = clEnqueueFillBuffer(queue, in_mem, &new_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(new_pattern, output_data_2[i], i); } return CL_SUCCESS; diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index e06258335..0d4dd0399 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ 
b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,6 +3,12 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_image_arguments.cpp + mutable_command_arguments.cpp + mutable_command_out_of_order.cpp + mutable_command_global_size.cpp + mutable_command_local_size.cpp + mutable_command_global_offset.cpp ../basic_command_buffer.cpp ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 97075792b..a2fae4974 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,18 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_image_1d_arguments), + ADD_TEST(mutable_dispatch_image_2d_arguments), + ADD_TEST(mutable_dispatch_out_of_order), + ADD_TEST(mutable_dispatch_simultaneous_out_of_order), + ADD_TEST(mutable_dispatch_global_size), + ADD_TEST(mutable_dispatch_local_size), + ADD_TEST(mutable_dispatch_global_offset), + ADD_TEST(mutable_dispatch_svm_arguments), + ADD_TEST(mutable_dispatch_local_arguments), + ADD_TEST(mutable_dispatch_global_arguments), + ADD_TEST(mutable_dispatch_pod_arguments), + ADD_TEST(mutable_dispatch_null_arguments), }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp new file mode 100644 index 
000000000..5c8291f05 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp @@ -0,0 +1,847 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "testHarness.h" +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases for +// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR: +// - __global arguments +// - __local arguments +// - plain-old-data arguments +// - NULL arguments +// - SVM arguments + +struct MutableDispatchGlobalArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchGlobalArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return 0; + } + + cl_int Run() override + { + cl_int error; + + // Create kernel + + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + })"; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, 
+ "sample_test"); + test_error(error, "Creating kernel failed"); + + // Create and initialize buffers + + MTdataHolder d(gRandomSeed); + + std::vector srcData(num_elements); + for (size_t i = 0; i < num_elements; i++) + srcData[i] = (cl_int)genrand_int32(d); + + clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), + srcData.data(), &error); + test_error(error, "Creating src buffer"); + + clMemWrapper dstBuf0 = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating initial dst buffer failed"); + + clMemWrapper dstBuf1 = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating updated dst buffer failed"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf); + test_error(error, "Unable to set src kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(dstBuf0), &dstBuf0); + test_error(error, "Unable to set initial dst kernel argument"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the initial execution + + std::vector dstData0(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf0, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData0.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for 
initial dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData0[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData0[i]); + return TEST_FAIL; + } + } + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(dstBuf1), &dstBuf1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + std::vector dstData1(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf1, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for modified dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData1[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData1[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +struct MutableDispatchLocalArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + 
MutableDispatchLocalArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return 0; + } + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src1, __local int + *src, __global int *dst) + { + size_t tid = get_global_id(0); + src[tid] = src1[tid]; + dst[tid] = src[tid]; + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threads[1], localThreads[1]; + std::vector constantData; + std::vector resultData; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + size_t sizeToAllocate = + ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData.resize(sizeToAllocate / sizeof(cl_int)); + resultData.resize(sizeToAllocate / sizeof(cl_int)); + + for (size_t i = 0; i < numberOfInts; i++) + constantData[i] = (cl_int)genrand_int32(d); + + clMemWrapper streams[2]; + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, + constantData.data(), &error); + test_error(error, "Creating test array failed"); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = + clSetKernelArg(kernel, 1, numberOfInts * sizeof(cl_int), nullptr); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[1]); + test_error(error, "Unable to set indexed kernel 
arguments"); + + threads[0] = numberOfInts; + localThreads[0] = 1; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, threads, + localThreads, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_mem), nullptr }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = + clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < numberOfInts; i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + return 
TEST_FAIL; + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchPODArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchPODArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, int dst) + { + size_t tid = get_global_id(0); + dst = src[tid]; + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threads[1], localThreads[1]; + std::vector constantData; + std::vector resultData; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + size_t sizeToAllocate = + ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData.resize(sizeToAllocate / sizeof(cl_int)); + resultData.resize(sizeToAllocate / sizeof(cl_int)); + + for (size_t i = 0; i < numberOfInts; i++) + constantData[i] = (cl_int)genrand_int32(d); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, + constantData.data(), &error); + test_error(error, "Creating test array failed"); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + cl_int intarg = 10; + error = clSetKernelArg(kernel, 1, sizeof(cl_int), &intarg); + test_error(error, "Unable to set indexed kernel arguments"); + + threads[0] = numberOfInts; + localThreads[0] = 1; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + 
CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, threads, + localThreads, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + intarg = 20; + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_int), &intarg }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < numberOfInts; i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + return TEST_FAIL; + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchNullArguments : public 
BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchNullArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + cl_int Run() override + { + cl_int error; + + // Create kernel + + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src ? src[tid] : 12345; + })"; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + std::vector srcData(num_elements); + for (size_t i = 0; i < num_elements; i++) + srcData[i] = (cl_int)genrand_int32(d); + + clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), + srcData.data(), &error); + test_error(error, "Creating src buffer"); + + clMemWrapper dstBuf = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating dst buffer failed"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf); + test_error(error, "Unable to set src kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(dstBuf), &dstBuf); + test_error(error, "Unable to set initial dst kernel argument"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = 
clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the initial execution + + std::vector dstData0(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData0.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for initial dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData0[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData0[i]); + return TEST_FAIL; + } + } + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg{ 0, sizeof(cl_mem), nullptr }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + std::vector dstData1(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for modified dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (12345 != 
dstData1[i]) + { + log_error("Modified data failed to verify: %d != dst[%zu]=%d\n", + 12345, i, dstData1[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchSVMArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchSVMArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_device_svm_capabilities svm_caps; + bool svm_capabilities = + !clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, + sizeof(svm_caps), &svm_caps, NULL) + && svm_caps != 0; + + return !svm_capabilities || BasicMutableCommandBufferTest::Skip(); + } + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + const char *svm_arguments_kernel = + R"( + typedef struct { + global int* ptr; + } wrapper; + __kernel void test_svm_arguments(__global wrapper* pWrapper) + { + size_t i = get_global_id(0); + pWrapper->ptr[i]++; + })"; + + create_single_kernel_helper(context, &program, &kernel, 1, + &svm_arguments_kernel, + "test_svm_arguments"); + + return 0; + } + + cl_int Run() override + { + const cl_int zero = 0; + cl_int error; + + // Allocate and initialize SVM for initial execution + + cl_int *initWrapper = (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(cl_int *), 0); + cl_int *initBuffer = (cl_int *)clSVMAlloc( + context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0); + test_assert_error(initWrapper != nullptr && initBuffer != nullptr, + "clSVMAlloc failed for initial execution"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, initWrapper, &initBuffer, + sizeof(cl_int *), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed for initWrapper"); + + error = clEnqueueSVMMemFill(queue, initBuffer, &zero, sizeof(zero), 
+ num_elements * sizeof(cl_int), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed for initBuffer"); + + // Allocate and initialize SVM for modified execution + + cl_int *newWrapper = + (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); + cl_int *newBuffer = (cl_int *)clSVMAlloc( + context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0); + test_assert_error(newWrapper != nullptr && newBuffer != nullptr, + "clSVMAlloc failed for modified execution"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, newWrapper, &newBuffer, + sizeof(cl_int *), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed for newWrapper"); + + error = clEnqueueSVMMemFill(queue, newBuffer, &zero, sizeof(zero), + num_elements * sizeof(cl_int), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed for newB"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArgSVMPointer(kernel, 0, initWrapper); + test_error(error, "clSetKernelArg failed for initWrapper"); + + error = clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(initBuffer), &initBuffer); + test_error(error, "clSetKernelExecInfo failed for initBuffer"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR + | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR, + 0 + }; + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Check the results of the initial 
execution + + error = + clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, initBuffer, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMap failed for initBuffer"); + + for (size_t i = 0; i < num_elements; i++) + { + if (initBuffer[i] != 1) + { + log_error("Initial verification failed at index %zu: Got %d, " + "wanted 1\n", + i, initBuffer[i]); + return TEST_FAIL; + } + } + + error = clEnqueueSVMUnmap(queue, initBuffer, 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMUnmap failed for initBuffer"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg_svm{}; + arg_svm.arg_index = 0; + arg_svm.arg_value = newWrapper; + + cl_mutable_dispatch_exec_info_khr exec_info{}; + exec_info.param_name = CL_KERNEL_EXEC_INFO_SVM_PTRS; + exec_info.param_value_size = sizeof(newBuffer); + exec_info.param_value = &newBuffer; + + cl_mutable_dispatch_config_khr dispatch_config{}; + dispatch_config.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR; + dispatch_config.command = command; + dispatch_config.num_svm_args = 1; + dispatch_config.arg_svm_list = &arg_svm; + dispatch_config.num_exec_infos = 1; + dispatch_config.exec_info_list = &exec_info; + + cl_mutable_base_config_khr mutable_config{}; + mutable_config.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR; + mutable_config.num_mutable_dispatch = 1; + mutable_config.mutable_dispatch_list = &dispatch_config; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + error = + clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, newBuffer, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMap failed for 
newBuffer"); + + for (size_t i = 0; i < num_elements; i++) + { + if (newBuffer[i] != 1) + { + log_error("Modified verification failed at index %zu: Got %d, " + "wanted 1\n", + i, newBuffer[i]); + return TEST_FAIL; + } + } + + error = clEnqueueSVMUnmap(queue, newBuffer, 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMUnmap failed for newBuffer"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Clean up + + clSVMFree(context, initWrapper); + clSVMFree(context, initBuffer); + clSVMFree(context, newWrapper); + clSVMFree(context, newBuffer); + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + + +int test_mutable_dispatch_local_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_global_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, + queue, num_elements); +} + +int test_mutable_dispatch_pod_arguments(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_null_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_svm_arguments(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} \ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 966695834..c88c14d1c 100644 --- 
a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -19,6 +19,17 @@ #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" +// If it is supported get the addresses of all the APIs here. +#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + struct BasicMutableCommandBufferTest : BasicCommandBufferTest { BasicMutableCommandBufferTest(cl_device_id device, cl_context context, @@ -84,24 +95,52 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest &platform, nullptr); test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); - // If it is supported get the addresses of all the APIs here. 
-#define GET_EXTENSION_ADDRESS(FUNC) \ - FUNC = reinterpret_cast( \ - clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ - if (FUNC == nullptr) \ - { \ - log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ - " with " #FUNC "\n"); \ - return TEST_FAIL; \ + GET_EXTENSION_ADDRESS(clUpdateMutableCommandsKHR); + + return CL_SUCCESS; } + + clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + + const char* kernelString = "__kernel void empty() {}"; + const size_t global_work_size = 4 * 16; +}; + +struct InfoMutableCommandBufferTest : BasicMutableCommandBufferTest +{ + InfoMutableCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + cl_int error = init_extension_functions(); + test_error(error, "Unable to initialise extension functions"); + + return CL_SUCCESS; + } + + cl_int init_extension_functions() + { + BasicCommandBufferTest::init_extension_functions(); + + cl_platform_id platform; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + GET_EXTENSION_ADDRESS(clGetMutableCommandInfoKHR); return CL_SUCCESS; } clGetMutableCommandInfoKHR_fn clGetMutableCommandInfoKHR = nullptr; - const char* kernelString = "__kernel void empty() {}"; - const size_t global_work_size = 4 * sizeof(cl_int); }; -#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H +#undef GET_EXTENSION_ADDRESS + +#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp new file mode 100644 index 000000000..80bc015a3 
--- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp @@ -0,0 +1,170 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR + +struct MutableDispatchGlobalOffset : InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchGlobalOffset(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *global_offset_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_global_offset(0); + })"; + + cl_int error = + 
create_single_kernel_helper(context, &program, &kernel, 1, + &global_offset_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + &update_global_offset /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR, + 
sizeof(info_global_offset), &info_global_offset, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_global_offset != update_global_offset) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i < update_global_offset && 0 != resultData[i]) + { + log_error("Data failed to verify: update_global_offset != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i >= update_global_offset + && update_global_offset != resultData[i]) + { + log_error("Data failed to verify: update_global_offset != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + return CL_SUCCESS; + } + + size_t info_global_offset = 0; + const size_t update_global_offset = 3; + const size_t sizeToAllocate = + (global_work_size + update_global_offset) * sizeof(cl_int); + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_global_offset(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp new file mode 100644 index 000000000..091f0c8d3 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp @@ -0,0 +1,167 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR + +struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchGlobalSize(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *global_size_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_global_size(0); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &global_size_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 
sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + &update_global_size /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR, + sizeof(info_global_size), &info_global_size, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_global_size != update_global_size) + { + log_error("ERROR: Wrong size returned from " + 
"clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i >= update_global_size && global_work_size != resultData[i]) + { + log_error("Data failed to verify: update_global_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i < update_global_size + && update_global_size != resultData[i]) + { + log_error("Data failed to verify: update_global_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + size_t info_global_size = 0; + const size_t update_global_size = 3; + const size_t sizeToAllocate = global_work_size; + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_global_size(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp new file mode 100644 index 000000000..b1ce25ec1 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp @@ -0,0 +1,427 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases for +// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR: +// - image arguments + +struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchImage1DArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return CL_SUCCESS; + } + + bool Skip() override + { + cl_bool image_support; + + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(image_support), &image_support, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + return (!mutable_support || !image_support) + || BasicMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"(__kernel void sample_test( read_only image1d_t source, sampler_t + sampler, write_only image1d_t dest) + { + int 
offset = get_global_id(0); + + int4 color = read_imagei( source, sampler, offset ); + + write_imagei( dest, offset, color ); + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = 4; + image_desc.image_row_pitch = 0; + image_desc.num_mip_levels = 0; + + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; + + image_descriptor imageInfo = { 0 }; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = &formats; + imageInfo.width = 4; + + BufferOwningPtr imageValues_input, imageValues_output, outputData; + MTdataHolder d(gRandomSeed); + generate_random_image_data(&imageInfo, imageValues_input, d); + generate_random_image_data(&imageInfo, imageValues_output, d); + generate_random_image_data(&imageInfo, outputData, d); + + char *host_ptr_input = (char *)imageValues_input; + char *host_ptr_output = (char *)imageValues_output; + + clMemWrapper src_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_input, nullptr, &error); + test_error(error, "create_image_1d failed"); + + clMemWrapper dst_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_output, nullptr, &error); + test_error(error, "create_image_2d failed"); + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler); + 
test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image); + test_error(error, "Unable to set indexed kernel arguments"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + size_t globalDim[3] = { 4, 1, 1 }, localDim[3] = { 1, 1, 1 }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, globalDim, + localDim, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + clMemWrapper new_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_output, nullptr, &error); + test_error(error, "create_image_1d failed"); + + cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image }; + cl_mutable_dispatch_arg_khr args[] = { arg_2 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + 
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { image_desc.image_width, 1, 1 }; + + error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0, + 0, outputData, 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < imageInfo.width; ++i) + { + if (imageValues_input[i] != outputData[i]) + { + log_error("Data failed to verify: imageValues[%d]=%d != " + "outputData[%d]=%d\n", + i, imageValues_input[i], i, outputData[i]); + + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchImage2DArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return CL_SUCCESS; + } + + bool Skip() override + { + cl_bool image_support; + + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(image_support), &image_support, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + return (!mutable_support || !image_support) + || BasicMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + + const char *sample_const_arg_kernel = + R"(__kernel void sample_test( read_only image2d_t source, sampler_t + sampler, write_only image2d_t 
dest) + { + int x = get_global_id(0); + int y = get_global_id(1); + + int4 color = read_imagei( source, sampler, (int2) (x, y) ); + + write_imagei( dest, (int2) (x, y), color ); + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = 4; + image_desc.image_height = 4; + image_desc.image_row_pitch = 0; + image_desc.num_mip_levels = 0; + + size_t data_size = + image_desc.image_width * image_desc.image_height * sizeof(cl_int); + + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; + + image_descriptor imageInfo = { 0 }; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + imageInfo.width = 4; + imageInfo.height = 4; + imageInfo.format = &formats; + + BufferOwningPtr imageValues_input, imageValues_output; + + MTdataHolder d(gRandomSeed); + generate_random_image_data(&imageInfo, imageValues_input, d); + generate_random_image_data(&imageInfo, imageValues_output, d); + + char *host_ptr_input = (char *)imageValues_input; + char *host_ptr_output = (char *)imageValues_output; + std::vector outputData(data_size); + + clMemWrapper src_image = + create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &formats, image_desc.image_width, + image_desc.image_height, 0, host_ptr_input, &error); + test_error(error, "create_image_2d failed"); + + clMemWrapper dst_image = create_image_2d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, image_desc.image_height, 0, host_ptr_output, + &error); + test_error(error, "create_image_2d failed"); + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create 
sampler"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image); + test_error(error, "Unable to set indexed kernel arguments"); + + size_t globalDim[3] = { 4, 4, 1 }, localDim[3] = { 1, 1, 1 }; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, globalDim, + localDim, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + clMemWrapper new_image = create_image_2d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, image_desc.image_height, 0, + imageValues_output, &error); + test_error(error, "create_image_2d failed"); + + cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image }; + cl_mutable_dispatch_arg_khr args[] = { arg_2 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + 
cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { image_desc.image_width, image_desc.image_height, + 1 }; + + error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0, + 0, outputData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < imageInfo.width * imageInfo.height; ++i) + { + if (imageValues_input[i] != outputData[i]) + { + log_error("Data failed to verify: imageValues[%d]=%d != " + "outputData[%d]=%d\n", + i, imageValues_input[i], i, outputData[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_image_1d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, + queue, num_elements); +} + +int test_mutable_dispatch_image_2d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest(device, context, + queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index cc425a4d6..61600dc90 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -42,13 +42,13 @@ // 
CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR // CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR -struct InfoDeviceQuery : public BasicMutableCommandBufferTest +struct InfoDeviceQuery : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoDeviceQuery(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -71,12 +71,12 @@ struct InfoDeviceQuery : public BasicMutableCommandBufferTest } }; -struct InfoBuffer : public BasicMutableCommandBufferTest +struct InfoBuffer : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoBuffer(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -108,13 +108,13 @@ struct InfoBuffer : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct PropertiesArray : public BasicMutableCommandBufferTest +struct PropertiesArray : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; PropertiesArray(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -140,7 +140,7 @@ struct PropertiesArray : public BasicMutableCommandBufferTest if (size != sizeof(props) || test_props[0] != props[0] || test_props[1] != props[1]) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect 
properties returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -154,12 +154,12 @@ struct PropertiesArray : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Kernel : public BasicMutableCommandBufferTest +struct Kernel : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Kernel(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -181,7 +181,7 @@ struct Kernel : public BasicMutableCommandBufferTest // opaque object. if (test_kernel != kernel) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect kernel returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -195,12 +195,12 @@ struct Kernel : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Dimensions : public BasicMutableCommandBufferTest +struct Dimensions : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Dimensions(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -210,8 +210,7 @@ struct Dimensions : public BasicMutableCommandBufferTest &global_work_size, nullptr, 0, nullptr, nullptr, &command); test_error(error, "clCommandNDRangeKernelKHR failed"); - size_t test_dimensions; - + cl_uint test_dimensions = 0; error = clGetMutableCommandInfoKHR( command, CL_MUTABLE_DISPATCH_DIMENSIONS_KHR, sizeof(test_dimensions), &test_dimensions, nullptr); @@ -219,7 +218,7 @@ struct Dimensions : public 
BasicMutableCommandBufferTest if (test_dimensions != dimensions) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect dimensions returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -234,12 +233,12 @@ struct Dimensions : public BasicMutableCommandBufferTest const size_t dimensions = 3; }; -struct InfoType : public BasicMutableCommandBufferTest +struct InfoType : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoType(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -271,12 +270,12 @@ struct InfoType : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoQueue : public BasicMutableCommandBufferTest +struct InfoQueue : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoQueue(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -308,13 +307,13 @@ struct InfoQueue : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest +struct InfoGlobalWorkOffset : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkOffset(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, 
queue) {} cl_int Run() override @@ -330,7 +329,7 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest if (test_global_work_offset != global_work_offset) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong global work offset returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -346,13 +345,13 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest size_t test_global_work_offset = 0; }; -struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest +struct InfoGlobalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -368,7 +367,7 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest if (test_global_work_size != global_work_size) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong global work size returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -383,13 +382,13 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest size_t test_global_work_size = 0; }; -struct InfoLocalWorkSize : public BasicMutableCommandBufferTest +struct InfoLocalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoLocalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -405,7 +404,7 @@ struct InfoLocalWorkSize : public BasicMutableCommandBufferTest if (test_local_work_size != local_work_size) { - 
log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong local work size returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp new file mode 100644 index 000000000..22a9da6d5 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp @@ -0,0 +1,174 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include "typeWrappers.h" +#include "procs.h" +#include "testHarness.h" +#include "mutable_command_basic.h" +#include + +#include +#include + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR + +struct MutableDispatchLocalSize : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchLocalSize(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *local_size_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_local_size(0); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &local_size_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, &local_work_size, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + 
test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + &update_global_size /* global_work_size */, + &update_local_size /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR, + sizeof(info_local_size), &info_local_size, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_local_size != update_local_size) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i < update_global_size && update_local_size != resultData[i]) + { + log_error("Data failed to verify: update_local_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return 
TEST_FAIL; + } + else if (i >= update_global_size + && local_work_size != resultData[i]) + { + log_error("Data failed to verify: update_local_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + size_t info_local_size = 0; + const size_t global_work_size = 16; + const size_t local_work_size = 8; + const size_t update_global_size = 8; + const size_t update_local_size = 4; + const size_t sizeToAllocate = 64; + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_local_size(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp new file mode 100644 index 000000000..d507dadfa --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp @@ -0,0 +1,454 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +#include "mutable_command_basic.h" + +#include +#include +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// - simultaneous use +// - cross-queue simultaneous-use + +namespace { + +template +struct OutOfOrderTest : public BasicMutableCommandBufferTest +{ + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue), + out_of_order_queue(nullptr), out_of_order_command_buffer(this), + user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr), + program_fill(nullptr) + { + simultaneous_use_requested = simultaneous_request; + if (simultaneous_request) buffer_size_multiplier = 2; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernel() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernel(); + test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed"); + + // create additional kernel to properly prepare output buffer for test + const char* kernel_str = + R"( + __kernel void fill(int pattern, __global int* out, __global int* + offset) + { + size_t id = get_global_id(0); + size_t ind = offset[0] + id ; + out[ind] = pattern; + })"; + + error = create_single_kernel_helper_create_program( + context, &program_fill, 1, &kernel_str); + test_error(error, "Failed to create program with source"); + + error = + clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + kernel_fill = clCreateKernel(program_fill, "fill", &error); + test_error(error, "Failed to create copy kernel"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernelArgs() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs(); + test_error(error, + 
"BasicMutableCommandBufferTest::SetUpKernelArgs failed"); + + error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int), + &overwritten_pattern); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem); + test_error(error, "clSetKernelArg failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUp(int elements) override + { + cl_int error = BasicMutableCommandBufferTest::SetUp(elements); + test_error(error, "BasicMutableCommandBufferTest::SetUp failed"); + + error = SetUpKernel(); + test_error(error, "SetUpKernel failed"); + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0 + }; + + out_of_order_command_buffer = clCreateCommandBufferKHR( + 1, &out_of_order_queue, properties, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + + return !out_of_order_support + || (simultaneous_use_requested && !simultaneous_use_support) + || !mutable_support || BasicMutableCommandBufferTest::Skip(); + } + + //-------------------------------------------------------------------------- + cl_int Run() override + { + cl_int error = CL_SUCCESS; + + if 
(simultaneous_use_support) + { + // enqueue simultaneous command-buffers with out-of-order calls + error = RunSimultaneous(); + test_error(error, "RunSimultaneous failed"); + } + else + { + // enqueue single command-buffer with out-of-order calls + error = RunSingle(); + test_error(error, "RunSingle failed"); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + const cl_int pattern = pattern_pri; + cl_int error = + clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSingle() + { + cl_int error; + + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 
0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordSimultaneousCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + // for both simultaneous passes this call will fill entire in_mem buffer + cl_int error = clCommandFillBufferKHR( + out_of_order_command_buffer, nullptr, in_mem, 
&pattern_pri, + sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr, + &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + // to avoid overwriting the entire result buffer instead of filling + // only relevant part this additional kernel was introduced + + error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr, + nullptr, kernel_fill, 1, nullptr, + &num_elements, nullptr, 0, nullptr, + &sync_points[1], &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + struct SimulPassData + { + cl_int offset; + std::vector output_buffer; + // 0:user event, 1:offset-buffer fill event, 2:kernel done event + clEventWrapper wait_events[3]; + }; + + //-------------------------------------------------------------------------- + cl_int EnqueueSimultaneousPass(SimulPassData& pd) + { + cl_int error = CL_SUCCESS; + if (!user_event) + { + user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + } + + pd.wait_events[0] = user_event; + + // filling offset buffer must wait for previous pass completeness + error = clEnqueueFillBuffer( + out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0, + sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0), + (wait_pass_event != nullptr ? 
&wait_pass_event : nullptr), + &pd.wait_events[1]); + test_error(error, "clEnqueueFillBuffer failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), 
data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSimultaneous() + { + cl_int error = RecordSimultaneousCommandBuffer(); + test_error(error, "RecordSimultaneousCommandBuffer failed"); + + cl_int offset = static_cast(num_elements); + + std::vector simul_passes = { + { 0, std::vector(num_elements) }, + { offset, std::vector(num_elements) } + }; + + for (auto&& pass : simul_passes) + { + error = EnqueueSimultaneousPass(pass); + test_error(error, "EnqueueSimultaneousPass failed"); + + wait_pass_event = pass.wait_events[2]; + } + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(out_of_order_queue); + test_error(error, "clFinish failed"); + + // verify the result buffers + for (auto&& pass : simul_passes) + { + auto& res_data = pass.output_buffer; + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); + } + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + + clEventWrapper user_event; + clEventWrapper single_event; + clEventWrapper wait_pass_event; + + clKernelWrapper kernel_fill; + clProgramWrapper program_fill; + + const size_t test_global_work_size = 3 * sizeof(cl_int); + cl_mutable_command_khr command = nullptr; + + const cl_int overwritten_pattern = 0xACDC; + const cl_int pattern_pri = 42; +}; + +} // anonymous namespace + +int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id 
device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 4b6dacb69..1db48917f 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -59,4 +59,51 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); -#endif // CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +extern int test_mutable_dispatch_image_1d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_image_2d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_global_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_local_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_pod_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_null_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_svm_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_simultaneous_out_of_order( + 
cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_global_size(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_local_size(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_global_offset(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/ diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp new file mode 100644 index 000000000..bd669165c --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp @@ -0,0 +1,85 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "basic_command_buffer.h" +#include "procs.h" + +namespace { + +// Test that finalizing a command-buffer that has already been finalized returns +// the correct error code. 
+struct FinalizeInvalid : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Finalizing an already finalized command-buffer must return + // CL_INVALID_OPERATION + error = clFinalizeCommandBufferKHR(command_buffer); + test_failure_error_ret( + error, CL_INVALID_OPERATION, + "clFinalizeCommandBufferKHR should return CL_INVALID_OPERATION", + TEST_FAIL); + + return CL_SUCCESS; + } +}; + +// Check that an empty command-buffer can be finalized and then executed. +struct FinalizeEmpty : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + // Finalize an empty command-buffer + cl_int error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Execute empty command-buffer and then wait to complete + clEventWrapper event; + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clWaitForEvents(1, &event); + test_error(error, "clWaitForEvents failed"); + + return CL_SUCCESS; + } +}; +} // anonymous namespace + +int test_finalize_invalid(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_finalize_empty(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, num_elements); +} diff --git 
a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index d46b28887..2ad77dbe0 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -26,6 +26,7 @@ enum class CombufInfoTestMode CITM_REF_COUNT, CITM_STATE, CITM_PROP_ARRAY, + CITM_CONTEXT, }; namespace { @@ -38,6 +39,7 @@ namespace { // -test case for CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query // -test case for CL_COMMAND_BUFFER_STATE_KHR query // -test case for CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query +// -test case for CL_COMMAND_BUFFER_CONTEXT_KHR query template struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest @@ -70,6 +72,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest error = RunPropArrayInfoTest(); test_error(error, "RunPropArrayInfoTest failed"); break; + case CombufInfoTestMode::CITM_CONTEXT: + error = RunContextInfoTest(); + test_error(error, "RunContextInfoTest failed"); + break; } return CL_SUCCESS; @@ -205,8 +211,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest // lambda to verify given state auto verify_state = [&](const cl_command_buffer_state_khr &expected) { - cl_command_buffer_state_khr state = - CL_COMMAND_BUFFER_STATE_INVALID_KHR; + cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0); cl_int error = clGetCommandBufferInfoKHR( command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), @@ -323,6 +328,46 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest return TEST_FAIL; } + cl_int RunContextInfoTest() + { + cl_int error = TEST_PASS; + + // record command buffers + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + size_t ret_value_size = 0; + error 
= clGetCommandBufferInfoKHR(command_buffer, + CL_COMMAND_BUFFER_CONTEXT_KHR, 0, + nullptr, &ret_value_size); + test_error(error, "clGetCommandBufferInfoKHR failed"); + + test_assert_error( + ret_value_size == sizeof(cl_context), + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + cl_context ret_context = nullptr; + error = clGetCommandBufferInfoKHR( + command_buffer, CL_COMMAND_BUFFER_CONTEXT_KHR, sizeof(cl_context), + &ret_context, nullptr); + test_error(error, "clGetCommandBufferInfoKHR failed"); + test_assert_error( + ret_context != nullptr, + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + cl_context expected_context = nullptr; + error = + clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), + &expected_context, nullptr); + test_error(error, "clGetCommandQueueInfo failed"); + + test_assert_error( + ret_context == expected_context, + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + return TEST_PASS; + } + const cl_int pattern = 0xE; }; @@ -360,3 +405,11 @@ int test_info_prop_array(cl_device_id device, cl_context context, CommandBufferGetCommandBufferInfo>( device, context, queue, num_elements); } + +int test_info_context(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest< + CommandBufferGetCommandBufferInfo>( + device, context, queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp index d73fc9ce7..82ff16f0e 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp @@ -70,15 +70,42 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); test_error(error, "clEnqueueCommandBufferKHR failed"); - 
std::vector output_data(num_elements); + std::vector output_data_1(num_elements); error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, - data_size(), output_data.data(), 1, &event, - nullptr); + data_size(), output_data_1.data(), 1, + &event, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = + clEnqueueFillBuffer(queue, in_mem, &zero_pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = + clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data_2.data(), 1, + &event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data_2[i], i); } return CL_SUCCESS; @@ -106,6 +133,7 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest } const cl_int pattern = 0x16; + const cl_int zero_pattern = 0x0; clCommandQueueWrapper out_of_order_queue; clCommandBufferWrapper out_of_order_command_buffer; clEventWrapper event; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp index 102ae761e..7a1f0e6d5 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp +++ 
b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp @@ -38,7 +38,7 @@ struct CopyImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillImageKHR(command_buffer, nullptr, src_image, - fill_color, origin, region, 0, + fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -56,13 +56,38 @@ struct CopyImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); - error = clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, - 0, output_data.data(), 0, nullptr, nullptr); + std::vector output_data_1(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillImage(queue, src_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueFillImage(queue, dst_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -97,8 +122,12 @@ struct CopyImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = 
{ 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x05; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x05; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x1; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper src_image; clMemWrapper dst_image; @@ -111,7 +140,7 @@ struct CopyBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -127,20 +156,45 @@ struct CopyBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size()); + std::vector output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); error = clEnqueueReadBuffer(queue, 
out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const cl_char pattern = 0x14; + const cl_char pattern_1 = 0x14; + const cl_char pattern_2 = 0x28; }; struct CopyBufferToImageKHR : public BasicCommandBufferTest @@ -150,7 +204,7 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, buffer, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, buffer, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -168,15 +222,40 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadImage failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillImage(queue, image, &fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR 
failed"); + + std::vector output_data_2(data_size); + + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -211,7 +290,14 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x11; + const cl_char pattern_1 = 0x11; + const cl_char pattern_2 = 0x22; + + const cl_uint fill_color_2[4] = { static_cast(pattern_2), + static_cast(pattern_2), + static_cast(pattern_2), + static_cast(pattern_2) }; + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper buffer; @@ -225,7 +311,7 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -243,16 +329,39 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_1[i], i); + } + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + 
test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -287,8 +396,12 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x12; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x12; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x24; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -302,7 +415,7 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -319,14 +432,38 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, + 
output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -353,7 +490,8 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x13; + const cl_char pattern_1 = 0x13; + const cl_char pattern_2 = 0x26; clMemWrapper in_mem; clMemWrapper out_mem; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp index 88e97a271..0ba8055a1 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp @@ -35,7 +35,7 @@ struct FillImageKHR : public BasicCommandBufferTest cl_int Run() override { 
cl_int error = - clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -47,14 +47,34 @@ struct FillImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size); + std::vector output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size); + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -85,8 +105,12 @@ struct FillImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x10; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x10; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x20; + const cl_uint fill_color_2[4] = { 
pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -99,7 +123,7 @@ struct FillBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -111,20 +135,40 @@ struct FillBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector output_data(data_size()); + std::vector output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), 0, + data_size(), 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_2(data_size()); error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const char pattern = 0x15; + const char pattern_1 = 0x15; + const char pattern_2 = 0x30; }; }; diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp index 4eefc8ab1..3e923f6cd 
100644 --- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(info_ref_count), ADD_TEST(info_state), ADD_TEST(info_prop_array), + ADD_TEST(info_context), ADD_TEST(basic_profiling), ADD_TEST(simultaneous_profiling), ADD_TEST(regular_wait_for_command_buffer), @@ -58,7 +59,9 @@ test_definition test_list[] = { ADD_TEST(event_info_command_queue), ADD_TEST(event_info_execution_status), ADD_TEST(event_info_context), - ADD_TEST(event_info_reference_count) + ADD_TEST(event_info_reference_count), + ADD_TEST(finalize_invalid), + ADD_TEST(finalize_empty) }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 53a7d9349..cd839cbb0 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -41,6 +41,8 @@ extern int test_info_state(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_info_prop_array(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_info_context(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_basic_set_kernel_arg(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_pending_set_kernel_arg(cl_device_id device, cl_context context, @@ -130,5 +132,9 @@ extern int test_event_info_reference_count(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_finalize_invalid(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_finalize_empty(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); #endif // 
CL_KHR_COMMAND_BUFFER_PROCS_H diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp index a7ed307ee..89ab17b38 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp @@ -120,9 +120,11 @@ int test_external_semaphores_queries(cl_device_id deviceID, cl_context context, SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr, CL_SEMAPHORE_TYPE_BINARY_KHR); - SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_uint, 1); + SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_device_id, deviceID); - SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, 1); + SEMAPHORE_PARAM_TEST( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, + getCLSemaphoreTypeFromVulkanType(vkExternalSemaphoreHandleType)); // Confirm that querying CL_SEMAPHORE_CONTEXT_KHR returns the right context SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_CONTEXT_KHR, cl_context, context); @@ -290,7 +292,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID, nullptr, 0, nullptr, &wait_event); test_error(err, "Could not wait semaphore"); - // Finish queue_1 and queue_2 + // Finish queue_1 and queue_2 err = clFinish(queue_1); test_error(err, "Could not finish queue"); @@ -304,7 +306,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID, return TEST_PASS; } -// Confirm that a signal followed by a wait will complete successfully +// Confirm that a signal followed by a wait will complete successfully int test_external_semaphores_simple_1(cl_device_id deviceID, cl_context context, cl_command_queue defaultQueue, int num_elements) @@ -931,420 +933,3 @@ int test_external_semaphores_multi_wait(cl_device_id deviceID, return TEST_PASS; } - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// 
order as soon as the submission order (after deferred dependencies) is -// correct. Case: first one deferred wait, then one non deferred signal. -int test_external_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Wait semaphore (dependency on user_event) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &signal_event); - test_error(err, "Could not 
signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure signal event is completed while wait event is not - test_assert_event_complete(signal_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then one deferred wait. Unblock -// signal, then unblock wait. When wait completes, unblock the other signal. -int test_external_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_3, &wait_event); - test_error(err, "Could not wait 
semaphore"); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for second signal - test_assert_event_complete(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_complete(wait_event); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then two deferred waits. Unblock -// one signal and one wait (both blocked by the same user event). When wait -// completes, unblock the other signal. Then unblock the other wait. -int test_external_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_1_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_3, &wait_1_event); - test_error(err, "Could not 
wait semaphore"); - - // Wait semaphore (dependency on user_event_2) - clEventWrapper wait_2_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, &wait_2_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure only second signal and second wait completed - cl_event event_list[] = { signal_2_event, wait_2_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait for events"); - - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(wait_1_event); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_1_event); - test_assert_event_complete(wait_2_event); - - return TEST_PASS; -} - -// Test that an invalid semaphore command results in the invalidation of the -// command's event and the dependencies' events -int test_external_semaphores_invalid_command(cl_device_id deviceID, - cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. 
" - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType); - VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context, - vkExternalSemaphoreHandleType, deviceID); - clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore_1 (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore_1 and semaphore_2 (dependency on user_event_1) - clEventWrapper wait_event; - cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(), - sema_ext_2.getCLSemaphore() }; - err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1, - &user_event_1, 
&wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore_1 (dependency on wait_event and user_event_2) - clEventWrapper signal_2_event; - cl_event wait_list[] = { user_event_2, wait_event }; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 2, wait_list, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are not completed - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event_1 (expect failure as waiting on semaphore_2 is not - // allowed (unsignaled) - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_assert_error(err != CL_SUCCESS, - "signal_2_event completed unexpectedly"); - - // Ensure signal_1 is completed while others failed (the second signal - // should fail as it depends on wait) - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - test_assert_event_complete(signal_1_event); - test_assert_event_terminated(wait_event); - test_assert_event_terminated(signal_2_event); - - return TEST_PASS; -} diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp index 56d15808d..887c9dca7 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -48,8 +48,10 @@ static inline size_t get_format_size(cl_context context, cl_image_desc image_desc = { 0 }; image_desc.image_type = imageType; - /* Size 1 only to query element size */ - image_desc.image_width = 1; + /* We use a width of 4 to query element size, as this is + the smallest possible value that satisfies the 
requirements + of all image formats (including extensions). */ + image_desc.image_width = 4; if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType && CL_MEM_OBJECT_IMAGE1D != imageType) { diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 74c5a160a..8d4234087 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -189,12 +189,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) // Get that moving if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); - FPU_mode_type oldMode; + FPU_mode_type oldMode = 0; RoundingMode oldRoundMode = kRoundToNearestEven; if (isFract) { // Calculate the correctly rounded reference result - memset(&oldMode, 0, sizeof(oldMode)); if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index c3d8f67a3..73ff3dd9e 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -22,6 +22,8 @@ #include #include +#include "harness/stringHelpers.h" + #include #include "test_comparisons_fp.h" @@ -83,29 +85,6 @@ extension, // clang-format on -std::string concat_kernel(const char* sstr[], int num) -{ - std::string res; - for (int i = 0; i < num; i++) res += std::string(sstr[i]); - return res; -} - -template -std::string string_format(const std::string& format, Args... args) -{ - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) 
- + 1; // Extra space for '\0' - if (size_s <= 0) - { - throw std::runtime_error("Error during formatting."); - } - auto size = static_cast(size_s); - std::unique_ptr buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - return std::string(buf.get(), - buf.get() + size - 1); // We don't want the '\0' inside -} - template bool verify(const T& A, const T& B) { return F()(A, B); @@ -226,14 +205,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKerPat_3, sizeof(equivTestKerPat_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKerPatLessGreater_3, sizeof(equivTestKerPatLessGreater_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } else @@ -243,14 +222,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKernPat, sizeof(equivTestKernPat) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKernPatLessGreater, sizeof(equivTestKernPatLessGreater) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index b0cda09fd..8a0567c34 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -14,11 +14,14 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/typeWrappers.h" #include #include #include #include +#include + #if ! 
defined( _WIN32) #if defined(__APPLE__) #include @@ -66,6 +69,16 @@ static void printUsage( void ); #define BUFFER_SIZE (1024*1024) #define KPAGESIZE 4096 +#define test_error_count(errCode, msg) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + gFailCount++; \ + print_error(errCodeResult, msg); \ + return errCode; \ + } \ + } // When we indicate non wimpy mode, the types that are 32 bits value will // test their entire range and 64 bits test will test the 32 bit @@ -74,12 +87,6 @@ static void printUsage( void ); static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; -// Tests are broken into the major test which is based on the -// src and cmp type and their corresponding vector types and -// sub tests which is for each individual test. The following -// tracks the subtests -int s_test_cnt = 0; - //----------------------------------------- // Static helper functions //----------------------------------------- @@ -237,6 +244,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont if (srctype == kdouble) strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" ); + if (srctype == khalf) + strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + // create type name and testname switch( vec_len ) { @@ -288,25 +298,14 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont return program; } - #define VECTOR_SIZE_COUNT 6 static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; - int s_test_fail = 0; - MTdataHolder d; + MTdataHolder d(gRandomSeed); const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; - cl_mem src1 = NULL; - cl_mem src2 = NULL; - cl_mem cmp = NULL; - cl_mem dest = NULL; - void *ref = NULL; - void *sref = NULL; - void *src1_host = NULL; - void *src2_host = NULL; - void *cmp_host = NULL; - void *dest_host = NULL; + 
clMemWrapper src1, src2, cmp, dest; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; size_t block_elements = BUFFER_SIZE / type_size[stype]; @@ -315,16 +314,22 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c // It is more efficient to create the tests all at once since we // use the same test data on each of the vector sizes - int vecsize; - cl_program programs[VECTOR_SIZE_COUNT]; - cl_kernel kernels[VECTOR_SIZE_COUNT]; + clProgramWrapper programs[VECTOR_SIZE_COUNT]; + clKernelWrapper kernels[VECTOR_SIZE_COUNT]; - if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" )) + if (stype == kdouble && !is_extension_available(device, "cl_khr_fp64")) { log_info("Skipping double because cl_khr_fp64 extension is not supported.\n"); return 0; } + if (stype == khalf && !is_extension_available(device, "cl_khr_fp16")) + { + log_info( + "Skipping half because cl_khr_fp16 extension is not supported.\n"); + return 0; + } + if (gIsEmbedded) { if (( stype == klong || stype == kulong ) && ! 
is_extension_available( device, "cles_khr_int64" )) @@ -340,54 +345,41 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } } - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) - { - programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] ); - if (!programs[vecsize] || !kernels[vecsize]) { - ++s_test_fail; - ++s_test_cnt; - return -1; - } - } - - ref = malloc( BUFFER_SIZE ); - if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } - sref = malloc( BUFFER_SIZE ); - if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src1 buffer\n"); src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src2 buffer\n"); cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate cmp buffer\n"); dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate dest buffer\n"); - src1_host = malloc(BUFFER_SIZE); - if (NULL == src1_host) - { - log_error("Error: could not allocate src1_host buffer\n"); - goto exit; - } - src2_host = malloc(BUFFER_SIZE); - if (NULL == src2_host) - { - log_error("Error: could not allocate src2_host buffer\n"); - goto exit; - } - cmp_host = malloc(BUFFER_SIZE); - if (NULL == cmp_host) + for 
(int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { - log_error("Error: could not allocate cmp_host buffer\n"); - goto exit; - } - dest_host = malloc(BUFFER_SIZE); - if (NULL == dest_host) - { - log_error("Error: could not allocate dest_host buffer\n"); - goto exit; + programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, + cmptype, element_count[vecsize]); + if (!programs[vecsize] || !kernels[vecsize]) + { + return -1; + } + + err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); } + std::vector ref(BUFFER_SIZE); + std::vector sref(BUFFER_SIZE); + std::vector src1_host(BUFFER_SIZE); + std::vector src2_host(BUFFER_SIZE); + std::vector cmp_host(BUFFER_SIZE); + std::vector dest_host(BUFFER_SIZE); + // We block the test as we are running over the range of compare values // "block the test" means "break the test into blocks" if( type_size[stype] == 4 ) @@ -396,111 +388,63 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1); log_info("Testing..."); - d = MTdataHolder(gRandomSeed); uint64_t i; + for (i=0; i < blocks; i+=step) { - void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src1" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s1, stype, d); - - void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, 
NULL, &err ); - if( err ){ log_error( "Error: Could not map src2" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s2, stype, d); - - void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map cmp" ); goto exit; } - // Setup the input data to change for each block - initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); - - if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } - - // Create the reference result - err = clEnqueueReadBuffer(queue, src1, CL_TRUE, 0, BUFFER_SIZE, - src1_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src1 to src1_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, src2, CL_TRUE, 0, BUFFER_SIZE, - src2_host, 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from src2 to src2_host failed\n"); - ++s_test_fail; - goto exit; - } - err = clEnqueueReadBuffer(queue, cmp, CL_TRUE, 0, BUFFER_SIZE, cmp_host, - 0, NULL, NULL); - if (err) - { - log_error("Error: Reading buffer from cmp to cmp_host failed\n"); - ++s_test_fail; - goto exit; - } + initSrcBuffer(src1_host.data(), stype, d); + initSrcBuffer(src2_host.data(), stype, d); + initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements); + + err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE, + src1_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src1"); + + err = clEnqueueWriteBuffer(queue, src2, CL_FALSE, 0, BUFFER_SIZE, + src2_host.data(), 0, 
NULL, NULL); + test_error_count(err, "Error: Could not write src2"); + + err = clEnqueueWriteBuffer(queue, cmp, CL_FALSE, 0, BUFFER_SIZE, + cmp_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write cmp"); Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1]; - (*sfunc)(ref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(ref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; - (*sfunc)(sref, src1_host, src2_host, cmp_host, block_elements); + (*sfunc)(sref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { size_t vector_size = element_count[vecsize] * type_size[stype]; size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size; - if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) - { log_error( "Error: Cannot set kernel arg dest! 
%d\n", err ); ++s_test_fail; goto exit; } - - // Wipe destination - void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } - memset( d, -1, BUFFER_SIZE ); - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; } + const cl_int pattern = -1; + err = clEnqueueFillBuffer(queue, dest, &pattern, sizeof(cl_int), 0, + BUFFER_SIZE, 0, nullptr, nullptr); + test_error_count(err, "clEnqueueFillBuffer failed"); + err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err); - ++s_test_fail; - goto exit; - } + test_error_count(err, "clEnqueueNDRangeKernel failed errcode\n"); err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE, - dest_host, 0, NULL, NULL); - if (err) - { - log_error( - "Error: Reading buffer from dest to dest_host failed\n"); - ++s_test_fail; - goto exit; - } + dest_host.data(), 0, NULL, NULL); + test_error_count( + err, "Error: Reading buffer from dest to dest_host failed\n"); - if ((*checkResults[stype])(dest_host, vecsize == 0 ? sref : ref, + if ((*checkResults[stype])(dest_host.data(), + vecsize == 0 ? 
sref.data() : ref.data(), block_elements, element_count[vecsize]) != 0) { log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); - ++s_test_fail; - goto exit; + return TEST_FAIL; } } // for vecsize } // for i @@ -510,28 +454,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c else log_info(" Wimpy Passed\n\n"); -exit: - if( src1 ) clReleaseMemObject( src1 ); - if( src2 ) clReleaseMemObject( src2 ); - if( cmp ) clReleaseMemObject( cmp ); - if( dest) clReleaseMemObject( dest ); - if( ref ) free(ref ); - if( sref ) free(sref ); - if (src1_host) free(src1_host); - if (src2_host) free(src2_host); - if (cmp_host) free(cmp_host); - if (dest_host) free(dest_host); - - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { - clReleaseKernel(kernels[vecsize]); - clReleaseProgram(programs[vecsize]); - } - ++s_test_cnt; - if (s_test_fail) - { - err = TEST_FAIL; - gFailCount++; - } return err; } @@ -567,6 +489,16 @@ int test_select_short_short(cl_device_id deviceID, cl_context context, cl_comman { return doTest(queue, context, kshort, kshort, deviceID); } +int test_select_half_ushort(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kushort, deviceID); +} +int test_select_half_short(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kshort, deviceID); +} int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(queue, context, kuint, kuint, deviceID); @@ -617,26 +549,17 @@ int test_select_double_long(cl_device_id deviceID, cl_context context, cl_comman } test_definition test_list[] = { - ADD_TEST( select_uchar_uchar ), - ADD_TEST( select_uchar_char ), - ADD_TEST( select_char_uchar ), - ADD_TEST( select_char_char ), - ADD_TEST( select_ushort_ushort ), - ADD_TEST( select_ushort_short ), - 
ADD_TEST( select_short_ushort ), - ADD_TEST( select_short_short ), - ADD_TEST( select_uint_uint ), - ADD_TEST( select_uint_int ), - ADD_TEST( select_int_uint ), - ADD_TEST( select_int_int ), - ADD_TEST( select_float_uint ), - ADD_TEST( select_float_int ), - ADD_TEST( select_ulong_ulong ), - ADD_TEST( select_ulong_long ), - ADD_TEST( select_long_ulong ), - ADD_TEST( select_long_long ), - ADD_TEST( select_double_ulong ), - ADD_TEST( select_double_long ), + ADD_TEST(select_uchar_uchar), ADD_TEST(select_uchar_char), + ADD_TEST(select_char_uchar), ADD_TEST(select_char_char), + ADD_TEST(select_ushort_ushort), ADD_TEST(select_ushort_short), + ADD_TEST(select_short_ushort), ADD_TEST(select_short_short), + ADD_TEST(select_half_ushort), ADD_TEST(select_half_short), + ADD_TEST(select_uint_uint), ADD_TEST(select_uint_int), + ADD_TEST(select_int_uint), ADD_TEST(select_int_int), + ADD_TEST(select_float_uint), ADD_TEST(select_float_int), + ADD_TEST(select_ulong_ulong), ADD_TEST(select_ulong_long), + ADD_TEST(select_long_ulong), ADD_TEST(select_long_long), + ADD_TEST(select_double_ulong), ADD_TEST(select_double_long), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h index c51ae13c2..5cd786022 100644 --- a/test_conformance/select/test_select.h +++ b/test_conformance/select/test_select.h @@ -28,18 +28,20 @@ #endif // Defines the set of types we support (no support for double) -typedef enum { +typedef enum +{ kuchar = 0, kchar = 1, kushort = 2, kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kulong = 7, - klong = 8, - kdouble = 9, - kTypeCount // always goes last + khalf = 4, + kuint = 5, + kint = 6, + kfloat = 7, + kulong = 8, + klong = 9, + kdouble = 10, + kTypeCount // always goes last } Type; @@ -56,7 +58,8 @@ extern const size_t type_size[kTypeCount]; extern const Type ctype[kTypeCount][2]; // Reference functions for the primitive (non vector) type -typedef void (*Select)(void *dest, 
void *src1, void *src2, void *cmp, size_t c); +typedef void (*Select)(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t c); extern Select refSelects[kTypeCount][2]; // Reference functions for the primtive type but uses the vector @@ -64,7 +67,8 @@ extern Select refSelects[kTypeCount][2]; extern Select vrefSelects[kTypeCount][2]; // Check functions for each output type -typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize); +typedef size_t (*CheckResults)(const void *const out1, const void *const out2, + size_t count, size_t vectorSize); extern CheckResults checkResults[kTypeCount]; // Helpful macros diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index f9641e993..b85f54a76 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -13,7 +13,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#include "harness/compat.h" #include "harness/errorHelpers.h" #include @@ -25,29 +24,28 @@ //----------------------------------------- -const char *type_name[kTypeCount] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "ulong", "long", "double" }; +const char *type_name[kTypeCount] = { "uchar", "char", "ushort", "short", + "half", "uint", "int", "float", + "ulong", "long", "double" }; const size_t type_size[kTypeCount] = { - sizeof(cl_uchar), sizeof(cl_char), - sizeof(cl_ushort), sizeof(cl_short), - sizeof(cl_uint), sizeof(cl_int), - sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) }; + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_half), sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), + sizeof(cl_ulong), sizeof(cl_long), sizeof(cl_double) +}; const Type ctype[kTypeCount][2] = { - { kuchar, kchar }, // uchar - { kuchar, kchar }, // char - { kushort, kshort}, // ushort - { kushort, kshort}, // short - { kuint, kint }, // uint - { kuint, kint }, // int - { kuint, kint }, // float - { kulong, klong }, // ulong - { kulong, klong }, // long - { kulong, klong } // double + { kuchar, kchar }, // uchar + { kuchar, kchar }, // char + { kushort, kshort }, // ushort + { kushort, kshort }, // short + { kushort, kshort }, // half + { kuint, kint }, // uint + { kuint, kint }, // int + { kuint, kint }, // float + { kulong, klong }, // ulong + { kulong, klong }, // long + { kulong, klong } // double }; @@ -55,510 +53,594 @@ const Type ctype[kTypeCount][2] = { // Reference functions //----------------------------------------- -void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char 
*)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_short*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*)dest; - x = (cl_int*)src1; - y = (cl_int*)src2; - m = (cl_int*)cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void refselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*)dest; - x = (cl_uint*)src1; - y = (cl_uint*)src2; - m = (cl_int*)cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_int *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? 
y[i] : x[i]; } -void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_long *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? 
y[i] : x[i]; } -void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*)src1; - y = (cl_ushort*)src2; - m = (cl_short*)cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? 
y[i] : x[i]; } -void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void vrefselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? 
y[i] : x[i]; } -void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? 
y[i] : x[i]; } -void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? 
y[i] : x[i]; } -void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; } -void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; +} + +void vrefselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? 
y[i] : x[i]; } -void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_ulong *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? 
y[i] : x[i]; } // Define refSelects -Select refSelects[kTypeCount][2] = { - { refselect_1u8u, refselect_1u8 }, // cl_uchar - { refselect_1i8u, refselect_1i8 }, // char +Select refSelects[kTypeCount][2] = { + { refselect_1u8u, refselect_1u8 }, // cl_uchar + { refselect_1i8u, refselect_1i8 }, // char { refselect_1u16u, refselect_1u16 }, // ushort { refselect_1i16u, refselect_1i16 }, // short + { refselect_hhu, refselect_hhi }, // half { refselect_1u32u, refselect_1u32 }, // uint { refselect_1i32u, refselect_1i32 }, // int - { refselect_ffu, refselect_ffi }, // float + { refselect_ffu, refselect_ffi }, // float { refselect_1u64u, refselect_1u64 }, // ulong { refselect_1i64u, refselect_1i64 }, // long - { refselect_ddu, refselect_ddi } // double + { refselect_ddu, refselect_ddi } // double }; // Define vrefSelects (vector refSelects) -Select vrefSelects[kTypeCount][2] = { - { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar - { vrefselect_1i8u, vrefselect_1i8 }, // char +Select vrefSelects[kTypeCount][2] = { + { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar + { vrefselect_1i8u, vrefselect_1i8 }, // char { vrefselect_1u16u, vrefselect_1u16 }, // ushort { vrefselect_1i16u, vrefselect_1i16 }, // short + { vrefselect_hhu, vrefselect_hhi }, // half { vrefselect_1u32u, vrefselect_1u32 }, // uint { vrefselect_1i32u, vrefselect_1i32 }, // int - { vrefselect_ffu, vrefselect_ffi }, // float + { vrefselect_ffu, vrefselect_ffi }, // float { vrefselect_1u64u, vrefselect_1u64 }, // ulong { vrefselect_1i64u, vrefselect_1i64 }, // long - { vrefselect_ddu, vrefselect_ddi } // double + { vrefselect_ddu, vrefselect_ddi } // double }; //----------------------------------------- // Check functions //----------------------------------------- -size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uchar *t = (const cl_uchar *) test; - const cl_uchar *c = (const cl_uchar *) correct; +size_t check_uchar(const void *const test, const void *const correct, + 
size_t count, size_t vector_size) +{ + const cl_uchar *const t = (const cl_uchar *)test; + const cl_uchar *const c = (const cl_uchar *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -576,9 +658,11 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { - const cl_char *t = (const cl_char *) test; - const cl_char *c = (const cl_char *) correct; +size_t check_char(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_char *const t = (const cl_char *)test; + const cl_char *const c = (const cl_char *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -597,9 +681,11 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ushort *t = (const cl_ushort *) test; - const cl_ushort *c = (const cl_ushort *) correct; +size_t check_ushort(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -618,9 +704,11 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_short(void *test, void *correct, size_t count, size_t vector_size) { - const cl_short *t = (const cl_short *) test; - const cl_short *c = (const cl_short *) correct; +size_t check_short(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_short *const t = (const cl_short *)test; + const cl_short *const c = (const cl_short *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -639,9 +727,11 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) 
return 0; } -size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_uint(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -660,9 +750,11 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { - const cl_int *t = (const cl_int *) test; - const cl_int *c = (const cl_int *) correct; +size_t check_int(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_int *const t = (const cl_int *)test; + const cl_int *const c = (const cl_int *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -682,9 +774,11 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_ulong(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -703,9 +797,11 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { - const cl_long *t = (const cl_long *) test; - const cl_long *c = (const cl_long *) correct; +size_t check_long(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_long *const t = (const 
cl_long *)test; + const cl_long *const c = (const cl_long *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -724,9 +820,36 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_half(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; + size_t i; + + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(((c[i] & 0x7fff) > 0x7c00) + && ((t[i] & 0x7fff) > 0x7c00))) + { + log_error("\n(check_half) Error for vector size %ld found at " + "0x%8.8lx (of 0x%8.8lx): " + "*0x%4.4x vs 0x%4.4x\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } + + return 0; +} + +size_t check_float(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -746,9 +869,11 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size return 0; } -size_t check_double( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_double(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -770,5 +895,7 @@ size_t check_double( void *test, void *correct, size_t count, size_t
vector_size } CheckResults checkResults[kTypeCount] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_ulong, check_long, check_double }; + check_uchar, check_char, check_ushort, check_short, + check_half, check_uint, check_int, check_float, + check_ulong, check_long, check_double +}; diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 new file mode 100644 index 000000000..491271874 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 @@ -0,0 +1,35 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 17 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %11 = OpLabel + %12 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %13 = OpCompositeExtract %uint %12 0 + %14 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %13 + %15 = OpLoad %half %14 + %16 = OpFNegate %half %15 + OpStore %14 %16 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 new file mode 
100644 index 000000000..9c7e3d6df --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 @@ -0,0 +1,39 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 20 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half + %ulong_32 = OpConstant %ulong 32 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %12 = OpLabel + %13 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %14 = OpCompositeExtract %ulong %13 0 + %15 = OpShiftLeftLogical %ulong %14 %ulong_32 + %16 = OpShiftRightArithmetic %ulong %15 %ulong_32 + %17 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %16 + %18 = OpLoad %half %17 + %19 = OpFNegate %half %18 + OpStore %17 %19 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 new file mode 100644 index 000000000..985b52622 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 22 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + 
OpCapability Kernel + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %13 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half + %14 = OpFunctionParameter %uint + %15 = OpLabel + %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %17 = OpCompositeExtract %uint %16 0 + %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %17 + %19 = OpLoad %v8half %18 + %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %17 + %21 = OpVectorExtractDynamic %half %19 %14 + OpStore %20 %21 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 new file mode 100644 index 000000000..dd14f66c9 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 26 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Vector16 +
OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %15 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpFunctionParameter %uint + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %21 + %23 = OpLoad %v8half %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %21 + %25 = OpVectorExtractDynamic %half %23 %16 + OpStore %24 %25 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 new file mode 100644 index 000000000..278129388 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 @@ -0,0 +1,43 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 23 +; Schema: 0 + 
OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %13 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %14 = OpFunctionParameter %uint + %15 = OpLabel + %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %17 = OpCompositeExtract %uint %16 0 + %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17 + %19 = OpLoad %half %18 + %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %17 + %21 = OpLoad %v8half %20 + %22 = OpVectorInsertDynamic %v8half %21 %19 %14 + OpStore %20 %22 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 new file mode 100644 index 000000000..f140fc253 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 27 +; Schema: 0 + OpCapability Addresses + OpCapability 
Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %15 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %16 = OpFunctionParameter %uint + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %21 + %23 = OpLoad %half %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %21 + %25 = OpLoad %v8half %24 + %26 = OpVectorInsertDynamic %v8half %25 %23 %16 + OpStore %24 %26 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 new file mode 100644 index 000000000..6fda7d8f1 --- /dev/null +++ 
b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 25 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %15 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpLabel + %17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %18 = OpCompositeExtract %uint %17 0 + %19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %18 + %20 = OpLoad %v4half %19 Aligned 8 + %21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18 + %22 = OpLoad %half %21 Aligned 2 + %23 = OpVectorTimesScalar %v4half %20 %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %18 + OpStore %24 %23 Aligned 8 + OpReturn + OpFunctionEnd diff --git 
a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 new file mode 100644 index 000000000..fa2d52210 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 28 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half + %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %16 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %21 + 
%23 = OpLoad %v4half %22 Aligned 8 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21 + %25 = OpLoad %half %24 Aligned 2 + %26 = OpVectorTimesScalar %v4half %23 %25 + %27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %21 + OpStore %27 %26 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp index e3dc1f349..5009be931 100644 --- a/test_conformance/spirv_new/test_op_negate.cpp +++ b/test_conformance/spirv_new/test_op_negate.cpp @@ -32,6 +32,15 @@ int test_negation(cl_device_id deviceID, return 0; } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } cl_int err = CL_SUCCESS; int num = (int)h_in.size(); @@ -73,29 +82,28 @@ int test_negation(cl_device_id deviceID, return 0; } -#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ - TEST_SPIRV_FUNC(OP##_##TYPE) \ - { \ - int num = 1 << 20; \ - std::vector in(num); \ - RandomSeed seed(gRandomSeed); \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - return test_negation(deviceID, \ - context, \ - queue, \ - #TYPE, \ - #OP, \ - in, FUNC); \ - } \ +#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ + TEST_SPIRV_FUNC(OP##_##TYPE) \ + { \ + int num = 1 << 20; \ + std::vector in(num); \ + RandomSeed seed(gRandomSeed); \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + return test_negation(deviceID, context, queue, #TYPE, #OP, in, \ + FUNC); \ + } +#define TEST_NEG_HALF TEST_NEGATION(half, cl_half, op_neg, negOpHalf) #define TEST_NEG(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_neg, negOp) #define TEST_NOT(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_not, notOp) #define TEST_NEG_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_neg, (negOpVec)) #define TEST_NOT_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, 
op_not, (notOpVec)) +TEST_NEG_HALF TEST_NEG(float) TEST_NEG(double) TEST_NEG(int) diff --git a/test_conformance/spirv_new/test_op_vector_extract.cpp b/test_conformance/spirv_new/test_op_vector_extract.cpp index fe1f82538..f77aa7a2e 100644 --- a/test_conformance/spirv_new/test_op_vector_extract.cpp +++ b/test_conformance/spirv_new/test_op_vector_extract.cpp @@ -25,6 +25,17 @@ int test_extract(cl_device_id deviceID, cl_context context, return 0; } } + + if (std::string(name).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; clProgramWrapper prog; @@ -76,27 +87,30 @@ int test_extract(cl_device_id deviceID, cl_context context, return 0; } -#define TEST_VECTOR_EXTRACT(TYPE, N) \ - TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \ - { \ - typedef cl_##TYPE##N Tv; \ - typedef cl_##TYPE Ts; \ - const int num = 1 << 20; \ - std::vector in(num); \ - const char *name = "vector_" #TYPE #N "_extract"; \ - \ - RandomSeed seed(gRandomSeed); \ - \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - \ - return test_extract(deviceID, \ - context, queue, \ - name, \ - in, N); \ +#define TEST_VECTOR_EXTRACT(TYPE, N) \ + TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \ + { \ + if (sizeof(cl_##TYPE) == 2) \ + { \ + PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \ + } \ + typedef cl_##TYPE##N Tv; \ + typedef cl_##TYPE Ts; \ + const int num = 1 << 20; \ + std::vector in(num); \ + const char *name = "vector_" #TYPE #N "_extract"; \ + \ + RandomSeed seed(gRandomSeed); \ + \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + \ + return test_extract(deviceID, context, queue, name, in, N); \ } +TEST_VECTOR_EXTRACT(half, 8) TEST_VECTOR_EXTRACT(int, 4) TEST_VECTOR_EXTRACT(float, 4) TEST_VECTOR_EXTRACT(long, 2) diff --git a/test_conformance/spirv_new/test_op_vector_insert.cpp 
b/test_conformance/spirv_new/test_op_vector_insert.cpp index 0749c14ab..62fc78cb5 100644 --- a/test_conformance/spirv_new/test_op_vector_insert.cpp +++ b/test_conformance/spirv_new/test_op_vector_insert.cpp @@ -25,6 +25,17 @@ int test_insert(cl_device_id deviceID, cl_context context, return 0; } } + + if (std::string(name).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; clProgramWrapper prog; err = get_program_with_il(prog, deviceID, context, name); @@ -94,27 +105,30 @@ int test_insert(cl_device_id deviceID, cl_context context, return 0; } -#define TEST_VECTOR_INSERT(TYPE, N) \ - TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \ - { \ - typedef cl_##TYPE##N Tv; \ - typedef cl_##TYPE Ts; \ - const int num = 1 << 20; \ - std::vector in(num); \ - const char *name = "vector_" #TYPE #N "_insert"; \ - \ - RandomSeed seed(gRandomSeed); \ - \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand(seed); \ - } \ - \ - return test_insert(deviceID, \ - context, queue, \ - name, \ - in, N); \ +#define TEST_VECTOR_INSERT(TYPE, N) \ + TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \ + { \ + if (sizeof(cl_##TYPE) == 2) \ + { \ + PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \ + } \ + typedef cl_##TYPE##N Tv; \ + typedef cl_##TYPE Ts; \ + const int num = 1 << 20; \ + std::vector in(num); \ + const char *name = "vector_" #TYPE #N "_insert"; \ + \ + RandomSeed seed(gRandomSeed); \ + \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand(seed); \ + } \ + \ + return test_insert(deviceID, context, queue, name, in, N); \ } +TEST_VECTOR_INSERT(half, 8) TEST_VECTOR_INSERT(int, 4) TEST_VECTOR_INSERT(float, 4) TEST_VECTOR_INSERT(long, 2) diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 0859668cb..0be4e8b71 100644 --- 
a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -17,6 +17,8 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos #include #include +using half = cl_half; + template int test_vector_times_scalar(cl_device_id deviceID, cl_context context, @@ -32,6 +34,16 @@ int test_vector_times_scalar(cl_device_id deviceID, } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; int num = (int)h_lhs.size(); size_t lhs_bytes = num * sizeof(Tv); @@ -171,5 +183,7 @@ int test_vector_times_scalar(cl_device_id deviceID, lhs, rhs); \ } + TEST_VECTOR_TIMES_SCALAR(float, 4) TEST_VECTOR_TIMES_SCALAR(double, 4) +TEST_VECTOR_TIMES_SCALAR(half, 4) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index e7fceba0c..728b24455 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -43,6 +43,8 @@ VEC_NOT_EQ_FUNC(cl_float, 2) VEC_NOT_EQ_FUNC(cl_float, 4) VEC_NOT_EQ_FUNC(cl_double, 2) VEC_NOT_EQ_FUNC(cl_double, 4) +VEC_NOT_EQ_FUNC(cl_half, 2) +VEC_NOT_EQ_FUNC(cl_half, 4) template bool isNotEqual(const T &lhs, const T &rhs) @@ -109,6 +111,9 @@ GENRAND_REAL_FUNC(cl_float, 2) GENRAND_REAL_FUNC(cl_float, 4) GENRAND_REAL_FUNC(cl_double, 2) GENRAND_REAL_FUNC(cl_double, 4) +GENRAND_REAL_FUNC(cl_half, 2) +GENRAND_REAL_FUNC(cl_half, 4) +GENRAND_REAL_FUNC(cl_half, 8) template<> inline cl_half genrandReal(RandomSeed &seed) { @@ -157,6 +162,8 @@ Tv negOp(Tv in) return -in; } +inline cl_half negOpHalf(cl_half v) { return v ^ 0x8000; } + template Tv notOp(Tv in) { diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index f779ef370..d9dfc3b8c 100644 --- 
a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -483,29 +483,30 @@ template struct SHF static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int ii, i, j, k, n; + int ii, k; + size_t n; cl_uint l; - int nw = test_params.local_workgroup_size; - int ns = test_params.subgroup_size; + size_t nw = test_params.local_workgroup_size; + size_t ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - int nj = (nw + ns - 1) / ns; + size_t nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; for (k = 0; k < ng; ++k) { // for each work_group - for (j = 0; j < nw; ++j) + for (size_t j = 0; j < nw; ++j) { // inside the work_group mx[j] = x[j]; // read host inputs for work_group my[j] = y[j]; // read device outputs for work_group } - for (j = 0; j < nj; ++j) + for (size_t j = 0; j < nj; ++j) { // for each subgroup ii = j * ns; n = ii + ns > nw ? nw - ii : ns; - for (i = 0; i < n; ++i) + for (size_t i = 0; i < n; ++i) { // inside the subgroup // shuffle index storage int midx = 4 * ii + 4 * i + 2; diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp index 11268f640..440cde20f 100644 --- a/test_conformance/subgroups/subhelpers.cpp +++ b/test_conformance/subgroups/subhelpers.cpp @@ -206,7 +206,7 @@ void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups, } void fill_and_shuffle_safe_values(std::vector &safe_values, - int sb_size) + size_t sb_size) { // max product is 720, cl_half has enough precision for it const std::vector non_one_values{ 2, 3, 4, 5, 6 }; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index bcb523cf8..ed92e5d3c 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -44,7 +44,7 @@ cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, // for each subgroup values 
defined different values // for rest of workitems set 1 shuffle values void fill_and_shuffle_safe_values(std::vector &safe_values, - int sb_size); + size_t sb_size); struct WorkGroupParams { diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index b69f31385..5b2a5eb83 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -36,7 +36,7 @@ struct get_test_data }; static int check_group(const get_test_data *result, int nw, cl_uint ensg, - int maxwgs) + size_t maxwgs) { int first = -1; int last = -1; @@ -168,7 +168,7 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg, j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5); - if (j < sizeof(hit) / 4) + if (j < static_cast(sizeof(hit) / 4)) { cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); if ((hit[j] & b) != 0) @@ -191,7 +191,7 @@ int test_work_item_functions(cl_device_id device, cl_context context, static const size_t lsize = 200; int error; int i, j, k, q, r, nw; - int maxwgs; + size_t maxwgs; cl_uint ensg; size_t global; size_t local; @@ -235,7 +235,7 @@ int test_work_item_functions(cl_device_id device, cl_context context, error = get_max_allowed_work_group_size(context, kernel, &local, NULL); if (error != 0) return error; - maxwgs = (int)local; + maxwgs = local; // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple subgroups diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp index 5901420ad..3d7b30e76 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -52,7 +52,8 @@ static void params_reset() } extern int test_buffer_common(cl_device_id device_, cl_context context_, - cl_command_queue queue_, int numElements_); + cl_command_queue queue_, int numElements_, + float use_fence); extern int 
test_image_common(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_); @@ -61,7 +62,7 @@ int test_buffer_single_queue(cl_device_id device_, cl_context context_, { params_reset(); log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -69,7 +70,7 @@ int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, params_reset(); numCQ = 2; log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -78,7 +79,7 @@ int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, multiImport = true; log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " "IN SAME CONTEXT...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -88,7 +89,45 @@ int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, multiCtx = true; log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " "IN DIFFERENT CONTEXT...... 
\n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); +} +int test_buffer_single_queue_fence(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiple_queue_fence(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiImport_sameCtx_fence(cl_device_id device_, + cl_context context_, + cl_command_queue queue_, + int numElements_) +{ + params_reset(); + multiImport = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN SAME CONTEXT...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiImport_diffCtx_fence(cl_device_id device_, + cl_context context_, + cl_command_queue queue_, + int numElements_) +{ + params_reset(); + multiImport = true; + multiCtx = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN DIFFERENT CONTEXT...... 
\n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); } int test_image_single_queue(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -110,6 +149,10 @@ test_definition test_list[] = { ADD_TEST(buffer_single_queue), ADD_TEST(buffer_multiple_queue), ADD_TEST(buffer_multiImport_sameCtx), ADD_TEST(buffer_multiImport_diffCtx), + ADD_TEST(buffer_single_queue_fence), + ADD_TEST(buffer_multiple_queue_fence), + ADD_TEST(buffer_multiImport_sameCtx_fence), + ADD_TEST(buffer_multiImport_diffCtx_fence), ADD_TEST(image_single_queue), ADD_TEST(image_multiple_queue), ADD_TEST(consistency_external_buffer), diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 9b0bc9de7..5390ef690 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "harness/errorHelpers.h" @@ -82,7 +83,8 @@ __kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __g int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, cl_command_queue &cmd_queue2, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize) + uint32_t numBuffers, uint32_t bufferSize, + bool use_fence) { int err = CL_SUCCESS; size_t global_work_size[1]; @@ -117,6 +119,7 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -136,10 +139,17 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanDescriptorSet 
vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); @@ -227,16 +237,27 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); @@ -286,7 +307,14 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, goto CLEANUP; } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFlush(cmd_queue1); + clFlush(cmd_queue2); + clFinish(cmd_queue1); + clFinish(cmd_queue2); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue2); } @@ -387,8 +415,11 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, } if (program) clReleaseProgram(program); if (kernel_cq) 
clReleaseKernel(kernel_cq); - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); @@ -398,7 +429,7 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, - uint32_t bufferSize) + uint32_t bufferSize, bool use_fence) { log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); size_t global_work_size[1]; @@ -416,6 +447,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -434,10 +466,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } + const uint32_t maxIter = innerIterations; 
VulkanCommandPool vkCommandPool(vkDevice); VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); @@ -526,16 +566,26 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); @@ -562,7 +612,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, " error\n"); goto CLEANUP; } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFlush(cmd_queue1); + clFinish(cmd_queue1); + } + else if (!use_fence && (iter != (maxIter - 1))) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -656,8 +711,13 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, delete externalMemory[i]; } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } + if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); return err; @@ -666,7 +726,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, int run_test_with_multi_import_same_ctx( cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, - uint32_t bufferSize, uint32_t 
bufferSizeForOffset) + uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence) { size_t global_work_size[1]; uint8_t *error_2; @@ -687,6 +747,7 @@ int run_test_with_multi_import_same_ctx( getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -706,10 +767,18 @@ int run_test_with_multi_import_same_ctx( VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } + const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); @@ -832,16 +901,34 @@ int run_test_with_multi_import_same_ctx( for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } } - clVk2CLExternalSemaphore->wait(cmd_queue1); + + if (use_fence) + { + fence->wait(); + } + else + { + 
clVk2CLExternalSemaphore->wait(cmd_queue1); + } + for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) { @@ -874,7 +961,11 @@ int run_test_with_multi_import_same_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue1); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -987,8 +1078,13 @@ int run_test_with_multi_import_same_ctx( } } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } + if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); return err; @@ -998,7 +1094,8 @@ int run_test_with_multi_import_diff_ctx( cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1, cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset) + uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset, + float use_fence) { size_t global_work_size[1]; uint8_t *error_3; @@ -1023,6 +1120,7 @@ int run_test_with_multi_import_diff_ctx( getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -1042,15 +1140,24 @@ int run_test_with_multi_import_diff_ctx( VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - 
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - - clVk2CLExternalSemaphore2 = new clExternalSemaphore( - vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore2 = new clExternalSemaphore( - vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + clVk2CLExternalSemaphore2 = + new clExternalSemaphore(vkVk2CLSemaphore, context2, + vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore2 = + new clExternalSemaphore(vkCl2VkSemaphore, context2, + vkExternalSemaphoreHandleType, deviceId); + } const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); @@ -1192,16 +1299,33 @@ int run_test_with_multi_import_diff_ctx( for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + } + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) @@ -1235,7 +1359,11 @@ int run_test_with_multi_import_diff_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue1); + } + else if (!use_fence && iter != (maxIter - 1)) { 
clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -1243,16 +1371,33 @@ int run_test_with_multi_import_diff_ctx( clFinish(cmd_queue1); for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + } + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore2->wait(cmd_queue2); } - clVk2CLExternalSemaphore2->wait(cmd_queue2); for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) @@ -1286,7 +1431,11 @@ int run_test_with_multi_import_diff_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue2); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore2->signal(cmd_queue2); } @@ -1474,10 +1623,15 @@ int run_test_with_multi_import_diff_ctx( } } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; - if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2; - if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2; + if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + } + if (error_3) free(error_3); if (error_1) clReleaseMemObject(error_1); if (error_2) clReleaseMemObject(error_2); @@ -1485,7 +1639,8 @@ int run_test_with_multi_import_diff_ctx( } int test_buffer_common(cl_device_id device_, cl_context context_, - cl_command_queue queue_, int numElements_) + 
cl_command_queue queue_, int numElements_, + float use_fence) { int current_device = 0; @@ -1738,26 +1893,26 @@ int test_buffer_common(cl_device_id device_, cl_context context_, { errNum = run_test_with_multi_import_same_ctx( context, cmd_queue1, kernel, verify_kernel, vkDevice, - numBuffers, bufferSize, bufferSizeForOffset); + numBuffers, bufferSize, bufferSizeForOffset, use_fence); } else if (multiImport && multiCtx) { errNum = run_test_with_multi_import_diff_ctx( context, context2, cmd_queue1, cmd_queue3, kernel, kernel2, verify_kernel, verify_kernel2, vkDevice, numBuffers, - bufferSize, bufferSizeForOffset); + bufferSize, bufferSizeForOffset, use_fence); } else if (numCQ == 2) { errNum = run_test_with_two_queue( context, cmd_queue1, cmd_queue2, kernel, verify_kernel, - vkDevice, numBuffers + 1, bufferSize); + vkDevice, numBuffers + 1, bufferSize, use_fence); } else { - errNum = run_test_with_one_queue(context, cmd_queue1, kernel, - verify_kernel, vkDevice, - numBuffers, bufferSize); + errNum = run_test_with_one_queue( + context, cmd_queue1, kernel, verify_kernel, vkDevice, + numBuffers, bufferSize, use_fence); } if (errNum != CL_SUCCESS) { diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp index 41abd1249..f9b574e45 100644 --- a/test_conformance/workgroups/test_wg_all.cpp +++ b/test_conformance/workgroups/test_wg_all.cpp @@ -75,7 +75,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git 
a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp index e0242cfb4..f7ff899a3 100644 --- a/test_conformance/workgroups/test_wg_any.cpp +++ b/test_conformance/workgroups/test_wg_any.cpp @@ -75,7 +75,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp index e24ac7b98..a4cb0c6fe 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -70,7 +70,7 @@ verify_wg_broadcast_1D(float *inptr, float *outptr, size_t n, size_t wg_size) for (i=0,group_id=0; i wg_size ? wg_size : (n-i); + size_t local_size = (n - i) > wg_size ? wg_size : (n - i); float broadcast_result = inptr[i + (group_id % local_size)]; for (j=0; j