From 095091bc5755fb3a239f049a6a8ade1d82169fc6 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 08:39:22 +0200 Subject: [PATCH 1/5] Added cl_khr_fp16 extension support for test_vec_type_hint from basic (#1724) * Added cl_khr_fp16 extension support for test_vec_type_hint from basic (issue #142, basic) * Added correction to fix casting problem --- test_conformance/basic/test_vec_type_hint.cpp | 152 ++++++++++-------- 1 file changed, 85 insertions(+), 67 deletions(-) diff --git a/test_conformance/basic/test_vec_type_hint.cpp b/test_conformance/basic/test_vec_type_hint.cpp index 33168b136..0ba105db6 100644 --- a/test_conformance/basic/test_vec_type_hint.cpp +++ b/test_conformance/basic/test_vec_type_hint.cpp @@ -13,28 +13,27 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include #include #include #include #include - +#include #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" - static const char *sample_kernel = { - "%s\n" // optional pragma string - "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = src[tid];\n" - "\n" - "}\n" + "%s\n" + "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global " + "int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" }; int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) @@ -42,66 +41,85 @@ int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_que int error; int vec_type_index, vec_size_index; - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - const char *size_names[] = {"", "2", "4", "8", "16"}; - char *program_source; - - program_source = (char*)malloc(sizeof(char)*4096); + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, + kLong, kULong, kFloat, kHalf, kDouble }; + const char *size_names[] = { "", "2", "4", "8", "16" }; + std::vector program_source(4096); + + for (vec_type_index = 0; + vec_type_index < sizeof(vecType) / sizeof(vecType[0]); vec_type_index++) + { + + if (vecType[vec_type_index] == kHalf + && !is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + continue; + } + else if (vecType[vec_type_index] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info( + "Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + else if ((vecType[vec_type_index] == kLong + || vecType[vec_type_index] == kULong) + && !gHasLong) + { + log_info( + "Extension cl_khr_int64 not supported; skipping long tests.\n"); + continue; + } - for (vec_type_index=0; vec_type_index<10; vec_type_index++) { - if (vecType[vec_type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); - continue; + for (vec_size_index = 0; vec_size_index < 5; vec_size_index++) + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + size_t global[] = { 1, 1, 1 }; + + log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + char extension[128] = { 0 }; + if (vecType[vec_type_index] == kDouble) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); + else if (vecType[vec_type_index] == kHalf) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + + sprintf(program_source.data(), sample_kernel, extension, + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + + const char *src = &program_source.front(); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &src, "sample_test"); + test_error(error, "create_single_kernel_helper failed"); + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, + 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); } - log_info("Testing doubles.\n"); - } - - if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong) - { - if (!gHasLong) - { - log_info("Extension cl_khr_int64 not supported; skipping long tests.\n"); - continue; - } - } - - for (vec_size_index=0; vec_size_index<5; vec_size_index++) { - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper in, out; - size_t global[] = {1,1,1}; - - log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - program_source[0] = '\0'; - sprintf(program_source, sample_kernel, - (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); - if( error != 0 ) - return error; - - in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - error = clSetKernelArg(kernel, 0, sizeof(in), &in); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 1, sizeof(out), &out); - test_error(error, "clSetKernelArg failed"); - - error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - } } - free(program_source); - return 0; } From 16a75dc0af2e0c55d27a91ffefd0aa1b97b3f484 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 13 Jun 2023 17:41:39 +0200 Subject: [PATCH 2/5] Added cl_khr_fp16 extension support for test_vector_creation from basic (#1728) * Added cl_khr_fp16 extension support for vector_creation test from basic * Added corrections related to vendor's review * Added protection to avoid similar creation cases * Added comment for recent correction * cosmetics * Corrected factor array to restore lost capacity of original test.. leaving only 16-sizes vector tests limited. --- .../basic/test_vector_creation.cpp | 489 +++++++++++------- 1 file changed, 294 insertions(+), 195 deletions(-) diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index d9530b4e9..801c72b18 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,48 +17,41 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include - - +#include #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. -#define MAX_CODE_SIZE (1024*32) - -const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; -const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; - -// Creates a kernel by enumerating all possible ways of building the vector out of vloads -// skip_to_results will skip results up to a given number. If the amount of code generated -// is greater than MAX_CODE_SIZE, this function will return the number of results used, -// which can then be used as the skip_to_result value to continue where it left off. -int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { +#define MAX_CODE_SIZE (1024 * 32) + +static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 }; +static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 }; +static const char *size_names[] = { "", "2", "3", "4", "8", + "16", "!!a", "!!b", "!!c", "!!d" }; +static char extension[128] = { 0 }; + +// Creates a kernel by enumerating all possible ways of building the vector out +// of vloads skip_to_results will skip results up to a given number. If the +// amount of code generated is greater than MAX_CODE_SIZE, this function will +// return the number of results used, which can then be used as the +// skip_to_result value to continue where it left off. +int create_kernel(ExplicitType type, int output_size, char *program, + int *number_of_results, int skip_to_result) +{ int number_of_sizes; - switch (output_size) { - case 1: - number_of_sizes = 1; - break; - case 2: - number_of_sizes = 2; - break; - case 3: - number_of_sizes = 3; - break; - case 4: - number_of_sizes = 4; - break; - case 8: - number_of_sizes = 5; - break; - case 16: - number_of_sizes = 6; - break; - default: - log_error("Invalid size: %d\n", output_size); - return -1; + switch (output_size) + { + case 1: number_of_sizes = 1; break; + case 2: number_of_sizes = 2; break; + case 3: number_of_sizes = 3; break; + case 4: number_of_sizes = 4; break; + case 8: number_of_sizes = 5; break; + case 16: number_of_sizes = 6; break; + default: log_error("Invalid size: %d\n", output_size); return -1; } int total_results = 0; @@ -67,102 +60,125 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number int total_program_length = 0; int aborted_due_to_size = 0; - if (skip_to_result < 0) - skip_to_result = 0; + if (skip_to_result < 0) skip_to_result = 0; // The line of code for the vector creation char line[1024]; - // Keep track of what size vector we are using in each position so we can iterate through all fo them + // Keep track of what size vector we are using in each position so we can + // iterate through all fo them int pos[DEPTH]; int max_size = output_size; if (DEBUG > 1) log_info("max_size: %d\n", max_size); program[0] = '\0'; - sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", - type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + sprintf(program, + "%s\n__kernel void test_vector_creation(__global %s *src, __global " + "%s%s *result) {\n", + extension, get_explicit_type_name(type), + get_explicit_type_name(type), + (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]); total_program_length += (int)strlen(program); - char storePrefix[ 128 ], storeSuffix[ 128 ]; + char storePrefix[128], storeSuffix[128]; - // Start out trying sizes 1,1,1,1,1... - for (int i=0; i 1) { + while (!done) + { + if (DEBUG > 1) + { log_info("pos size[] = ["); - for (int k=0; k 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + if (DEBUG > 1) + log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); - // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // If they did not fit the required size exactly it is too long, so + // there is no point in checking any other combinations // of the sizes to the right. Prune them from the search. - if (size_so_far != max_size) { + if (size_so_far != max_size) + { // Zero all the sizes to the right - for (int k=vloads+1; k=0; d--) { + for (int d = vloads; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over then we are done done = 1; break; } - } else { + } + else + { break; } } - // Go on to the next size since this one (and all others "under" it) didn't fit + // Go on to the next size since this one (and all others "under" it) + // didn't fit continue; } // Generate the actual load line if we are building this part - line[0]= '\0'; - if (skip_to_result == 0 || total_results >= skip_to_result) { - if( number_of_sizes == 3 ) + line[0] = '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) + { + if (number_of_sizes == 3) { - sprintf( storePrefix, "vstore3( " ); - sprintf( storeSuffix, ", %d, result )", current_result ); + sprintf(storePrefix, "vstore3( "); + sprintf(storeSuffix, ", %d, result )", current_result); } else { - sprintf( storePrefix, "result[%d] = ", current_result ); - storeSuffix[ 0 ] = 0; + sprintf(storePrefix, "result[%d] = ", current_result); + storeSuffix[0] = 0; } - sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + sprintf(line, "\t%s(%s%d)(", storePrefix, + get_explicit_type_name(type), output_size); current_result++; int offset = 0; - for (int i=0; i MAX_CODE_SIZE) { + if (total_program_length > MAX_CODE_SIZE) + { aborted_due_to_size = 1; done = 1; } @@ -179,132 +196,194 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number if (DEBUG) log_info("line is: %s", line); - // If we did not use all of them, then we ignore any changes further to the right. - // We do this by causing those loops to skip on the next iteration. - if (vloads < DEPTH) { + // If we did not use all of them, then we ignore any changes further to + // the right. We do this by causing those loops to skip on the next + // iteration. + if (vloads < DEPTH) + { if (DEBUG > 1) log_info("done with this depth\n"); - for (int k=vloads; k=0; d--) { + for (int d = DEPTH - 1; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over at the far-left then we are done done = 1; break; } - } else { + } + else + { break; } } - if (done) - break; + if (done) break; // Continue until we are done. } - strcat(program, "}\n\n"); //log_info("%s\n", program); + strcat(program, "}\n\n"); // log_info("%s\n", program); total_program_length += 3; - if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", - get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + if (DEBUG) + log_info( + "\t\t(Program for vector type %s%s contains %d vector creations, " + "of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes - 1], + total_results, total_program_length / 1024.0, total_vloads); *number_of_results = current_result; - if (aborted_due_to_size) - return total_results; + if (aborted_due_to_size) return total_results; return 0; } - - -int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_vector_creation(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + const std::vector vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + // should be in sync with global array size_names + const std::vector vecSizes = { 1, 2, 3, 4, 8, 16 }; - char *program_source; - int error; + int error = CL_SUCCESS; int total_errors = 0; + int number_of_results = 0; - cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - void *input_data_converted; - void *output_data; - - int number_of_results;; - - input_data_converted = malloc(sizeof(cl_double)*16); - program_source = (char*)malloc(sizeof(char)*1024*1024*4); + std::vector input_data_converted(sizeof(cl_double) * 16); + std::vector program_source(sizeof(char) * 1024 * 1024 * 4); + std::vector output_data; // Iterate over all the types - for (int type_index=0; type_index<10; type_index++) { - if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + for (int type_index = 0; type_index < vecType.size(); type_index++) { - log_info("Long/ULong data type not supported on this device\n"); - continue; - } - - clMemWrapper input; - if (vecType[type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + if (!gHasLong + && ((vecType[type_index] == kLong) + || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + else if (vecType[type_index] == kDouble) + { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double " + "tests.\n"); continue; } - log_info("Testing doubles.\n"); + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); } + else if (vecType[type_index] == kHalf) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + continue; + } + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + } + + log_info("Testing %s.\n", get_explicit_type_name(vecType[type_index])); // Convert the data to the right format for the test. - memset(input_data_converted, 0xff, sizeof(cl_double)*16); - if (vecType[type_index] != kDouble) { - for (int j=0; j<16; j++) { - convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, - kInt, 0, kRoundToEven, vecType[type_index]); + memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16); + if (vecType[type_index] == kDouble) + { + const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15 }; + memcpy(input_data_converted.data(), &input_data_double, + sizeof(cl_double) * 16); + } + else if (vecType[type_index] == kHalf) + { + cl_half *buf = + reinterpret_cast(input_data_converted.data()); + for (int j = 0; j < 16; j++) + buf[j] = cl_half_from_float(float(j), CL_HALF_RTE); + } + else + { + for (int j = 0; j < 16; j++) + { + convert_explicit_value( + &j, + ((char *)input_data_converted.data()) + + get_explicit_type_size(vecType[type_index]) * j, + kInt, 0, kRoundToEven, vecType[type_index]); } - } else { - memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); } - input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, - (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); - if (error) { + clMemWrapper input = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + get_explicit_type_size(vecType[type_index]) * 16, + input_data_converted.data(), &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; continue; } // Iterate over all the vector sizes. - for (int size_index=1; size_index< 5; size_index++) { - size_t global[] = {1,1,1}; + for (int size_index = 1; size_index < vecSizes.size(); size_index++) + { + size_t global[] = { 1, 1, 1 }; int number_generated = -1; int previous_number_generated = 0; - log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); - while (number_generated != 0) { + log_info("Testing %s%s...\n", + get_explicit_type_name(vecType[type_index]), + size_names[size_index]); + while (number_generated != 0) + { clMemWrapper output; clKernelWrapper kernel; clProgramWrapper program; - number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); - if (number_generated != 0) { + number_generated = + create_kernel(vecType[type_index], vecSizes[size_index], + program_source.data(), &number_of_results, + number_generated); + if (number_generated != 0) + { if (previous_number_generated == 0) - log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); - log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + log_info("Code size greater than %gkB; splitting test " + "into multiple kernels.\n", + MAX_CODE_SIZE / 1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", + previous_number_generated, number_generated - 1); } - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); - if (error) { + char *src = program_source.data(); + error = create_single_kernel_helper(context, &program, &kernel, + 1, (const char **)&src, + "test_vector_creation"); + if (error) + { log_error("create_single_kernel_helper failed.\n"); total_errors++; break; } - output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - NULL, &error); - if (error) { + output = clCreateBuffer( + context, CL_MEM_WRITE_ONLY, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + NULL, &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; break; @@ -312,95 +391,115 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_q error = clSetKernelArg(kernel, 0, sizeof(input), &input); error |= clSetKernelArg(kernel, 1, sizeof(output), &output); - if (error) { + if (error) + { print_error(error, "clSetKernelArg failed"); total_errors++; break; } - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - if (error) { + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, + NULL, 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueNDRangeKernel failed"); total_errors++; break; } error = clFinish(queue); - if (error) { + if (error) + { print_error(error, "clFinish failed"); total_errors++; break; } - output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - if (output_data == NULL) { - log_error("Failed to allocate memory for output data.\n"); - total_errors++; - break; - } - memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - output_data, 0, NULL, NULL); - if (error) { + output_data.resize(number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + memset(output_data.data(), 0xff, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + error = clEnqueueReadBuffer( + queue, output, CL_TRUE, 0, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + output_data.data(), 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueReadBuffer failed"); total_errors++; - free(output_data); break; } // Check the results - char *res = (char *)output_data; - char *exp = (char *)input_data_converted; - for (int i=0; i Date: Fri, 16 Jun 2023 10:53:08 +0100 Subject: [PATCH 3/5] basic: fix unused-but-set variables (#1764) Remove the unused `numItems` variable. As this fixes all occurrences of this warning in test_basic, remove the suppression flag. Signed-off-by: Sven van Haastregt --- test_conformance/basic/CMakeLists.txt | 2 -- test_conformance/basic/test_work_item_functions.cpp | 3 --- 2 files changed, 5 deletions(-) diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index adf24bd80..c07d32b66 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -70,6 +70,4 @@ if(APPLE) list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp) endif(APPLE) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") - include(../CMakeCommon.txt) diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp index d95915cf5..9683a8342 100644 --- a/test_conformance/basic/test_work_item_functions.cpp +++ b/test_conformance/basic/test_work_item_functions.cpp @@ -91,7 +91,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma { for( int i = 0; i < NUM_TESTS; i++ ) { - size_t numItems = 1; for( size_t j = 0; j < dim; j++ ) { // All of our thread sizes should be within the max local sizes, since they're all <= 20 @@ -100,8 +99,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) localThreads[ j ]--; - numItems *= threads[ j ]; - // Hack for now: localThreads > 1 are iffy localThreads[ j ] = 1; } From 0e229b8f01afc9e16ca83234b656830c26f11215 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:42:57 +0200 Subject: [PATCH 4/5] Added cl_khr_fp16 extension support for test_fpmath from basic (#1718) * Added half and double support for fpmath test from basic (issue #142, basic) * Cosmetic corrections due to code review * Removed unnecessary casting * Added corrections due to code review * Tuning range of input generation to avoid hitting infinity * Moved string helpers procedures due to request from test_commonfns PR #1695 --- .../harness/stringHelpers.h | 0 test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 37 +- test_conformance/basic/procs.h | 10 +- test_conformance/basic/test_astype.cpp | 7 +- test_conformance/basic/test_fpmath.cpp | 386 ++++++++++++++++++ test_conformance/basic/test_fpmath_float.cpp | 196 --------- 7 files changed, 427 insertions(+), 211 deletions(-) rename test_conformance/basic/utils.h => test_common/harness/stringHelpers.h (100%) create mode 100644 test_conformance/basic/test_fpmath.cpp delete mode 100644 test_conformance/basic/test_fpmath_float.cpp diff --git a/test_conformance/basic/utils.h b/test_common/harness/stringHelpers.h similarity index 100% rename from test_conformance/basic/utils.h rename to test_common/harness/stringHelpers.h diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c07d32b66..c89a93cf0 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -2,7 +2,7 @@ set(MODULE_NAME BASIC) set(${MODULE_NAME}_SOURCES main.cpp - test_fpmath_float.cpp + test_fpmath.cpp test_intmath.cpp test_hiloeo.cpp test_local.cpp test_pointercast.cpp test_if.cpp test_loop.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 86c3cec35..24262dbf9 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,14 +22,15 @@ #include #include #include + +#include + #include "harness/testHarness.h" #include "procs.h" test_definition test_list[] = { ADD_TEST(hostptr), - ADD_TEST(fpmath_float), - ADD_TEST(fpmath_float2), - ADD_TEST(fpmath_float4), + ADD_TEST(fpmath), ADD_TEST(intmath_int), ADD_TEST(intmath_int2), ADD_TEST(intmath_int4), @@ -164,9 +165,35 @@ test_definition test_list[] = { }; const int test_num = ARRAY_SIZE( test_list ); +cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE; + +test_status InitCL(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + + return TEST_PASS; +} int main(int argc, const char *argv[]) { - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index c14340de3..9cbc373a3 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // + #include "harness/kernelHelpers.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -21,9 +22,8 @@ #include "harness/rounding_mode.h" extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 08a4cb85a..45669a7cb 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -14,6 +14,9 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/conversions.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" #include #include @@ -22,11 +25,7 @@ #include #include -#include "harness/conversions.h" -#include "harness/typeWrappers.h" - #include "procs.h" -#include "utils.h" // clang-format off diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp new file mode 100644 index 000000000..6719e7281 --- /dev/null +++ b/test_conformance/basic/test_fpmath.cpp @@ -0,0 +1,386 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/stringHelpers.h" + +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "procs.h" + +static const char *fp_kernel_code = R"( +%s +__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) +{ + int tid = get_global_id(0); + + dst[tid] = srcA[tid] OP srcB[tid]; +})"; + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +template double toDouble(T val) +{ + if (std::is_same::value) + return HTF(val); + else + return val; +} + +bool isHalfNan(cl_half v) +{ + // Extract FP16 exponent and mantissa + uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = v & 0x3FF; + + // NaN test + return (h_exp == 0x1F && h_mant != 0); +} + +cl_half half_plus(cl_half a, cl_half b) +{ + return HFF(std::plus()(HTF(a), HTF(b))); +} + +cl_half half_minus(cl_half a, cl_half b) +{ + return HFF(std::minus()(HTF(a), HTF(b))); +} + +cl_half half_mult(cl_half a, cl_half b) +{ + return HFF(std::multiplies()(HTF(a), HTF(b))); +} + +template struct TestDef +{ + const char op; + std::function ref; + std::string type_str; + size_t vec_size; +}; + +template +int verify_fp(std::vector (&input)[2], std::vector &output, + const TestDef &test) +{ + auto &inA = input[0]; + auto &inB = input[1]; + for (int i = 0; i < output.size(); i++) + { + bool nan_test = false; + + T r = test.ref(inA[i], inB[i]); + + if (std::is_same::value) + nan_test = !(isHalfNan(r) && isHalfNan(output[i])); + + if (r != output[i] && nan_test) + { + log_error("FP math test for type: %s, vec size: %zu, failed at " + "index %d, %a '%c' %a, expected %a, get %a\n", + test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]), + test.op, toDouble(inB[i]), toDouble(r), + toDouble(output[i])); + return -1; + } + } + + return 0; +} + +template void generate_random_inputs(std::vector (&input)[2]) +{ + RandomSeed seed(gRandomSeed); + + if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else if (std::is_same::value) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + seed)); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } +} + +struct TypesIterator +{ + using TypeIter = std::tuple; + + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems) + : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0), + num_elements(num_elems) + { + // typeid().name one day + type2name[sizeof(cl_half)] = "half"; + type2name[sizeof(cl_float)] = "float"; + type2name[sizeof(cl_double)] = "double"; + + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + fpConfigFloat = get_default_rounding_mode(deviceID); + + if (fp16Support) + fpConfigHalf = + get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG); + + for_each_elem(it); + } + + template int test_fpmath(TestDef &test) + { + constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 }; + cl_int err = CL_SUCCESS; + + std::ostringstream sstr; + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (std::is_same::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string program_source = + str_sprintf(std::string(fp_kernel_code), sstr.str().c_str()); + + for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++) + { + test.vec_size = vecSizes[i]; + + std::ostringstream vecNameStr; + vecNameStr << test.type_str; + if (test.vec_size != 1) vecNameStr << test.vec_size; + + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t length = sizeof(T) * num_elements * test.vec_size; + + bool isRTZ = false; + RoundingMode oldMode = kDefaultRoundingMode; + + + // If we only support rtz mode + if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigHalf) + { + isRTZ = true; + oldMode = get_round(); + } + } + else if (std::is_same::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigFloat) + { + isRTZ = true; + oldMode = get_round(); + } + } + + std::vector inputs[]{ + std::vector(test.vec_size * num_elements), + std::vector(test.vec_size * num_elements) + }; + std::vector output = + std::vector(test.vec_size * num_elements); + + generate_random_inputs(inputs); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, + NULL, &err); + test_error(err, "clCreateBuffer failed."); + } + for (int i = 0; i < ARRAY_SIZE(inputs); i++) + { + err = + clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, + inputs[i].data(), 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed."); + } + + std::string build_options = "-DTYPE="; + build_options.append(vecNameStr.str()) + .append(" -DOP=") + .append(1, test.op); + + const char *ptr = program_source.c_str(); + err = + create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_fp", build_options.c_str()); + + test_error(err, "create_single_kernel_helper failed"); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) + { + err = + clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(err, "clSetKernelArgs failed."); + } + + size_t threads[] = { static_cast(num_elements) }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, + output.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed."); + + if (isRTZ) set_round(kRoundTowardZero, kfloat); + + err = verify_fp(inputs, output, test); + + if (isRTZ) set_round(oldMode, kfloat); + + test_error(err, "test verification failed"); + log_info("FP '%c' '%s' test passed\n", test.op, + vecNameStr.str().c_str()); + } + + return err; + } + + template int test_fpmath_common() + { + int err = TEST_PASS; + if (std::is_same::value) + { + TestDef tests[] = { { '+', half_plus, type2name[sizeof(T)] }, + { '-', half_minus, type2name[sizeof(T)] }, + { '*', half_mult, type2name[sizeof(T)] } }; + for (auto &test : tests) err |= test_fpmath(test); + } + else + { + TestDef tests[] = { + { '+', std::plus(), type2name[sizeof(T)] }, + { '-', std::minus(), type2name[sizeof(T)] }, + { '*', std::multiplies(), type2name[sizeof(T)] } + }; + for (auto &test : tests) err |= test_fpmath(test); + } + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template + void iterate_type(const Type &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + if (test_fpmath_common()) + { + throw std::runtime_error("test_fpmath_common failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + TypeIter it; + + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + int num_elements; + std::map type2name; +}; + +int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp deleted file mode 100644 index fced0f4ec..000000000 --- a/test_conformance/basic/test_fpmath_float.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include -#include "harness/rounding_mode.h" - -#include -#include -#include -#include - -#include "procs.h" - -struct TestDef -{ - const char op; - std::function ref; -}; - -static const char *fp_kernel_code = R"( -__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) -{ - int tid = get_global_id(0); - - dst[tid] = srcA[tid] OP srcB[tid]; -})"; - -static int verify_fp(std::vector (&input)[2], std::vector &output, - const TestDef &test) -{ - - auto &inA = input[0]; - auto &inB = input[1]; - for (int i = 0; i < output.size(); i++) - { - float r = test.ref(inA[i], inB[i]); - if (r != output[i]) - { - log_error("FP '%c' float test failed\n", test.op); - return -1; - } - } - - log_info("FP '%c' float test passed\n", test.op); - return 0; -} - - -void generate_random_inputs(std::vector (&input)[2]) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - for (auto &v : input) - { - std::generate(v.begin(), v.end(), random_generator); - } -} - -template -int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const std::string type_str, - const TestDef &test) -{ - clMemWrapper streams[3]; - clProgramWrapper program; - clKernelWrapper kernel; - - int err; - - size_t length = sizeof(cl_float) * num_elements * N; - - int isRTZ = 0; - RoundingMode oldMode = kDefaultRoundingMode; - - // If we only support rtz mode - if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) - { - isRTZ = 1; - oldMode = get_round(); - } - - - std::vector inputs[]{ std::vector(N * num_elements), - std::vector(N * num_elements) }; - std::vector output = std::vector(N * num_elements); - - generate_random_inputs(inputs); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err); - test_error(err, "clCreateBuffer failed."); - } - for (int i = 0; i < ARRAY_SIZE(inputs); i++) - { - err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, - inputs[i].data(), 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed."); - } - - std::string build_options = "-DTYPE="; - build_options.append(type_str).append(" -DOP=").append(1, test.op); - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &fp_kernel_code, "test_fp", - build_options.c_str()); - - test_error(err, "create_single_kernel_helper failed"); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); - test_error(err, "clSetKernelArgs failed."); - } - - size_t threads[] = { static_cast(num_elements) }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, - NULL); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, - output.data(), 0, NULL, NULL); - test_error(err, "clEnqueueReadBuffer failed."); - - if (isRTZ) set_round(kRoundTowardZero, kfloat); - - err = verify_fp(inputs, output, test); - - if (isRTZ) set_round(oldMode, kfloat); - - return err; -} - - -template -int test_fpmath_common(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const std::string type_str) -{ - TestDef tests[] = { { '+', std::plus() }, - { '-', std::minus() }, - { '*', std::multiplies() } }; - int err = TEST_PASS; - - for (const auto &test : tests) - { - err |= test_fpmath(device, context, queue, num_elements, type_str, - test); - } - - return err; -} - -int test_fpmath_float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<1>(device, context, queue, num_elements, "float"); -} - -int test_fpmath_float2(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<2>(device, context, queue, num_elements, - "float2"); -} - -int test_fpmath_float4(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<4>(device, context, queue, num_elements, - "float4"); -} From df3ec8deecdb81661ee61c3c97ae63419b5f4822 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 20 Jun 2023 17:44:45 +0200 Subject: [PATCH 5/5] Added cl_khr_fp16 extension support for test_int2fp from basic (#1742) * Added cl_khr_fp16 and cl_khr_fp64 support for float2int and int2float tests from basic * removed debug output * Replaced procedure to generate random half values in specific range (issue #142, basic) * Added cosmetic fixes due to code review comments * Moved string helper procedures due to request for test_commonfns PR #1695 --- test_conformance/basic/CMakeLists.txt | 2 +- test_conformance/basic/main.cpp | 6 +- test_conformance/basic/procs.h | 6 +- test_conformance/basic/test_int2float.cpp | 140 ---------- test_conformance/basic/test_int2fp.cpp | 324 ++++++++++++++++++++++ 5 files changed, 332 insertions(+), 146 deletions(-) delete mode 100644 test_conformance/basic/test_int2float.cpp create mode 100644 test_conformance/basic/test_int2fp.cpp diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index c89a93cf0..47c1c980f 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp test_imagedim.cpp test_vloadstore.cpp - test_int2float.cpp + test_int2fp.cpp test_createkernelsinprogram.cpp test_hostptr.cpp test_explicit_s2v.cpp diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 24262dbf9..d1901f95d 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -59,8 +59,8 @@ test_definition test_list[] = { ADD_TEST(image_r8), ADD_TEST(barrier), ADD_TEST_VERSION(wg_barrier, Version(2, 0)), - ADD_TEST(int2float), - ADD_TEST(float2int), + ADD_TEST(int2fp), + ADD_TEST(fp2int), ADD_TEST(imagereadwrite), ADD_TEST(imagereadwrite3d), ADD_TEST(readimage3d), @@ -156,7 +156,7 @@ test_definition test_list[] = { ADD_TEST(simple_read_image_pitch), ADD_TEST(simple_write_image_pitch), -#if defined( __APPLE__ ) +#if defined(__APPLE__) ADD_TEST(queue_priority), #endif diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index 9cbc373a3..b685ecd53 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -52,8 +52,10 @@ extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_comm extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int2fp(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_fp2int(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp deleted file mode 100644 index c5afc2440..000000000 --- a/test_conformance/basic/test_int2float.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include -#include - -#include -#include - -#include "procs.h" - -namespace { -const char *int2float_kernel_code = R"( -__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) -{ - int tid = get_global_id(0); - - dst[tid] = (TYPE_Y)src[tid]; - -})"; - -template const char *Type2str() { return ""; } -template <> const char *Type2str() { return "int"; } -template <> const char *Type2str() { return "float"; } - -template void generate_random_inputs(std::vector &v) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - std::generate(v.begin(), v.end(), random_generator); -} - -template bool equal_value(Tx a, Ty b) -{ - return a == (Tx)b; -} - -template -int verify_X2Y(std::vector input, std::vector output, - const char *test_name) -{ - - if (!std::equal(output.begin(), output.end(), input.begin(), - equal_value)) - { - log_error("%s test failed\n", test_name); - return -1; - } - - log_info("%s test passed\n", test_name); - return 0; -} -template -int test_X2Y(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const char *test_name) -{ - clMemWrapper streams[2]; - clProgramWrapper program; - clKernelWrapper kernel; - int err; - - - std::vector input(num_elements); - std::vector output(num_elements); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(Tx) * num_elements, nullptr, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(Ty) * num_elements, nullptr, &err); - test_error(err, "clCreateBuffer failed."); - - generate_random_inputs(input); - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, - sizeof(Tx) * num_elements, input.data(), 0, - nullptr, nullptr); - test_error(err, "clEnqueueWriteBuffer failed."); - - std::string build_options; - build_options.append("-DTYPE_X=").append(Type2str()); - build_options.append(" -DTYPE_Y=").append(Type2str()); - err = create_single_kernel_helper(context, &program, &kernel, 1, - &int2float_kernel_code, "test_X2Y", - build_options.c_str()); - test_error(err, "create_single_kernel_helper failed."); - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - test_error(err, "clSetKernelArg failed."); - - size_t threads[] = { (size_t)num_elements }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, - nullptr, nullptr); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, - sizeof(Ty) * num_elements, output.data(), 0, - nullptr, nullptr); - test_error(err, "clEnqueueReadBuffer failed."); - - err = verify_X2Y(input, output, test_name); - - return err; -} -} -int test_int2float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_X2Y(device, context, queue, num_elements, - "INT2FLOAT"); -} -int test_float2int(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_X2Y(device, context, queue, num_elements, - "FLOAT2INT"); -} diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp new file mode 100644 index 000000000..8b1203a71 --- /dev/null +++ b/test_conformance/basic/test_int2fp.cpp @@ -0,0 +1,324 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "CL/cl_half.h" +#include "harness/compat.h" +#include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "procs.h" + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +namespace { +const char *int2float_kernel_code = R"( +%s +__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) +{ + int tid = get_global_id(0); + + dst[tid] = (TYPE_Y)src[tid]; + +})"; + +template struct TypesIterator +{ + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems, const char *test_name) + : context(context), queue(queue), test_name(test_name), + num_elements(num_elems) + { + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + type2name[sizeof(cl_half)] = std::make_pair("half", "short"); + type2name[sizeof(cl_float)] = std::make_pair("float", "int"); + type2name[sizeof(cl_double)] = std::make_pair("double", "long"); + + std::tuple it; + for_each_elem(it); + } + + template void generate_random_inputs(std::vector &v) + { + RandomSeed seed(gRandomSeed); + + if (sizeof(T) == sizeof(cl_half)) + { + // Bound generated half values to 0x1.ffcp+14(32752.0) which is the + // largest cl_half value smaller than the max value of cl_short, + // 32767. + if (int2fp) + { + auto random_generator = [&seed]() { + return (cl_short)get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed)); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + else if (sizeof(T) == sizeof(cl_float)) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else if (sizeof(T) == sizeof(cl_double)) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + + template static bool equal_value(Tx a, Ty b) + { + return a == (Tx)b; + } + + static bool equal_value_from_half(cl_short a, cl_half b) + { + return a == (cl_short)HTF(b); + } + + static bool equal_value_to_half(cl_half a, cl_short b) + { + return a == HFF((float)b); + } + + + template + int verify_X2Y(std::vector input, std::vector output) + { + if (std::is_same::value + || std::is_same::value) + { + bool res = true; + if (int2fp) + res = std::equal(output.begin(), output.end(), input.begin(), + equal_value_to_half); + else + res = std::equal(output.begin(), output.end(), input.begin(), + equal_value_from_half); + + if (!res) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + else + { + if (!std::equal(output.begin(), output.end(), input.begin(), + equal_value)) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + + log_info("%s test passed\n", test_name.c_str()); + return 0; + } + + template int test_X2Y() + { + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; + + std::vector input(num_elements); + std::vector output(num_elements); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Tx) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Ty) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + + generate_random_inputs(input); + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(Tx) * num_elements, input.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + std::string src_name = type2name[sizeof(Tx)].first; + std::string dst_name = type2name[sizeof(Tx)].second; + if (int2fp) std::swap(src_name, dst_name); + + std::string build_options; + build_options.append("-DTYPE_X=").append(src_name.c_str()); + build_options.append(" -DTYPE_Y=").append(dst_name.c_str()); + + std::string extension; + if (sizeof(Tx) == sizeof(cl_double)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (sizeof(Tx) == sizeof(cl_half)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string kernelSource = + str_sprintf(int2float_kernel_code, extension.c_str()); + const char *ptr = kernelSource.c_str(); + + err = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_X2Y", build_options.c_str()); + test_error(err, "create_single_kernel_helper failed."); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + test_error(err, "clSetKernelArg failed."); + + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(Ty) * num_elements, output.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); + + err = verify_X2Y(input, output); + + return err; + } + + template bool skip_type() + { + if (std::is_same::value && !fp64Support) + return true; + else if (std::is_same::value && !fp16Support) + return true; + return false; + } + + template void iterate_type(const T &t) + { + bool doTest = !skip_type(); + + if (doTest) + { + typedef typename std::conditional< + (sizeof(T) == sizeof(std::int16_t)), std::int16_t, + typename std::conditional<(sizeof(T) == sizeof(std::int32_t)), + std::int32_t, + std::int64_t>::type>::type U; + if (int2fp) + { + if (test_X2Y()) + throw std::runtime_error("test_X2Y failed\n"); + } + else + { + if (test_X2Y()) + throw std::runtime_error("test_X2Y failed\n"); + } + } + } + + template + inline typename std::enable_if::type + for_each_elem( + const std::tuple &) // Unused arguments are given no names. + {} + + template + inline typename std::enable_if < Cnt::type + for_each_elem(const std::tuple &t) + { + iterate_type(std::get(t)); + for_each_elem(t); + } + +protected: + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + std::map> type2name; + + std::string test_name; + int num_elements; +}; + +} + +int test_int2fp(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements, "INT2FP"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} + +int test_fp2int(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements, "FP2INT"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +}