diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 0da329e18b..26c4af9984 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -63,7 +63,7 @@ jobs:
     runs-on: ubuntu-22.04
     steps:
       - name: Install packages
-        run: sudo apt install -y clang-format clang-format-11
+        run: sudo apt-get update && sudo apt-get install -y clang-format clang-format-14
       - uses: actions/checkout@v4
         with:
           fetch-depth: 0
diff --git a/check-format.sh b/check-format.sh
index b5dc0a72ca..b76117c15c 100755
--- a/check-format.sh
+++ b/check-format.sh
@@ -2,7 +2,7 @@
 
 # Arg used to specify non-'origin/main' comparison branch
 ORIGIN_BRANCH=${1:-"origin/main"}
-CLANG_BINARY=${2:-"`which clang-format-11`"}
+CLANG_BINARY=${2:-"`which clang-format-14`"}
 
 # Run git-clang-format to check for violations
 CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY)
diff --git a/presubmit.sh b/presubmit.sh
index 10354abf14..b519d68388 100755
--- a/presubmit.sh
+++ b/presubmit.sh
@@ -62,7 +62,7 @@ cmake .. -G Ninja \
       -DCMAKE_BUILD_TYPE=Release \
       -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
       -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/
-cmake --build . -j2
+cmake --build . --parallel
 
 #Vulkan Loader
 cd ${TOP}
@@ -78,7 +78,7 @@ cmake .. -G Ninja \
       -DBUILD_WSI_XCB_SUPPORT=OFF \
       -DBUILD_WSI_WAYLAND_SUPPORT=OFF \
       -C helper.cmake ..
-cmake --build . -j2
+cmake --build . --parallel
 
 # Build CTS
 cd ${TOP}
@@ -87,14 +87,18 @@ mkdir build
 cd build
+# Compiler-launcher flags live in a shell variable so they can stay empty on
+# Windows. They must be expanded unquoted below so that each -D flag reaches
+# CMake as its own argument; wrapping them in a single -D<name>="..." would
+# merely define an unused cache variable and silently disable sccache.
 if [[ ${RUNNER_OS} == "Windows" ]]; then
   CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL"
   CMAKE_CACHE_OPTIONS=""
 else
   CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL -lpthread"
   CMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache"
 fi
 cmake .. -G Ninja \
       -DCMAKE_BUILD_TYPE="${BUILD_CONFIG}" \
       ${CMAKE_CACHE_OPTIONS} \
       -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \
       -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \
       -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
@@ -105,4 +109,4 @@ cmake .. -G Ninja \
       -DVULKAN_IS_SUPPORTED=${BUILD_VULKAN_TEST} \
       -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \
       -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/
-cmake --build . -j3
+cmake --build . --parallel
diff --git a/readme-spir-v-binaries.txt b/readme-spir-v-binaries.txt
deleted file mode 100644
index ee09cd5016..0000000000
--- a/readme-spir-v-binaries.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-To run the conformance tests you need SPIR-V binaries.
-
-If you are using a conformance package then the binaries are included in the
-package. If you are using conformance tests from gitlab repositories then the
-binaries need to be picked up from Khronos SVN URL mentioned below:
-
-https://cvs.khronos.org/svn/repos/OpenCL/trunk/Khronos/spirv/spirv10_2015.11.25.zip
-
-
-Alternatively you can check out and build all of the below repositories.
-
-1. SPIRV-LLVM
-LLVM with support for SPIR-V (required by clang compiler)
-Repository: https://gitlab.khronos.org/opencl/SPIRV-LLVM
-Branch: spec_constants 
-Notes: spirv-3.6.1 is a main branch with support for OpenCL C++ kernel language,
-  spec_constants is based on it, but it adds support for specialization constants.
-
-2. Clang 
-Clang with support for OpenCL C++ kernel language
-Repository: https://gitlab.khronos.org/opencl/clang
-Branch: spec_constants 
-Notes: spirv-1.1 is a main branch with support for OpenCL C++ kernel language, 
-  spec_constants is based on it, but it adds support for specialization constants.
-
-3. libclcxx
-OpenCL C++ Standard Library
-Repository: https://gitlab.khronos.org/opencl/libclcxx 
-Branch: lit_tests_cl22   
-Notes: lit_tests_cl22 branch includes both LIT tests and changes introduced in 
-  spec_constants branch, that is, implementation of Specialization Constants Library.   
-
-4. OpenCL 2.2 headers
-OpenCL 2.2 headers
-Repository: https://gitlab.khronos.org/opencl/headers 
-Branch: opencl22 
-
-5. OpenCL ICD (with 2.2 support)
-OpenCL ICD 
-Repository: https://gitlab.khronos.org/opencl/icd 
-Branch: dev_cl22 
diff --git a/test_common/config.hpp b/test_common/config.hpp
deleted file mode 100644
index a037d4e462..0000000000
--- a/test_common/config.hpp
+++ /dev/null
@@ -1,41 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef TEST_COMMON_CONFIG_HPP
-#define TEST_COMMON_CONFIG_HPP
-
-// Enable development options for OpenCL C++ tests (test_conformance/clpp)
-// #define DEVELOPMENT
-#if defined(CLPP_DEVELOPMENT_OPTIONS) && !defined(DEVELOPMENT)
-    #define DEVELOPMENT
-#endif
-
-#ifdef DEVELOPMENT
-    // If defined OpenCL C++ tests only checks if OpenCL C++ kernels compiles correctly
-    // #define ONLY_SPIRV_COMPILATION
-    #if defined(CLPP_DEVELOPMENT_ONLY_SPIRV_COMPILATION) && !defined(ONLY_SPIRV_COMPILATION)
-        #define ONLY_SPIRV_COMPILATION
-    #endif
-
-    #ifndef ONLY_SPIRV_COMPILATION
-        // If defined OpenCL C++ tests are run using OpenCL C kernels
-        // #define USE_OPENCLC_KERNELS
-        #if defined(CLPP_DEVELOPMENT_USE_OPENCLC_KERNELS) && !defined(USE_OPENCLC_KERNELS)
-            #define USE_OPENCLC_KERNELS
-        #endif
-    #endif    
-#endif
-
-#endif // TEST_COMMON_CONFIG_HPP
\ No newline at end of file
diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp
index abc065c94c..3292902f6d 100644
--- a/test_common/gl/setup_x11.cpp
+++ b/test_common/gl/setup_x11.cpp
@@ -26,20 +26,26 @@ class X11GLEnvironment : public GLEnvironment
 private:
     cl_device_id m_devices[64];
     cl_uint m_device_count;
+    bool m_glut_init;
 
 public:
     X11GLEnvironment()
     {
         m_device_count = 0;
+        m_glut_init = false;
     }
     virtual int Init( int *argc, char **argv, int use_opencl_32 )
     {
          // Create a GLUT window to render into
-        glutInit( argc, argv );
-        glutInitWindowSize( 512, 512 );
-        glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE );
-        glutCreateWindow( "OpenCL <-> OpenGL Test" );
-        glewInit();
+         if (!m_glut_init)
+         {
+             glutInit(argc, argv);
+             glutInitWindowSize(512, 512);
+             glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE);
+             glutCreateWindow("OpenCL <-> OpenGL Test");
+             glewInit();
+             m_glut_init = true;
+         }
         return 0;
     }
 
diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h
index 912dd198ac..96ffdf1a3d 100644
--- a/test_common/harness/deviceInfo.h
+++ b/test_common/harness/deviceInfo.h
@@ -16,9 +16,6 @@
 #ifndef _deviceInfo_h
 #define _deviceInfo_h
 
-// Configuration
-#include "../config.hpp"
-
 #include <string>
 
 #include <CL/opencl.h>
diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp
index eaccf64119..c2e1e5256b 100644
--- a/test_common/harness/errorHelpers.cpp
+++ b/test_common/harness/errorHelpers.cpp
@@ -202,6 +202,8 @@ const char *GetChannelTypeName(cl_channel_type type)
         case CL_SFIXED14_APPLE: return "CL_SFIXED14_APPLE";
 #endif
         case CL_UNORM_INT24: return "CL_UNORM_INT24";
+        case CL_UNSIGNED_INT_RAW10_EXT: return "CL_UNSIGNED_INT_RAW10_EXT";
+        case CL_UNSIGNED_INT_RAW12_EXT: return "CL_UNSIGNED_INT_RAW12_EXT";
         default: return NULL;
     }
 }
@@ -369,7 +371,7 @@ static float Ulp_Error_Half_Float(float test, double reference)
     return (float)scalbn(testVal - reference, ulp_exp);
 }
 
-float Ulp_Error_Half(cl_half test, float reference)
+float Ulp_Error_Half(cl_half test, double reference)
 {
     return Ulp_Error_Half_Float(cl_half_to_float(test), reference);
 }
diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index 3f1d8fb145..e6d4620b4d 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -185,7 +185,7 @@ static int vlog_win32(const char *format, ...);
 
 extern const char *IGetErrorString(int clErrorCode);
 
-extern float Ulp_Error_Half(cl_half test, float reference);
+extern float Ulp_Error_Half(cl_half test, double reference);
 extern float Ulp_Error(float test, double reference);
 extern float Ulp_Error_Double(double test, long double reference);
 
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index 52a642383d..e13404b9a5 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -288,6 +288,9 @@ uint32_t get_pixel_size(const cl_image_format *format)
             return get_format_channel_count(format) * sizeof(cl_float);
         case CL_UNORM_INT_101010_2: return 4;
 
+        case CL_UNSIGNED_INT_RAW10_EXT:
+        case CL_UNSIGNED_INT_RAW12_EXT: return 2;
+
         default: return 0;
     }
 }
@@ -1125,13 +1128,15 @@ cl_ulong get_image_size_mb(image_descriptor const *imageInfo)
 uint64_t gRoundingStartValue = 0;
 
 
-void escape_inf_nan_values(char *data, size_t allocSize)
+void escape_inf_nan_subnormal_values(char *data, size_t allocSize)
 {
     // filter values with 8 not-quite-highest bits
     unsigned int *intPtr = (unsigned int *)data;
     for (size_t i = 0; i<allocSize>> 2; i++)
     {
         if ((intPtr[i] & 0x7F800000) == 0x7F800000) intPtr[i] ^= 0x40000000;
+        else if ((intPtr[i] & 0x7F800000) == 0)
+            intPtr[i] ^= 0x40000000;
     }
 
     // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits
@@ -1140,6 +1145,8 @@ void escape_inf_nan_values(char *data, size_t allocSize)
     for (size_t i = 0; i<allocSize>> 1; i++)
     {
         if ((shortPtr[i] & 0x7C00) == 0x7C00) shortPtr[i] ^= 0x4000;
+        else if ((shortPtr[i] & 0x7C00) == 0)
+            shortPtr[i] ^= 0x4000;
     }
 }
 
@@ -1218,7 +1225,7 @@ char *generate_random_image_data(image_descriptor *imageInfo,
 
         // Note: inf or nan float values would cause problems, although we don't
         // know this will actually be a float, so we just know what to look for
-        escape_inf_nan_values(data, allocSize);
+        escape_inf_nan_subnormal_values(data, allocSize);
         return data;
     }
 
@@ -1230,7 +1237,7 @@ char *generate_random_image_data(image_descriptor *imageInfo,
 
     // Note: inf or nan float values would cause problems, although we don't
     // know this will actually be a float, so we just know what to look for
-    escape_inf_nan_values(data, allocSize);
+    escape_inf_nan_subnormal_values(data, allocSize);
 
     if (/*!gTestMipmaps*/ imageInfo->num_mip_levels < 2)
     {
diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h
index 455f0edb4b..d49f358e78 100644
--- a/test_common/harness/imageHelpers.h
+++ b/test_common/harness/imageHelpers.h
@@ -191,6 +191,53 @@ cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth);
 cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo);
 size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod);
 
+// CL_UNSIGNED_INT_RAW10_EXT / CL_UNSIGNED_INT_RAW12_EXT images are addressed
+// in fixed-size "clumps" rather than per pixel: *_CLUMP_SIZE is the number of
+// bytes in one clump and *_CLUMP_NUM_PIXELS the number of pixels it holds
+// (RAW10: 4 pixels in 5 bytes, RAW12: 2 pixels in 3 bytes).
+constexpr size_t RAW10_EXT_CLUMP_SIZE = 5;
+constexpr size_t RAW10_EXT_CLUMP_NUM_PIXELS = 4;
+constexpr size_t RAW12_EXT_CLUMP_SIZE = 3;
+constexpr size_t RAW12_EXT_CLUMP_NUM_PIXELS = 2;
+
+// Returns false when the image uses a RAW10/RAW12 channel data type and its
+// width is not a whole number of clumps (a multiple of 4 or 2 pixels
+// respectively); returns true for every other case.
+inline bool is_width_compatible(image_descriptor imageInfo)
+{
+    if (imageInfo.format->image_channel_data_type == CL_UNSIGNED_INT_RAW10_EXT
+        && (imageInfo.width % RAW10_EXT_CLUMP_NUM_PIXELS) != 0)
+    {
+        return false;
+    }
+    if (imageInfo.format->image_channel_data_type == CL_UNSIGNED_INT_RAW12_EXT
+        && (imageInfo.width % RAW12_EXT_CLUMP_NUM_PIXELS) != 0)
+    {
+        return false;
+    }
+    return true;
+}
+
+// Returns the size in bytes of one image row. For RAW10/RAW12 this is
+// width * clump bytes / clump pixels and pixelSize is ignored; the integer
+// division is exact only when is_width_compatible() holds. All other
+// channel data types use width * pixelSize.
+inline size_t calculate_row_pitch(image_descriptor imageInfo, size_t pixelSize)
+{
+    if (imageInfo.format->image_channel_data_type == CL_UNSIGNED_INT_RAW10_EXT)
+    {
+        return (imageInfo.width * RAW10_EXT_CLUMP_SIZE)
+            / RAW10_EXT_CLUMP_NUM_PIXELS;
+    }
+    if (imageInfo.format->image_channel_data_type == CL_UNSIGNED_INT_RAW12_EXT)
+    {
+        return (imageInfo.width * RAW12_EXT_CLUMP_SIZE)
+            / RAW12_EXT_CLUMP_NUM_PIXELS;
+    }
+
+    return imageInfo.width * pixelSize;
+}
+
 template <class T>
 void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
                       int y, int z, T *outData, int lod)
@@ -255,10 +291,24 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
 
     // Advance to the right spot
     char *ptr = (char *)imageData;
-    size_t pixelSize = get_pixel_size(format);
-
-    ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
-
+    switch (format->image_channel_data_type)
+    {
+        case CL_UNSIGNED_INT_RAW10_EXT: {
+            ptr += z * slice_pitch_lod + y * row_pitch_lod
+                + (x / RAW10_EXT_CLUMP_NUM_PIXELS) * RAW10_EXT_CLUMP_SIZE;
+            break;
+        }
+        case CL_UNSIGNED_INT_RAW12_EXT: {
+            ptr += z * slice_pitch_lod + y * row_pitch_lod
+                + (x / RAW12_EXT_CLUMP_NUM_PIXELS) * RAW12_EXT_CLUMP_SIZE;
+            break;
+        }
+        default: {
+            size_t pixelSize = get_pixel_size(format);
+            ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
+            break;
+        }
+    }
     // OpenCL only supports reading floats from certain formats
     switch (format->image_channel_data_type)
     {
@@ -377,6 +427,26 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x,
             break;
         }
 #endif
+        case CL_UNSIGNED_INT_RAW10_EXT: {
+            cl_uchar *dPtr = (cl_uchar *)ptr;
+            i = x % RAW10_EXT_CLUMP_NUM_PIXELS;
+            uint8_t bit_index = i << 1;
+            uint16_t hi_val = dPtr[i] << 2;
+            uint16_t lo_val = (dPtr[4] & (0x3 << bit_index)) >> bit_index;
+
+            tempData[0] = (T)(hi_val | lo_val);
+            break;
+        }
+        case CL_UNSIGNED_INT_RAW12_EXT: {
+            cl_uchar *dPtr = (cl_uchar *)ptr;
+            i = x % RAW12_EXT_CLUMP_NUM_PIXELS;
+            uint8_t bit_index = i << 2;
+            uint16_t hi_val = dPtr[i] << 4;
+            uint16_t lo_val = (dPtr[2] & (0xF << bit_index)) >> bit_index;
+
+            tempData[0] = (T)(hi_val | lo_val);
+            break;
+        }
     }
 
 
diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h
index ad7b303b47..b4403b3272 100644
--- a/test_common/harness/integer_ops_test_info.h
+++ b/test_common/harness/integer_ops_test_info.h
@@ -20,6 +20,8 @@
 #include "conversions.h"
 #include "testHarness.h"
 
+#include <vector>
+
 // TODO: expand usage to other tests.
 
 template <typename T> struct TestInfo
diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h
index 62a07e49b8..86a6919293 100644
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h
@@ -16,9 +16,6 @@
 #ifndef _kernelHelpers_h
 #define _kernelHelpers_h
 
-// Configuration
-#include "../config.hpp"
-
 #include "compat.h"
 #include "testHarness.h"
 
diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index e866f3e8a9..c64c59011e 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -209,7 +209,7 @@ static std::string _err_msg(int err, int level)
 
 #if (defined(__ANDROID__) && __ANDROID_API__ < 23)                             \
     || ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE)  \
-    || (defined(_GNU_SOURCE) && !defined(__GLIBC__))
+    || (defined(_GNU_SOURCE) && !defined(__GLIBC__) && !defined(__USE_GNU))
 
 // XSI version of strerror_r.
 #warning Not tested!
diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h
index 6f52f0a00b..dfd9c3eeab 100644
--- a/test_common/harness/rounding_mode.h
+++ b/test_common/harness/rounding_mode.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -42,10 +42,11 @@ typedef enum
     kshort = 3,
     kuint = 4,
     kint = 5,
-    kfloat = 6,
-    kdouble = 7,
-    kulong = 8,
-    klong = 9,
+    khalf = 6,
+    kfloat = 7,
+    kdouble = 8,
+    kulong = 9,
+    klong = 10,
 
     // This goes last
     kTypeCount
diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt
index 2ad2f821b3..57cbcac7c8 100644
--- a/test_conformance/SVM/CMakeLists.txt
+++ b/test_conformance/SVM/CMakeLists.txt
@@ -3,6 +3,7 @@ set(MODULE_NAME SVM)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_allocate_shared_buffer.cpp
+    test_allocate_shared_buffer_negative.cpp
     test_byte_granularity.cpp
     test_cross_buffer_pointers.cpp
     test_enqueue_api.cpp
diff --git a/test_conformance/SVM/common.h b/test_conformance/SVM/common.h
index ab99b7eccb..f6a937c792 100644
--- a/test_conformance/SVM/common.h
+++ b/test_conformance/SVM/common.h
@@ -92,6 +92,10 @@ extern int    test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl
 extern int    test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int    test_svm_pointer_passing(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int    test_svm_allocate_shared_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements);
 extern int    test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int    test_svm_enqueue_api(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int    test_svm_migrate(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/SVM/main.cpp b/test_conformance/SVM/main.cpp
index 56fb24f1a1..819901a301 100644
--- a/test_conformance/SVM/main.cpp
+++ b/test_conformance/SVM/main.cpp
@@ -261,20 +261,21 @@ cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeSt
 }
 
 test_definition test_list[] = {
-    ADD_TEST( svm_byte_granularity),
-    ADD_TEST( svm_set_kernel_exec_info_svm_ptrs ),
-    ADD_TEST( svm_fine_grain_memory_consistency ),
-    ADD_TEST( svm_fine_grain_sync_buffers ),
-    ADD_TEST( svm_shared_address_space_fine_grain ),
-    ADD_TEST( svm_shared_sub_buffers ),
-    ADD_TEST( svm_shared_address_space_fine_grain_buffers ),
-    ADD_TEST( svm_allocate_shared_buffer ),
-    ADD_TEST( svm_shared_address_space_coarse_grain_old_api ),
-    ADD_TEST( svm_shared_address_space_coarse_grain_new_api ),
-    ADD_TEST( svm_cross_buffer_pointers_coarse_grain ),
-    ADD_TEST( svm_pointer_passing ),
-    ADD_TEST( svm_enqueue_api ),
-    ADD_TEST_VERSION( svm_migrate, Version(2, 1)),
+    ADD_TEST(svm_byte_granularity),
+    ADD_TEST(svm_set_kernel_exec_info_svm_ptrs),
+    ADD_TEST(svm_fine_grain_memory_consistency),
+    ADD_TEST(svm_fine_grain_sync_buffers),
+    ADD_TEST(svm_shared_address_space_fine_grain),
+    ADD_TEST(svm_shared_sub_buffers),
+    ADD_TEST(svm_shared_address_space_fine_grain_buffers),
+    ADD_TEST(svm_allocate_shared_buffer),
+    ADD_TEST(svm_allocate_shared_buffer_negative),
+    ADD_TEST(svm_shared_address_space_coarse_grain_old_api),
+    ADD_TEST(svm_shared_address_space_coarse_grain_new_api),
+    ADD_TEST(svm_cross_buffer_pointers_coarse_grain),
+    ADD_TEST(svm_pointer_passing),
+    ADD_TEST(svm_enqueue_api),
+    ADD_TEST_VERSION(svm_migrate, Version(2, 1)),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp b/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp
new file mode 100644
index 0000000000..0706650330
--- /dev/null
+++ b/test_conformance/SVM/test_allocate_shared_buffer_negative.cpp
@@ -0,0 +1,113 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "common.h"
+
+// Flag combinations passed to clSVMAlloc; svm_flag_set_names holds the
+// matching printable names, so both arrays must be kept in the same order.
+const cl_mem_flags svm_flag_set[] = {
+    CL_MEM_READ_WRITE,
+    CL_MEM_WRITE_ONLY,
+    CL_MEM_READ_ONLY,
+    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER,
+    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
+    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER,
+    CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
+    CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
+    CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS,
+    0
+};
+const char* svm_flag_set_names[] = {
+    "CL_MEM_READ_WRITE",
+    "CL_MEM_WRITE_ONLY",
+    "CL_MEM_READ_ONLY",
+    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER",
+    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
+    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER",
+    "CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
+    "CL_MEM_WRITE_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
+    "CL_MEM_READ_ONLY | CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS",
+    "0"
+};
+
+
+// Negative test: creating a CL_MEM_USE_HOST_PTR buffer that is larger than
+// the SVM allocation backing it must fail with CL_INVALID_BUFFER_SIZE.
+// The check is repeated for every SVM flag combination the device supports.
+int test_svm_allocate_shared_buffer_negative(cl_device_id deviceID,
+                                             cl_context context2,
+                                             cl_command_queue queue,
+                                             int num_elements)
+{
+    clContextWrapper context = NULL;
+    clProgramWrapper program = NULL;
+    cl_uint num_devices = 0;
+    cl_int err = CL_SUCCESS;
+    clCommandQueueWrapper queues[MAXQ];
+
+    cl_device_svm_capabilities caps;
+    err = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES,
+                          sizeof(cl_device_svm_capabilities), &caps, NULL);
+    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
+
+    // The harness-provided context2/queue are not used here; the test works
+    // on the context and queues that create_cl_objects fills in.
+    err = create_cl_objects(deviceID, NULL, &context, &program, &queues[0],
+                            &num_devices, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER);
+    if (err) return -1;
+
+    size_t size = 1024;
+
+    // iteration over flag combos
+    int num_flags = sizeof(svm_flag_set) / sizeof(cl_mem_flags);
+    for (int i = 0; i < num_flags; i++)
+    {
+        if (((svm_flag_set[i] & CL_MEM_SVM_FINE_GRAIN_BUFFER) != 0
+             && (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) == 0)
+            || ((svm_flag_set[i] & CL_MEM_SVM_ATOMICS) != 0
+                && (caps & CL_DEVICE_SVM_ATOMICS) == 0))
+        {
+            log_info("Skipping clSVMalloc with flags: %s\n",
+                     svm_flag_set_names[i]);
+            continue;
+        }
+
+        log_info("Testing clSVMalloc with flags: %s\n", svm_flag_set_names[i]);
+        cl_char* pBufData1 =
+            (cl_char*)clSVMAlloc(context, svm_flag_set[i], size, 0);
+        if (pBufData1 == NULL)
+        {
+            log_error("SVMalloc returned NULL\n");
+            return -1;
+        }
+
+        {
+            // Scoped so that a buffer created by a non-conforming
+            // implementation is released before its SVM backing store.
+            clMemWrapper buf1 = clCreateBuffer(context, CL_MEM_USE_HOST_PTR,
+                                               2 * size, pBufData1, &err);
+        }
+
+        // Free before checking the result: the check below may return early
+        // on a mismatch, which would otherwise leak the SVM allocation.
+        clSVMFree(context, pBufData1);
+
+        test_failure_error(err, CL_INVALID_BUFFER_SIZE,
+                           "clCreateBuffer did not return expected error "
+                           "CL_INVALID_BUFFER_SIZE");
+    }
+
+    return 0;
+}
diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp
index 1762711067..692424ceb1 100644
--- a/test_conformance/allocations/allocation_execute.cpp
+++ b/test_conformance/allocations/allocation_execute.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,7 +20,8 @@
 
 
 const char *buffer_kernel_pattern = {
-    "__kernel void sample_test(%s __global uint *result, __global %s *array_sizes, uint per_item)\n"
+    "__kernel void sample_test(%s __global uint *result, __global %s "
+    "*array_sizes, uint per_item)\n"
     "{\n"
     "\tint tid = get_global_id(0);\n"
     "\tuint r = 0;\n"
@@ -29,7 +30,8 @@ const char *buffer_kernel_pattern = {
     "%s"
     "\t}\n"
     "\tresult[tid] = r;\n"
-    "}\n" };
+    "}\n"
+};
 
 const char *image_kernel_pattern = {
     "__kernel void sample_test(%s __global uint *result)\n"
@@ -40,7 +42,8 @@ const char *image_kernel_pattern = {
     "\tint x, y;\n"
     "%s"
     "\tresult[get_global_id(0)] += color.x + color.y + color.z + color.w;\n"
-    "}\n" };
+    "}\n"
+};
 
 const char *read_pattern = {
     "\tfor(y=0; y<get_image_height(image%d); y++)\n"
@@ -50,11 +53,11 @@ const char *read_pattern = {
     "\t\t\t}\n"
 };
 
-const char *offset_pattern =
-"\tconst uint4 offset = (uint4)(0,1,2,3);\n";
+const char *offset_pattern = "\tconst uint4 offset = (uint4)(0,1,2,3);\n";
 
 const char *sampler_pattern =
-"\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE;\n";
+    "\tconst sampler_t sampler = CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST | "
+    "CLK_NORMALIZED_COORDS_FALSE;\n";
 
 
 const char *write_pattern = {
@@ -68,7 +71,8 @@ const char *write_pattern = {
 };
 
 
-int check_image(cl_command_queue queue, cl_mem mem) {
+int check_image(cl_command_queue queue, cl_mem mem)
+{
     int error;
     cl_mem_object_type type;
     size_t width, height;
@@ -76,7 +80,8 @@ int check_image(cl_command_queue queue, cl_mem mem) {
     cl_uint *data;
 
     error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
-    if (error) {
+    if (error)
+    {
         print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
         return -1;
     }
@@ -108,8 +113,9 @@ int check_image(cl_command_queue queue, cl_mem mem) {
     }
 
 
-    data = (cl_uint*)malloc(width*4*sizeof(cl_uint));
-    if (data == NULL) {
+    data = (cl_uint *)malloc(width * 4 * sizeof(cl_uint));
+    if (data == NULL)
+    {
         log_error("Failed to malloc host buffer for writing into image.\n");
         return FAILED_ABORT;
     }
@@ -119,19 +125,27 @@ int check_image(cl_command_queue queue, cl_mem mem) {
     region[0] = width;
     region[1] = 1;
     region[2] = 1;
-    for (origin[1] = 0; origin[1] < height; origin[1]++) {
-        error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
-        if (error) {
+    for (origin[1] = 0; origin[1] < height; origin[1]++)
+    {
+        error = clEnqueueReadImage(queue, mem, CL_TRUE, origin, region, 0, 0,
+                                   data, 0, NULL, NULL);
+        if (error)
+        {
             print_error(error, "clEnqueueReadImage failed");
             free(data);
             return error;
         }
 
-        for (x=0; x<width; x++) {
-            for (j=0; j<4; j++) {
-                if (data[x*4+j] != (cl_uint)(x*origin[1]+j)) {
-                    log_error("Pixel %d, %d, component %d, expected %u, got %u.\n",
-                              (int)x, (int)origin[1], (int)j, (cl_uint)(x*origin[1]+j), data[x*4+j]);
+        for (x = 0; x < width; x++)
+        {
+            for (j = 0; j < 4; j++)
+            {
+                if (data[x * 4 + j] != (cl_uint)(x * origin[1] + j))
+                {
+                    log_error(
+                        "Pixel %d, %d, component %d, expected %u, got %u.\n",
+                        (int)x, (int)origin[1], (int)j,
+                        (cl_uint)(x * origin[1] + j), data[x * 4 + j]);
                     return -1;
                 }
             }
@@ -142,9 +156,11 @@ int check_image(cl_command_queue queue, cl_mem mem) {
 }
 
 
-#define NUM_OF_WORK_ITEMS (8192 * 32)
-
-int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum) {
+int execute_kernel(cl_context context, cl_command_queue *queue,
+                   cl_device_id device_id, int test, cl_mem mems[],
+                   int number_of_mems_used, int verify_checksum,
+                   unsigned int number_of_work_itmes)
+{
 
     char *argument_string;
     char *access_string;
@@ -158,73 +174,97 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
     cl_uint per_item;
     cl_uint per_item_uint;
     cl_uint final_result;
-    std::vector<cl_uint> returned_results(NUM_OF_WORK_ITEMS);
+    std::vector<cl_uint> returned_results(number_of_work_itmes);
     clEventWrapper event;
     cl_int event_status;
 
     // Allocate memory for the kernel source
-    argument_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*64);
-    access_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10));
-    kernel_string = (char*)malloc(sizeof(char)*MAX_NUMBER_TO_ALLOCATE*(strlen(read_pattern)+10+64)+1024);
+    argument_string =
+        (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE * 64);
+    access_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
+                                   * (strlen(read_pattern) + 10));
+    kernel_string = (char *)malloc(sizeof(char) * MAX_NUMBER_TO_ALLOCATE
+                                       * (strlen(read_pattern) + 10 + 64)
+                                   + 1024);
     argument_string[0] = '\0';
     access_string[0] = '\0';
     kernel_string[0] = '\0';
 
     // Zero the results.
-    for (i=0; i<NUM_OF_WORK_ITEMS; i++)
-        returned_results[i] = 0;
+    for (i = 0; i < number_of_work_itmes; i++) returned_results[i] = 0;
 
     // detect if device supports ulong/int64
-    //detect whether profile of the device is embedded
+    // detect whether profile of the device is embedded
     bool support64 = true;
     char profile[1024] = "";
-    error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-    test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n" );
-    if ((NULL != strstr(profile, "EMBEDDED_PROFILE")) &&
-        (!is_extension_available(device_id, "cles_khr_int64"))) {
-            support64 = false;
+    error = clGetDeviceInfo(device_id, CL_DEVICE_PROFILE, sizeof(profile),
+                            profile, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
+    if ((NULL != strstr(profile, "EMBEDDED_PROFILE"))
+        && (!is_extension_available(device_id, "cles_khr_int64")))
+    {
+        support64 = false;
     }
 
     // Build the kernel source
-    if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
-        for(i=0; i<number_of_mems_used; i++) {
-            sprintf(argument_string + strlen(argument_string), " __global uint *buffer%d, ", i);
-            sprintf(access_string + strlen( access_string), "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
+    if (test == BUFFER || test == BUFFER_NON_BLOCKING)
+    {
+        for (i = 0; i < number_of_mems_used; i++)
+        {
+            sprintf(argument_string + strlen(argument_string),
+                    " __global uint *buffer%d, ", i);
+            sprintf(access_string + strlen(access_string),
+                    "\t\tif (i<array_sizes[%d]) r += buffer%d[i];\n", i, i);
         }
         char type[10];
-        if (support64) {
+        if (support64)
+        {
             sprintf(type, "ulong");
         }
-        else {
+        else
+        {
             sprintf(type, "uint");
         }
-        sprintf(kernel_string, buffer_kernel_pattern, argument_string, type, type, type, type, type, type, access_string);
+        sprintf(kernel_string, buffer_kernel_pattern, argument_string, type,
+                type, type, type, type, type, access_string);
     }
-    else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING) {
-        for(i=0; i<number_of_mems_used; i++) {
-            sprintf(argument_string + strlen(argument_string), " read_only image2d_t image%d, ", i);
-            sprintf(access_string + strlen(access_string), read_pattern, i, "%", i, i);
+    else if (test == IMAGE_READ || test == IMAGE_READ_NON_BLOCKING)
+    {
+        for (i = 0; i < number_of_mems_used; i++)
+        {
+            sprintf(argument_string + strlen(argument_string),
+                    " read_only image2d_t image%d, ", i);
+            sprintf(access_string + strlen(access_string), read_pattern, i, "%",
+                    i, i);
         }
-        sprintf(kernel_string, image_kernel_pattern, argument_string, sampler_pattern, access_string);
+        sprintf(kernel_string, image_kernel_pattern, argument_string,
+                sampler_pattern, access_string);
     }
-    else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING) {
-        for(i=0; i<number_of_mems_used; i++) {
-            sprintf(argument_string + strlen(argument_string), " write_only image2d_t image%d, ", i);
-            sprintf(access_string + strlen( access_string), write_pattern, i, "%", i, i);
+    else if (test == IMAGE_WRITE || test == IMAGE_WRITE_NON_BLOCKING)
+    {
+        for (i = 0; i < number_of_mems_used; i++)
+        {
+            sprintf(argument_string + strlen(argument_string),
+                    " write_only image2d_t image%d, ", i);
+            sprintf(access_string + strlen(access_string), write_pattern, i,
+                    "%", i, i);
         }
-        sprintf(kernel_string, image_kernel_pattern, argument_string, offset_pattern, access_string);
+        sprintf(kernel_string, image_kernel_pattern, argument_string,
+                offset_pattern, access_string);
     }
     ptr = kernel_string;
 
     // Create the kernel
-    error = create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" );
+    error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                        (const char **)&ptr, "sample_test");
 
     free(argument_string);
     free(access_string);
     free(kernel_string);
 
     result = check_allocation_error(context, device_id, error, queue);
-    if (result != SUCCEEDED) {
+    if (result != SUCCEEDED)
+    {
         if (result == FAILED_TOO_BIG)
             log_info("\t\tCreate kernel failed: %s.\n", IGetErrorString(error));
         else
@@ -233,80 +273,109 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
     }
 
     // Set the arguments
-    for (i=0; i<number_of_mems_used; i++) {
+    for (i = 0; i < number_of_mems_used; i++)
+    {
         error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mems[i]);
         test_error(error, "clSetKernelArg failed");
     }
 
     // Set the result
-    result_mem = clCreateBuffer(
-        context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
-        sizeof(cl_uint) * NUM_OF_WORK_ITEMS, returned_results.data(), &error);
+    result_mem =
+        clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                       sizeof(cl_uint) * number_of_work_itmes,
+                       returned_results.data(), &error);
     test_error(error, "clCreateBuffer failed");
     error = clSetKernelArg(kernel, i, sizeof(result_mem), &result_mem);
     test_error(error, "clSetKernelArg failed");
 
     // Thread dimensions for execution
-    global_dims[0] = NUM_OF_WORK_ITEMS; global_dims[1] = 1; global_dims[2] = 1;
+    global_dims[0] = number_of_work_itmes;
+    global_dims[1] = 1;
+    global_dims[2] = 1;
 
-    // We have extra arguments for the buffer kernel because we need to pass in the buffer sizes
+    // We have extra arguments for the buffer kernel because we need to pass in
+    // the buffer sizes
     cl_ulong *ulSizes = NULL;
-    cl_uint  *uiSizes = NULL;
-    if (support64) {
-        ulSizes = (cl_ulong*)malloc(sizeof(cl_ulong)*number_of_mems_used);
+    cl_uint *uiSizes = NULL;
+    if (support64)
+    {
+        ulSizes = (cl_ulong *)malloc(sizeof(cl_ulong) * number_of_mems_used);
     }
-    else {
-        uiSizes = (cl_uint*)malloc(sizeof(cl_uint)*number_of_mems_used);
+    else
+    {
+        uiSizes = (cl_uint *)malloc(sizeof(cl_uint) * number_of_mems_used);
     }
     cl_ulong max_size = 0;
     clMemWrapper buffer_sizes;
-    if (test == BUFFER || test == BUFFER_NON_BLOCKING) {
-        for (i=0; i<number_of_mems_used; i++) {
+    if (test == BUFFER || test == BUFFER_NON_BLOCKING)
+    {
+        for (i = 0; i < number_of_mems_used; i++)
+        {
             size_t size;
-            error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size), &size, NULL);
-            test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
-            if (support64) {
-                ulSizes[i] = size/sizeof(cl_uint);
+            error = clGetMemObjectInfo(mems[i], CL_MEM_SIZE, sizeof(size),
+                                       &size, NULL);
+            test_error_abort(error,
+                             "clGetMemObjectInfo failed for CL_MEM_SIZE.");
+            if (support64)
+            {
+                ulSizes[i] = size / sizeof(cl_uint);
             }
-            else {
-                uiSizes[i] = (cl_uint)size/sizeof(cl_uint);
+            else
+            {
+                uiSizes[i] = (cl_uint)(size / sizeof(cl_uint));
             }
-            if (size/sizeof(cl_uint) > max_size)
-                max_size = size/sizeof(cl_uint);
+            if (size / sizeof(cl_uint) > max_size)
+                max_size = size / sizeof(cl_uint);
         }
-        if (support64) {
-            buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_ulong)*number_of_mems_used, ulSizes, &error);
+        if (support64)
+        {
+            buffer_sizes = clCreateBuffer(
+                context, CL_MEM_COPY_HOST_PTR,
+                sizeof(cl_ulong) * number_of_mems_used, ulSizes, &error);
         }
-        else {
-            buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeof(cl_uint)*number_of_mems_used, uiSizes, &error);
+        else
+        {
+            buffer_sizes = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                          sizeof(cl_uint) * number_of_mems_used,
+                                          uiSizes, &error);
         }
         test_error_abort(error, "clCreateBuffer failed");
-        error = clSetKernelArg(kernel, number_of_mems_used+1, sizeof(cl_mem), &buffer_sizes);
+        error = clSetKernelArg(kernel, number_of_mems_used + 1, sizeof(cl_mem),
+                               &buffer_sizes);
         test_error(error, "clSetKernelArg failed");
-        per_item = (cl_uint)ceil((double)max_size/global_dims[0]);
+        per_item = (cl_uint)ceil((double)max_size / global_dims[0]);
         if (per_item > CL_UINT_MAX)
-            log_error("Size is too large for a uint parameter to the kernel. Expect invalid results.\n");
+            log_error("Size is too large for a uint parameter to the kernel. "
+                      "Expect invalid results.\n");
         per_item_uint = (cl_uint)per_item;
-        error = clSetKernelArg(kernel, number_of_mems_used+2, sizeof(per_item_uint), &per_item_uint);
+        error = clSetKernelArg(kernel, number_of_mems_used + 2,
+                               sizeof(per_item_uint), &per_item_uint);
         test_error(error, "clSetKernelArg failed");
     }
-    if (ulSizes) {
+    if (ulSizes)
+    {
         free(ulSizes);
     }
-    if (uiSizes) {
+    if (uiSizes)
+    {
         free(uiSizes);
     }
 
-    size_t local_dims[3] = {1,1,1};
-    error = get_max_common_work_group_size(context, kernel, global_dims[0], &local_dims[0]);
+    size_t local_dims[3] = { 1, 1, 1 };
+    error = get_max_common_work_group_size(context, kernel, global_dims[0],
+                                           &local_dims[0]);
     test_error(error, "get_max_common_work_group_size failed");
 
     // Execute the kernel
-    error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims, local_dims, 0, NULL, &event);
+    error = clEnqueueNDRangeKernel(*queue, kernel, 1, NULL, global_dims,
+                                   local_dims, 0, NULL, &event);
     result = check_allocation_error(context, device_id, error, queue);
-    if (result != SUCCEEDED) {
+    if (result != SUCCEEDED)
+    {
         if (result == FAILED_TOO_BIG)
-            log_info("\t\tExecute kernel failed: %s (global dim: %ld, local dim: %ld)\n", IGetErrorString(error), global_dims[0], local_dims[0]);
+            log_info("\t\tExecute kernel failed: %s (global dim: %zu, local "
+                     "dim: %zu)\n",
+                     IGetErrorString(error), global_dims[0], local_dims[0]);
         else
             print_error(error, "clEnqueueNDRangeKernel failed");
         return result;
@@ -317,7 +386,8 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
 
     result = check_allocation_error(context, device_id, error, queue);
 
-    if (result != SUCCEEDED) {
+    if (result != SUCCEEDED)
+    {
         if (result == FAILED_TOO_BIG)
             log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
         else
@@ -326,13 +396,20 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
     }
 
     // Verify that the event from the execution did not have an error
-    error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
-    test_error_abort(error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
-    if (event_status < 0) {
-        result = check_allocation_error(context, device_id, event_status, queue);
-        if (result != SUCCEEDED) {
+    error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                           sizeof(event_status), &event_status, NULL);
+    test_error_abort(
+        error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
+    if (event_status < 0)
+    {
+        result =
+            check_allocation_error(context, device_id, event_status, queue);
+        if (result != SUCCEEDED)
+        {
             if (result == FAILED_TOO_BIG)
-                log_info("\t\tEvent returned from kernel execution indicates failure: %s.\n", IGetErrorString(event_status));
+                log_info("\t\tEvent returned from kernel execution indicates "
+                         "failure: %s.\n",
+                         IGetErrorString(event_status));
             else
                 print_error(event_status, "clEnqueueNDRangeKernel failed");
             return result;
@@ -340,33 +417,46 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
     }
 
     // If we are not verifying the checksum return here
-    if (!verify_checksum) {
-        log_info("Note: Allocations were not initialized so kernel execution can not verify correct results.\n");
+    if (!verify_checksum)
+    {
+        log_info("Note: Allocations were not initialized so kernel execution "
+                 "can not verify correct results.\n");
         return SUCCEEDED;
     }
 
     // Verify the checksum.
     // Read back the result
     error = clEnqueueReadBuffer(*queue, result_mem, CL_TRUE, 0,
-                                sizeof(cl_uint) * NUM_OF_WORK_ITEMS,
+                                sizeof(cl_uint) * number_of_work_itmes,
                                 returned_results.data(), 0, NULL, NULL);
     test_error_abort(error, "clEnqueueReadBuffer failed");
     final_result = 0;
-    if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING || test == IMAGE_READ_NON_BLOCKING) {
-        // For buffers or read images we are just looking at the sum of what each thread summed up
-        for (i=0; i<NUM_OF_WORK_ITEMS; i++) {
+    if (test == BUFFER || test == IMAGE_READ || test == BUFFER_NON_BLOCKING
+        || test == IMAGE_READ_NON_BLOCKING)
+    {
+        // For buffers or read images we are just looking at the sum of what
+        // each thread summed up
+        for (i = 0; i < number_of_work_itmes; i++)
+        {
             final_result += returned_results[i];
         }
-        if (final_result != checksum) {
-            log_error("\t\tChecksum failed to verify. Expected %u got %u.\n", checksum, final_result);
+        if (final_result != checksum)
+        {
+            log_error("\t\tChecksum failed to verify. Expected %u got %u.\n",
+                      checksum, final_result);
             return FAILED_ABORT;
         }
         log_info("\t\tChecksum verified (%u == %u).\n", checksum, final_result);
-    } else {
+    }
+    else
+    {
         // For write images we need to verify the values
-        for (i=0; i<number_of_mems_used; i++) {
-            if (check_image(*queue, mems[i])) {
-                log_error("\t\tImage contents failed to verify for image %d.\n", (int)i);
+        for (i = 0; i < number_of_mems_used; i++)
+        {
+            if (check_image(*queue, mems[i]))
+            {
+                log_error("\t\tImage contents failed to verify for image %d.\n",
+                          (int)i);
                 return FAILED_ABORT;
             }
         }
@@ -376,7 +466,8 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
     // Finish the test
     error = clFinish(*queue);
     result = check_allocation_error(context, device_id, error, queue);
-    if (result != SUCCEEDED) {
+    if (result != SUCCEEDED)
+    {
         if (result == FAILED_TOO_BIG)
             log_info("\t\tclFinish failed: %s.\n", IGetErrorString(error));
         else
@@ -386,5 +477,3 @@ int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id dev
 
     return SUCCEEDED;
 }
-
-
diff --git a/test_conformance/allocations/allocation_execute.h b/test_conformance/allocations/allocation_execute.h
index 2c1b345109..353d051eaf 100644
--- a/test_conformance/allocations/allocation_execute.h
+++ b/test_conformance/allocations/allocation_execute.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,6 +17,7 @@
 #include "allocation_utils.h"
 
 
-int execute_kernel(cl_context context, cl_command_queue *queue, cl_device_id device_id, int test, cl_mem mems[], int number_of_mems_used, int verify_checksum);
-
-
+int execute_kernel(cl_context context, cl_command_queue *queue,
+                   cl_device_id device_id, int test, cl_mem mems[],
+                   int number_of_mems_used, int verify_checksum,
+                   unsigned int number_of_work_items);
diff --git a/test_conformance/allocations/allocation_fill.cpp b/test_conformance/allocations/allocation_fill.cpp
index b4ea379864..ac3c5b83e1 100644
--- a/test_conformance/allocations/allocation_fill.cpp
+++ b/test_conformance/allocations/allocation_fill.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,317 +15,387 @@
 //
 #include "allocation_fill.h"
 
-#define BUFFER_CHUNK_SIZE 8*1024*1024
+#define BUFFER_CHUNK_SIZE (8 * 1024 * 1024) // bytes per host staging chunk
 #define IMAGE_LINES 8
 
 #include "harness/compat.h"
 
-int fill_buffer_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t size, MTdata d, cl_bool blocking_write) {
-     size_t i, j;
-  cl_uint *data;
-  int error, result;
-  cl_uint checksum_delta = 0;
-  cl_event event;
-
-  size_t size_to_use = BUFFER_CHUNK_SIZE;
-  if (size_to_use > size)
-    size_to_use = size;
-
-  data = (cl_uint*)malloc(size_to_use);
-  if (data == NULL) {
-    log_error("Failed to malloc host buffer for writing into buffer.\n");
-    return FAILED_ABORT;
-  }
-  for (i=0; i<size-size_to_use; i+=size_to_use) {
-    // Put values in the data, and keep a checksum as we go along.
-    for (j=0; j<size_to_use/sizeof(cl_uint); j++) {
-      data[j] = genrand_int32(d);
-      checksum_delta += data[j];
-    }
-    if (blocking_write) {
-      error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size_to_use, data, 0, NULL, NULL);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteBuffer failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        free(data);
-        clReleaseMemObject(mem);
-        return result;
-      }
-    } else {
-      error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size_to_use, data, 0, NULL, &event);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteBuffer failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        free(data);
-        clReleaseMemObject(mem);
-        return result;
-      }
-
-      error = clWaitForEvents(1, &event);
-      result = check_allocation_error(context, device_id, error, queue, &event);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clWaitForEvents failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseEvent(event);
-        free(data);
-        clReleaseMemObject(mem);
-        return result;
-      }
-
-      clReleaseEvent(event);
+int fill_buffer_with_data(cl_context context, cl_device_id device_id,
+                          cl_command_queue *queue, cl_mem mem, size_t size,
+                          MTdata d, cl_bool blocking_write)
+{
+    size_t i, j;
+    cl_uint *data;
+    int error, result;
+    cl_uint checksum_delta = 0;
+    cl_event event;
+
+    size_t size_to_use = BUFFER_CHUNK_SIZE;
+    if (size_to_use > size) size_to_use = size;
+
+    data = (cl_uint *)malloc(size_to_use);
+    if (data == NULL)
+    {
+        log_error("Failed to malloc host buffer for writing into buffer.\n");
+        return FAILED_ABORT;
     }
-  }
-
-  // Deal with any leftover bits
-  if (i < size) {
-    // Put values in the data, and keep a checksum as we go along.
-    for (j=0; j<(size-i)/sizeof(cl_uint); j++) {
-      data[j] = (cl_uint)genrand_int32(d);
-      checksum_delta += data[j];
+    for (i = 0; i < size - size_to_use; i += size_to_use)
+    {
+        // Put values in the data, and keep a checksum as we go along.
+        for (j = 0; j < size_to_use / sizeof(cl_uint); j++)
+        {
+            data[j] = genrand_int32(d);
+            checksum_delta += data[j];
+        }
+        if (blocking_write)
+        {
+            error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size_to_use,
+                                         data, 0, NULL, NULL);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteBuffer failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                free(data);
+                clReleaseMemObject(mem);
+                return result;
+            }
+        }
+        else
+        {
+            error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size_to_use,
+                                         data, 0, NULL, &event);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteBuffer failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                free(data);
+                clReleaseMemObject(mem);
+                return result;
+            }
+
+            error = clWaitForEvents(1, &event);
+            result = check_allocation_error(context, device_id, error, queue,
+                                            &event);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clWaitForEvents failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseEvent(event);
+                free(data);
+                clReleaseMemObject(mem);
+                return result;
+            }
+
+            clReleaseEvent(event);
+        }
     }
 
-    if (blocking_write) {
-      error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size-i, data, 0, NULL, NULL);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteBuffer failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseMemObject(mem);
-        free(data);
-        return result;
-      }
-    } else {
-      error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size-i, data, 0, NULL, &event);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteBuffer failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseMemObject(mem);
-        free(data);
-        return result;
-      }
-
-      error = clWaitForEvents(1, &event);
-      result = check_allocation_error(context, device_id, error, queue, &event);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clWaitForEvents failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseEvent(event);
-        free(data);
-        clReleaseMemObject(mem);
-        return result;
-      }
-
-      clReleaseEvent(event);
+    // Deal with any leftover bits
+    if (i < size)
+    {
+        // Put values in the data, and keep a checksum as we go along.
+        for (j = 0; j < (size - i) / sizeof(cl_uint); j++)
+        {
+            data[j] = (cl_uint)genrand_int32(d);
+            checksum_delta += data[j];
+        }
+
+        if (blocking_write)
+        {
+            error = clEnqueueWriteBuffer(*queue, mem, CL_TRUE, i, size - i,
+                                         data, 0, NULL, NULL);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteBuffer failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseMemObject(mem);
+                free(data);
+                return result;
+            }
+        }
+        else
+        {
+            error = clEnqueueWriteBuffer(*queue, mem, CL_FALSE, i, size - i,
+                                         data, 0, NULL, &event);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteBuffer failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseMemObject(mem);
+                free(data);
+                return result;
+            }
+
+            error = clWaitForEvents(1, &event);
+            result = check_allocation_error(context, device_id, error, queue,
+                                            &event);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clWaitForEvents failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseEvent(event);
+                free(data);
+                clReleaseMemObject(mem);
+                return result;
+            }
+
+            clReleaseEvent(event);
+        }
     }
-  }
 
-  free(data);
-  // Only update the checksum if this succeeded.
-  checksum += checksum_delta;
-  return SUCCEEDED;
+    free(data);
+    // Only update the checksum if this succeeded.
+    checksum += checksum_delta;
+    return SUCCEEDED;
 }
 
 
-int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, size_t width, size_t height, MTdata d, cl_bool blocking_write) {
-  size_t origin[3], region[3], j;
-  int error, result;
-  cl_uint *data;
-  cl_uint checksum_delta = 0;
-  cl_event event;
-
-  size_t image_lines_to_use;
-  image_lines_to_use = IMAGE_LINES;
-  if (image_lines_to_use > height)
-      image_lines_to_use = height;
-
-  data = (cl_uint*)malloc(width*4*sizeof(cl_uint)*image_lines_to_use);
-  if (data == NULL) {
-    log_error("Failed to malloc host buffer for writing into image.\n");
-    return FAILED_ABORT;
-  }
-  origin[0] = 0;
-  origin[1] = 0;
-  origin[2] = 0;
-  region[0] = width;
-  region[1] = image_lines_to_use;
-  region[2] = 1;
-  for (origin[1] = 0; origin[1] < height - image_lines_to_use; origin[1] += image_lines_to_use) {
-    // Put values in the data, and keep a checksum as we go along.
-    for (j=0; j<width*4*image_lines_to_use; j++) {
-      data[j] = (cl_uint)genrand_int32(d);
-      checksum_delta += data[j];
+int fill_image_with_data(cl_context context, cl_device_id device_id,
+                         cl_command_queue *queue, cl_mem mem, size_t width,
+                         size_t height, MTdata d, cl_bool blocking_write)
+{
+    size_t origin[3], region[3], j;
+    int error, result;
+    cl_uint *data;
+    cl_uint checksum_delta = 0;
+    cl_event event;
+
+    size_t image_lines_to_use;
+    image_lines_to_use = IMAGE_LINES;
+    if (image_lines_to_use > height) image_lines_to_use = height;
+
+    data = (cl_uint *)malloc(width * 4 * sizeof(cl_uint) * image_lines_to_use);
+    if (data == NULL)
+    {
+        log_error("Failed to malloc host buffer for writing into image.\n");
+        return FAILED_ABORT;
     }
-
-    if (blocking_write) {
-      error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteImage failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseMemObject(mem);
-        free(data);
-        return result;
-      }
-      result = clFinish(*queue);
-      if (result != SUCCEEDED)
-      {
-          print_error(error,
-                      "clFinish failed after successful enqueuing filling "
-                      "buffer with data.");
-          return result;
-      }
-    } else {
-      error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteImage failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseMemObject(mem);
-        free(data);
-        return result;
-      }
-
-      error = clWaitForEvents(1, &event);
-      result = check_allocation_error(context, device_id, error, queue, &event);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clWaitForEvents failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clReleaseEvent(event);
-        free(data);
-        clReleaseMemObject(mem);
-        return result;
-      }
-
-      clReleaseEvent(event);
-    }
-  }
-
-  // Deal with any leftover bits
-  if (origin[1] < height) {
-    // Put values in the data, and keep a checksum as we go along.
-    for (j=0; j<width*4*(height-origin[1]); j++) {
-      data[j] = (cl_uint)genrand_int32(d);
-      checksum_delta += data[j];
+    origin[0] = 0;
+    origin[1] = 0;
+    origin[2] = 0;
+    region[0] = width;
+    region[1] = image_lines_to_use;
+    region[2] = 1;
+    for (origin[1] = 0; origin[1] < height - image_lines_to_use;
+         origin[1] += image_lines_to_use)
+    {
+        // Put values in the data, and keep a checksum as we go along.
+        for (j = 0; j < width * 4 * image_lines_to_use; j++)
+        {
+            data[j] = (cl_uint)genrand_int32(d);
+            checksum_delta += data[j];
+        }
+
+        if (blocking_write)
+        {
+            error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0,
+                                        0, data, 0, NULL, NULL);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteImage failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseMemObject(mem);
+                free(data);
+                return result;
+            }
+            result = clFinish(*queue);
+            if (result != SUCCEEDED)
+            {
+                print_error(
+                    error,
+                    "clFinish failed after successful enqueuing filling "
+                    "buffer with data.");
+                return result;
+            }
+        }
+        else
+        {
+            error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region,
+                                        0, 0, data, 0, NULL, &event);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteImage failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseMemObject(mem);
+                free(data);
+                return result;
+            }
+
+            error = clWaitForEvents(1, &event);
+            result = check_allocation_error(context, device_id, error, queue,
+                                            &event);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clWaitForEvents failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clReleaseEvent(event);
+                free(data);
+                clReleaseMemObject(mem);
+                return result;
+            }
+
+            clReleaseEvent(event);
+        }
     }
 
-    region[1] = height-origin[1];
-    if(blocking_write) {
-      error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0, 0, data, 0, NULL, NULL);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteImage failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseMemObject(mem);
-        free(data);
-        return result;
-      }
-    } else {
-      error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event);
-      result = check_allocation_error(context, device_id, error, queue);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clEnqueueWriteImage failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseMemObject(mem);
-        free(data);
-        return result;
-      }
-
-      error = clWaitForEvents(1, &event);
-      result = check_allocation_error(context, device_id, error, queue, &event);
-
-      if (result == FAILED_ABORT) {
-        print_error(error, "clWaitForEvents failed.");
-      }
-
-      if (result != SUCCEEDED) {
-        clFinish(*queue);
-        clReleaseEvent(event);
-        free(data);
-        clReleaseMemObject(mem);
-        return result;
-      }
-
-      clReleaseEvent(event);
+    // Deal with any leftover bits
+    if (origin[1] < height)
+    {
+        // Put values in the data, and keep a checksum as we go along.
+        for (j = 0; j < width * 4 * (height - origin[1]); j++)
+        {
+            data[j] = (cl_uint)genrand_int32(d);
+            checksum_delta += data[j];
+        }
+
+        region[1] = height - origin[1];
+        if (blocking_write)
+        {
+            error = clEnqueueWriteImage(*queue, mem, CL_TRUE, origin, region, 0,
+                                        0, data, 0, NULL, NULL);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteImage failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseMemObject(mem);
+                free(data);
+                return result;
+            }
+        }
+        else
+        {
+            error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region,
+                                        0, 0, data, 0, NULL, &event);
+            result = check_allocation_error(context, device_id, error, queue);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clEnqueueWriteImage failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseMemObject(mem);
+                free(data);
+                return result;
+            }
+
+            error = clWaitForEvents(1, &event);
+            result = check_allocation_error(context, device_id, error, queue,
+                                            &event);
+
+            if (result == FAILED_ABORT)
+            {
+                print_error(error, "clWaitForEvents failed.");
+            }
+
+            if (result != SUCCEEDED)
+            {
+                clFinish(*queue);
+                clReleaseEvent(event);
+                free(data);
+                clReleaseMemObject(mem);
+                return result;
+            }
+
+            clReleaseEvent(event);
+        }
     }
-  }
 
-  free(data);
-  // Only update the checksum if this succeeded.
-  checksum += checksum_delta;
-  return SUCCEEDED;
+    free(data);
+    // Only update the checksum if this succeeded.
+    checksum += checksum_delta;
+    return SUCCEEDED;
 }
 
 
+int fill_mem_with_data(cl_context context, cl_device_id device_id,
+                       cl_command_queue *queue, cl_mem mem, MTdata d,
+                       cl_bool blocking_write)
+{
+    int error;
+    cl_mem_object_type type;
+    size_t size, width, height;
 
-int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write) {
-  int error;
-  cl_mem_object_type type;
-  size_t size, width, height;
-
-  error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
-  test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
+    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
+    test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
 
-  if (type == CL_MEM_OBJECT_BUFFER) {
-    error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
-    test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
-    return fill_buffer_with_data(context, device_id, queue, mem, size, d, blocking_write);
-  } else if (type == CL_MEM_OBJECT_IMAGE2D) {
-    error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
-    test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
-    error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
-    test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
-    return fill_image_with_data(context, device_id, queue, mem, width, height, d, blocking_write);
-  }
+    if (type == CL_MEM_OBJECT_BUFFER)
+    {
+        error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
+        test_error_abort(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
+        return fill_buffer_with_data(context, device_id, queue, mem, size, d,
+                                     blocking_write);
+    }
+    else if (type == CL_MEM_OBJECT_IMAGE2D)
+    {
+        error =
+            clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
+        test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_WIDTH.");
+        error =
+            clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
+        test_error_abort(error, "clGetImageInfo failed for CL_IMAGE_HEIGHT.");
+        return fill_image_with_data(context, device_id, queue, mem, width,
+                                    height, d, blocking_write);
+    }
 
-  log_error("Invalid CL_MEM_TYPE: %d\n", type);
-  return FAILED_ABORT;
+    log_error("Invalid CL_MEM_TYPE: %d\n", type);
+    return FAILED_ABORT;
 }
-
-
-
diff --git a/test_conformance/allocations/allocation_fill.h b/test_conformance/allocations/allocation_fill.h
index 0c1085a166..2b8599ed7d 100644
--- a/test_conformance/allocations/allocation_fill.h
+++ b/test_conformance/allocations/allocation_fill.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,4 +16,6 @@
 #include "testBase.h"
 #include "allocation_utils.h"
 
-int fill_mem_with_data(cl_context context, cl_device_id device_id, cl_command_queue *queue, cl_mem mem, MTdata d, cl_bool blocking_write);
+int fill_mem_with_data(cl_context context, cl_device_id device_id,
+                       cl_command_queue *queue, cl_mem mem, MTdata d,
+                       cl_bool blocking_write);
diff --git a/test_conformance/allocations/allocation_functions.cpp b/test_conformance/allocations/allocation_functions.cpp
index 827ee1042d..ec0c83a17e 100644
--- a/test_conformance/allocations/allocation_functions.cpp
+++ b/test_conformance/allocations/allocation_functions.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,273 +17,379 @@
 #include "allocation_fill.h"
 
 
-static cl_image_format    image_format = { CL_RGBA, CL_UNSIGNED_INT32 };
+static cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT32 };
 
-int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
-  int error;
-  // log_info("\t\tAttempting to allocate a %gMB array and fill with %s writes.\n", (size_to_allocate/(1024.0*1024.0)), (blocking_write ? "blocking" : "non-blocking"));
-  *mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size_to_allocate, NULL, &error);
-  return check_allocation_error(context, device_id, error, queue);
+int allocate_buffer(cl_context context, cl_command_queue *queue,
+                    cl_device_id device_id, cl_mem *mem,
+                    size_t size_to_allocate, cl_bool blocking_write)
+{
+    int error;
+    // log_info("\t\tAttempting to allocate a %gMB array and fill with %s
+    // writes.\n", (size_to_allocate/(1024.0*1024.0)), (blocking_write ?
+    // "blocking" : "non-blocking"));
+    *mem = clCreateBuffer(context, CL_MEM_READ_WRITE, size_to_allocate, NULL,
+                          &error);
+    return check_allocation_error(context, device_id, error, queue);
 }
 
 
-int find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t *width, size_t *height, size_t* max_size) {
-  size_t max_width, max_height, num_pixels, found_width, found_height;
-  int error;
+int find_good_image_size(cl_device_id device_id, size_t size_to_allocate,
+                         size_t *width, size_t *height, size_t *max_size)
+{
+    size_t max_width, max_height, num_pixels, found_width, found_height;
+    int error;
 
-  if (checkForImageSupport(device_id)) {
-    log_info("Can not allocate an image on this device because it does not support images.");
-    return FAILED_ABORT;
-  }
+    if (checkForImageSupport(device_id))
+    {
+        log_info("Can not allocate an image on this device because it does not "
+                 "support images.");
+        return FAILED_ABORT;
+    }
+
+    if (size_to_allocate == 0)
+    {
+        log_error("Trying to allocate a zero sized image.\n");
+        return FAILED_ABORT;
+    }
 
-  if (size_to_allocate == 0) {
-      log_error("Trying to allocate a zero sized image.\n");
-      return FAILED_ABORT;
-  }
+    error = clGetDeviceInfo(device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH,
+                            sizeof(max_width), &max_width, NULL);
+    test_error_abort(error, "clGetDeviceInfo failed.");
+    error = clGetDeviceInfo(device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+                            sizeof(max_height), &max_height, NULL);
+    test_error_abort(error, "clGetDeviceInfo failed.");
 
-  error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
-  test_error_abort(error, "clGetDeviceInfo failed.");
-  error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
-  test_error_abort(error, "clGetDeviceInfo failed.");
+    num_pixels = size_to_allocate / (sizeof(cl_uint) * 4);
 
-  num_pixels = size_to_allocate / (sizeof(cl_uint)*4);
+    // Use a 64-bit variable to avoid overflow in 32-bit architectures
+    long long unsigned max_pixels = (long long unsigned)max_width * max_height;
 
-  // Use a 64-bit variable to avoid overflow in 32-bit architectures
-  long long unsigned max_pixels = (long long unsigned)max_width * max_height;
+    if (num_pixels > max_pixels)
+    {
+        if (NULL != max_size)
+        {
+            *max_size = max_width * max_height * sizeof(cl_uint) * 4;
+        }
+        return FAILED_TOO_BIG;
+    }
 
-  if (num_pixels > max_pixels) {
-    if(NULL != max_size) {
-      *max_size = max_width * max_height * sizeof(cl_uint) * 4;
+    // We want a close-to-square aspect ratio.
+    // Note that this implicitly assumes that max width >= max height
+    found_width = (int)sqrt((double)num_pixels);
+    if (found_width > max_width)
+    {
+        found_width = max_width;
     }
-    return FAILED_TOO_BIG;
-  }
-
-  // We want a close-to-square aspect ratio.
-  // Note that this implicitly assumes that  max width >= max height
-  found_width = (int)sqrt( (double) num_pixels );
-  if( found_width > max_width ) {
-    found_width = max_width;
-  }
-  if (found_width == 0)
-    found_width = 1;
-
-  found_height = (size_t)num_pixels/found_width;
-  if (found_height > max_height) {
-    found_height = max_height;
-  }
-  if (found_height == 0)
-    found_height = 1;
-
-  *width = found_width;
-  *height = found_height;
-
-  if(NULL != max_size) {
-    *max_size = found_width * found_height * sizeof(cl_uint) * 4;
-  }
-
-  return SUCCEEDED;
+    if (found_width == 0) found_width = 1;
+
+    found_height = (size_t)num_pixels / found_width;
+    if (found_height > max_height)
+    {
+        found_height = max_height;
+    }
+    if (found_height == 0) found_height = 1;
+
+    *width = found_width;
+    *height = found_height;
+
+    if (NULL != max_size)
+    {
+        *max_size = found_width * found_height * sizeof(cl_uint) * 4;
+    }
+
+    return SUCCEEDED;
 }
 
 
-int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
-  size_t width, height;
-  int error;
+int allocate_image2d_read(cl_context context, cl_command_queue *queue,
+                          cl_device_id device_id, cl_mem *mem,
+                          size_t size_to_allocate, cl_bool blocking_write)
+{
+    size_t width, height;
+    int error;
 
-  error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
-  if (error != SUCCEEDED)
-    return error;
+    error = find_good_image_size(device_id, size_to_allocate, &width, &height,
+                                 NULL);
+    if (error != SUCCEEDED) return error;
 
-  log_info("\t\tAttempting to allocate a %gMB read-only image (%d x %d) and fill with %s writes.\n",
-          (size_to_allocate/(1024.0*1024.0)), (int)width, (int)height, (blocking_write ? "blocking" : "non-blocking"));
-  *mem = create_image_2d(context, CL_MEM_READ_ONLY, &image_format, width, height, 0, NULL, &error);
+    log_info("\t\tAttempting to allocate a %gMB read-only image (%d x %d) and "
+             "fill with %s writes.\n",
+             (size_to_allocate / (1024.0 * 1024.0)), (int)width, (int)height,
+             (blocking_write ? "blocking" : "non-blocking"));
+    *mem = create_image_2d(context, CL_MEM_READ_ONLY, &image_format, width,
+                           height, 0, NULL, &error);
 
-  return check_allocation_error(context, device_id, error, queue);
+    return check_allocation_error(context, device_id, error, queue);
 }
 
 
-int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate, cl_bool blocking_write) {
-  size_t width, height;
-  int error;
+int allocate_image2d_write(cl_context context, cl_command_queue *queue,
+                           cl_device_id device_id, cl_mem *mem,
+                           size_t size_to_allocate, cl_bool blocking_write)
+{
+    size_t width, height;
+    int error;
 
-  error = find_good_image_size(device_id, size_to_allocate, &width, &height, NULL);
-  if (error != SUCCEEDED)
-    return error;
+    error = find_good_image_size(device_id, size_to_allocate, &width, &height,
+                                 NULL);
+    if (error != SUCCEEDED) return error;
 
-  //log_info("\t\tAttempting to allocate a %gMB write-only image (%d x %d) and fill with %s writes.\n",
-           //(size_to_allocate/(1024.0*1024.0)), (int)width, (int)height, (blocking_write ? "blocking" : "non-blocking"));
-  *mem = create_image_2d(context, CL_MEM_WRITE_ONLY, &image_format, width, height, 0, NULL, &error);
+    // log_info("\t\tAttempting to allocate a %gMB write-only image (%d x %d)
+    // and fill with %s writes.\n", (size_to_allocate/(1024.0*1024.0)),
+    //(int)width, (int)height, (blocking_write ? "blocking" : "non-blocking"));
+    *mem = create_image_2d(context, CL_MEM_WRITE_ONLY, &image_format, width,
+                           height, 0, NULL, &error);
 
-  return check_allocation_error(context, device_id, error, queue);
+    return check_allocation_error(context, device_id, error, queue);
 }
 
-int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem) {
-  if (type == BUFFER) return allocate_buffer(context, queue, device_id, mem, size_to_allocate, true);
-  if (type == IMAGE_READ) return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, true);
-  if (type == IMAGE_WRITE) return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, true);
-  if (type == BUFFER_NON_BLOCKING) return allocate_buffer(context, queue, device_id, mem, size_to_allocate, false);
-  if (type == IMAGE_READ_NON_BLOCKING) return allocate_image2d_read(context, queue, device_id, mem, size_to_allocate, false);
-  if (type == IMAGE_WRITE_NON_BLOCKING) return allocate_image2d_write(context, queue, device_id, mem, size_to_allocate, false);
+int do_allocation(cl_context context, cl_command_queue *queue,
+                  cl_device_id device_id, size_t size_to_allocate, int type,
+                  cl_mem *mem)
+{
+    if (type == BUFFER)
+        return allocate_buffer(context, queue, device_id, mem, size_to_allocate,
+                               true);
+    if (type == IMAGE_READ)
+        return allocate_image2d_read(context, queue, device_id, mem,
+                                     size_to_allocate, true);
+    if (type == IMAGE_WRITE)
+        return allocate_image2d_write(context, queue, device_id, mem,
+                                      size_to_allocate, true);
+    if (type == BUFFER_NON_BLOCKING)
+        return allocate_buffer(context, queue, device_id, mem, size_to_allocate,
+                               false);
+    if (type == IMAGE_READ_NON_BLOCKING)
+        return allocate_image2d_read(context, queue, device_id, mem,
+                                     size_to_allocate, false);
+    if (type == IMAGE_WRITE_NON_BLOCKING)
+        return allocate_image2d_write(context, queue, device_id, mem,
+                                      size_to_allocate, false);
     log_error("Invalid allocation type: %d\n", type);
-  return FAILED_ABORT;
+    return FAILED_ABORT;
 }
 
 
-int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
-                  int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d) {
+int allocate_size(cl_context context, cl_command_queue *queue,
+                  cl_device_id device_id, int multiple_allocations,
+                  size_t size_to_allocate, int type, cl_mem mems[],
+                  int *number_of_mems, size_t *final_size, int force_fill,
+                  MTdata d)
+{
 
     cl_ulong max_individual_allocation_size, global_mem_size;
-  int error, result;
-  size_t amount_allocated;
-  size_t reduction_amount;
-  int current_allocation;
-  size_t allocation_this_time, actual_allocation;
-
-  // Set the number of mems used to 0 so if we fail to create even a single one we don't end up returning a garbage value
-  *number_of_mems = 0;
-
-  error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_individual_allocation_size), &max_individual_allocation_size, NULL);
-  test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
-  error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
-  test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
-
-  if (global_mem_size > (cl_ulong)SIZE_MAX) {
-    global_mem_size = (cl_ulong)SIZE_MAX;
-  }
-
-//  log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
-//           max_individual_allocation_size, toMB(max_individual_allocation_size),
-//           global_mem_size, toMB(global_mem_size));
-
-  if (size_to_allocate > global_mem_size) {
-    log_error("Can not allocate more than the global memory size.\n");
-    return FAILED_ABORT;
-  }
-
-  amount_allocated = 0;
-  current_allocation = 0;
-
-  // If allocating for images, reduce the maximum allocation size to the maximum image size.
-  // If we don't do this, then the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher
-  // than the maximum image size on systems with 16GB or RAM or more. In this case, we
-  // succeed in allocating an image but its size is less than CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4
-  // (min_allocation_allowed) and thus we fail the allocation below.
-  if(type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) {
-    size_t width;
-    size_t height;
-    size_t max_size;
-    error = find_good_image_size(device_id, size_to_allocate, &width, &height, &max_size);
-    if (!(error == SUCCEEDED || error == FAILED_TOO_BIG))
-      return error;
-    if(max_size < max_individual_allocation_size)
-      max_individual_allocation_size = max_size;
-  }
-
-  reduction_amount = (size_t)max_individual_allocation_size/16;
-
-  if (type == BUFFER || type == BUFFER_NON_BLOCKING) log_info("\tAttempting to allocate a buffer of size %gMB.\n", toMB(size_to_allocate));
-  else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) log_info("\tAttempting to allocate a read-only image of size %gMB.\n", toMB(size_to_allocate));
-  else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING) log_info("\tAttempting to allocate a write-only image of size %gMB.\n", toMB(size_to_allocate));
-
-//  log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable individual allocation size is %gMB.)\n",
-//           toMB(reduction_amount), toMB(min_allocation_allowed));
-//  if (force_fill && type != IMAGE_WRITE && type != IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with random data for checksum calculation.)\n");
-
-  // If we are only doing a single allocation, only allow 1
-  int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;
-
-  // Make sure that the maximum number of images allocated is constrained by the
-  // maximum that may be passed to a kernel
-  if (type != BUFFER && type != BUFFER_NON_BLOCKING) {
-    cl_device_info param_name = (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING) ?
-      CL_DEVICE_MAX_READ_IMAGE_ARGS : CL_DEVICE_MAX_WRITE_IMAGE_ARGS;
-
-    cl_uint max_image_args;
-    error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args), &max_image_args, NULL);
-    test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
-
-    if ((int)max_image_args < max_to_allocate) {
-      log_info("\t\tMaximum number of images per kernel limited to %d\n",(int)max_image_args);
-      max_to_allocate =  max_image_args;
+    int error, result;
+    size_t amount_allocated;
+    size_t reduction_amount;
+    int current_allocation;
+    size_t allocation_this_time, actual_allocation;
+
+    // Set the number of mems used to 0 so if we fail to create even a single
+    // one we don't end up returning a garbage value
+    *number_of_mems = 0;
+
+    error = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                            sizeof(max_individual_allocation_size),
+                            &max_individual_allocation_size, NULL);
+    test_error_abort(error,
+                     "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
+    error = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE,
+                            sizeof(global_mem_size), &global_mem_size, NULL);
+    test_error_abort(error,
+                     "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
+
+    if (global_mem_size > (cl_ulong)SIZE_MAX)
+    {
+        global_mem_size = (cl_ulong)SIZE_MAX;
     }
-  }
 
+    //  log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB),
+    //  CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
+    //           max_individual_allocation_size,
+    //           toMB(max_individual_allocation_size), global_mem_size,
+    //           toMB(global_mem_size));
 
-  // Try to allocate the requested amount.
-  while (amount_allocated != size_to_allocate && current_allocation < max_to_allocate) {
-
-    // Determine how much more is needed
-    allocation_this_time = size_to_allocate - amount_allocated;
+    if (size_to_allocate > global_mem_size)
+    {
+        log_error("Can not allocate more than the global memory size.\n");
+        return FAILED_ABORT;
+    }
 
-    // Bound by the individual allocation size
-    if (allocation_this_time > max_individual_allocation_size)
-        allocation_this_time = (size_t)max_individual_allocation_size;
+    amount_allocated = 0;
+    current_allocation = 0;
+
+    // If allocating for images, reduce the maximum allocation size to the
+    // maximum image size. If we don't do this, then the value of
+    // CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 can be higher than the maximum image
+    // size on systems with 16GB of RAM or more. In this case, we succeed in
+    // allocating an image but its size is less than
+    // CL_DEVICE_MAX_MEM_ALLOC_SIZE / 4 (min_allocation_allowed) and thus we
+    // fail the allocation below.
+    if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING
+        || type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING)
+    {
+        size_t width;
+        size_t height;
+        size_t max_size;
+        error = find_good_image_size(device_id, size_to_allocate, &width,
+                                     &height, &max_size);
+        if (!(error == SUCCEEDED || error == FAILED_TOO_BIG)) return error;
+        if (max_size < max_individual_allocation_size)
+            max_individual_allocation_size = max_size;
+    }
 
-    // Allocate the largest object possible
-    result = FAILED_TOO_BIG;
-    //log_info("\t\tTrying sub-allocation %d at size %gMB.\n", current_allocation, toMB(allocation_this_time));
-    while (result == FAILED_TOO_BIG && allocation_this_time != 0) {
+    reduction_amount = (size_t)max_individual_allocation_size / 16;
+
+    if (type == BUFFER || type == BUFFER_NON_BLOCKING)
+        log_info("\tAttempting to allocate a buffer of size %gMB.\n",
+                 toMB(size_to_allocate));
+    else if (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING)
+        log_info("\tAttempting to allocate a read-only image of size %gMB.\n",
+                 toMB(size_to_allocate));
+    else if (type == IMAGE_WRITE || type == IMAGE_WRITE_NON_BLOCKING)
+        log_info("\tAttempting to allocate a write-only image of size %gMB.\n",
+                 toMB(size_to_allocate));
+
+    //  log_info("\t\t(Reduction size is %gMB per iteration, minimum allowable
+    //  individual allocation size is %gMB.)\n",
+    //           toMB(reduction_amount), toMB(min_allocation_allowed));
+    //  if (force_fill && type != IMAGE_WRITE && type !=
+    //  IMAGE_WRITE_NON_BLOCKING) log_info("\t\t(Allocations will be filled with
+    //  random data for checksum calculation.)\n");
+
+    // If we are only doing a single allocation, only allow 1
+    int max_to_allocate = multiple_allocations ? MAX_NUMBER_TO_ALLOCATE : 1;
+
+    // Make sure that the maximum number of images allocated is constrained by
+    // the maximum that may be passed to a kernel
+    if (type != BUFFER && type != BUFFER_NON_BLOCKING)
+    {
+        cl_device_info param_name =
+            (type == IMAGE_READ || type == IMAGE_READ_NON_BLOCKING)
+            ? CL_DEVICE_MAX_READ_IMAGE_ARGS
+            : CL_DEVICE_MAX_WRITE_IMAGE_ARGS;
+
+        cl_uint max_image_args;
+        error = clGetDeviceInfo(device_id, param_name, sizeof(max_image_args),
+                                &max_image_args, NULL);
+        test_error(error,
+                   "clGetDeviceInfo failed for CL_DEVICE_MAX IMAGE_ARGS");
+
+        if ((int)max_image_args < max_to_allocate)
+        {
+            log_info("\t\tMaximum number of images per kernel limited to %d\n",
+                     (int)max_image_args);
+            max_to_allocate = max_image_args;
+        }
+    }
 
-      // Create the object
-        result = do_allocation(context, queue, device_id, allocation_this_time, type, &mems[current_allocation]);
-      if (result == SUCCEEDED) {
-        // Allocation succeeded, another memory object was added to the array
-        *number_of_mems = (current_allocation+1);
 
-          // Verify the size is correct to within 1MB.
-        actual_allocation = get_actual_allocation_size(mems[current_allocation]);
-        if (fabs((double)allocation_this_time - (double)actual_allocation) > 1024.0*1024.0) {
-             log_error("Allocation not of expected size. Expected %gMB, got %gMB.\n", toMB(allocation_this_time), toMB( actual_allocation));
-          return FAILED_ABORT;
+    // Try to allocate the requested amount.
+    while (amount_allocated != size_to_allocate
+           && current_allocation < max_to_allocate)
+    {
+
+        // Determine how much more is needed
+        allocation_this_time = size_to_allocate - amount_allocated;
+
+        // Bound by the individual allocation size
+        if (allocation_this_time > max_individual_allocation_size)
+            allocation_this_time = (size_t)max_individual_allocation_size;
+
+        // Allocate the largest object possible
+        result = FAILED_TOO_BIG;
+        // log_info("\t\tTrying sub-allocation %d at size %gMB.\n",
+        // current_allocation, toMB(allocation_this_time));
+        while (result == FAILED_TOO_BIG && allocation_this_time != 0)
+        {
+
+            // Create the object
+            result =
+                do_allocation(context, queue, device_id, allocation_this_time,
+                              type, &mems[current_allocation]);
+            if (result == SUCCEEDED)
+            {
+                // Allocation succeeded, another memory object was added to the
+                // array
+                *number_of_mems = (current_allocation + 1);
+
+                // Verify the size is correct to within 1MB.
+                actual_allocation =
+                    get_actual_allocation_size(mems[current_allocation]);
+                if (fabs((double)allocation_this_time
+                         - (double)actual_allocation)
+                    > 1024.0 * 1024.0)
+                {
+                    log_error("Allocation not of expected size. Expected %gMB, "
+                              "got %gMB.\n",
+                              toMB(allocation_this_time),
+                              toMB(actual_allocation));
+                    return FAILED_ABORT;
+                }
+
+                // If we are filling the allocation for verification do so
+                if (force_fill)
+                {
+                    // log_info("\t\t\tWriting random values to object and
+                    // calculating checksum.\n");
+                    cl_bool blocking_write = true;
+                    if (type == BUFFER_NON_BLOCKING
+                        || type == IMAGE_READ_NON_BLOCKING
+                        || type == IMAGE_WRITE_NON_BLOCKING)
+                    {
+                        blocking_write = false;
+                    }
+                    result = fill_mem_with_data(context, device_id, queue,
+                                                mems[current_allocation], d,
+                                                blocking_write);
+                }
+            }
+
+            // If creation failed, try to create a smaller object
+            if (result == FAILED_TOO_BIG)
+            {
+                // log_info("\t\t\tAllocation %d failed at size %gMB. Trying
+                // smaller.\n", current_allocation, toMB(allocation_this_time));
+                if (allocation_this_time > reduction_amount)
+                    allocation_this_time -= reduction_amount;
+                else if (reduction_amount > 1)
+                {
+                    reduction_amount /= 2;
+                }
+                else
+                {
+                    allocation_this_time = 0;
+                }
+            }
         }
 
-        // If we are filling the allocation for verification do so
-        if (force_fill) {
-          //log_info("\t\t\tWriting random values to object and calculating checksum.\n");
-          cl_bool blocking_write = true;
-          if (type == BUFFER_NON_BLOCKING || type == IMAGE_READ_NON_BLOCKING || type == IMAGE_WRITE_NON_BLOCKING) {
-            blocking_write = false;
-          }
-          result = fill_mem_with_data(context, device_id, queue, mems[current_allocation], d, blocking_write);
-        }
-      }
-
-      // If creation failed, try to create a smaller object
-      if (result == FAILED_TOO_BIG) {
-        //log_info("\t\t\tAllocation %d failed at size %gMB. Trying smaller.\n", current_allocation, toMB(allocation_this_time));
-        if (allocation_this_time > reduction_amount)
-            allocation_this_time -= reduction_amount;
-        else if (reduction_amount > 1) {
-          reduction_amount /= 2;
-        }
-        else {
-          allocation_this_time = 0;
+        if (result == FAILED_ABORT)
+        {
+            log_error("\t\tAllocation failed.\n");
+            return FAILED_ABORT;
         }
 
-      }
-    }
+        if (!allocation_this_time)
+        {
+            log_info("\t\tFailed to allocate %gMB across several objects.\n",
+                     toMB(size_to_allocate));
+            return FAILED_TOO_BIG;
+        }
 
-    if (result == FAILED_ABORT) {
-      log_error("\t\tAllocation failed.\n");
-      return FAILED_ABORT;
-    }
+        // Otherwise we succeeded
+        if (result != SUCCEEDED)
+        {
+            log_error("Test logic error.");
+            exit(-1);
+        }
+        amount_allocated += allocation_this_time;
 
-    if (!allocation_this_time) {
-      log_info("\t\tFailed to allocate %gMB across several objects.\n", toMB(size_to_allocate));
-      return FAILED_TOO_BIG;
-    }
+        *final_size = amount_allocated;
 
-    // Otherwise we succeeded
-    if (result != SUCCEEDED) {
-      log_error("Test logic error.");
-      exit(-1);
+        current_allocation++;
     }
-    amount_allocated += allocation_this_time;
-
-    *final_size = amount_allocated;
-
-    current_allocation++;
-  }
 
-  log_info("\t\tSucceeded in allocating %gMB using %d memory objects.\n", toMB(amount_allocated), current_allocation);
-  return SUCCEEDED;
+    log_info("\t\tSucceeded in allocating %gMB using %d memory objects.\n",
+             toMB(amount_allocated), current_allocation);
+    return SUCCEEDED;
 }
diff --git a/test_conformance/allocations/allocation_functions.h b/test_conformance/allocations/allocation_functions.h
index 939a993bba..d93a09209c 100644
--- a/test_conformance/allocations/allocation_functions.h
+++ b/test_conformance/allocations/allocation_functions.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,9 +16,20 @@
 #include "testBase.h"
 #include "allocation_utils.h"
 
-int do_allocation(cl_context context, cl_command_queue *queue, cl_device_id device_id, size_t size_to_allocate, int type, cl_mem *mem);
-int allocate_buffer(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
-int allocate_image2d_read(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
-int allocate_image2d_write(cl_context context, cl_command_queue *queue, cl_device_id device_id, cl_mem *mem, size_t size_to_allocate);
-int allocate_size(cl_context context, cl_command_queue *queue, cl_device_id device_id, int multiple_allocations, size_t size_to_allocate,
-                  int type, cl_mem mems[], int *number_of_mems, size_t *final_size, int force_fill, MTdata d);
+int do_allocation(cl_context context, cl_command_queue *queue,
+                  cl_device_id device_id, size_t size_to_allocate, int type,
+                  cl_mem *mem);
+int allocate_buffer(cl_context context, cl_command_queue *queue,
+                    cl_device_id device_id, cl_mem *mem,
+                    size_t size_to_allocate);
+int allocate_image2d_read(cl_context context, cl_command_queue *queue,
+                          cl_device_id device_id, cl_mem *mem,
+                          size_t size_to_allocate);
+int allocate_image2d_write(cl_context context, cl_command_queue *queue,
+                           cl_device_id device_id, cl_mem *mem,
+                           size_t size_to_allocate);
+int allocate_size(cl_context context, cl_command_queue *queue,
+                  cl_device_id device_id, int multiple_allocations,
+                  size_t size_to_allocate, int type, cl_mem mems[],
+                  int *number_of_mems, size_t *final_size, int force_fill,
+                  MTdata d);
diff --git a/test_conformance/allocations/allocation_utils.cpp b/test_conformance/allocations/allocation_utils.cpp
index 7d6520b0e4..95575750f6 100644
--- a/test_conformance/allocations/allocation_utils.cpp
+++ b/test_conformance/allocations/allocation_utils.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,90 +15,116 @@
 //
 #include "allocation_utils.h"
 
-cl_command_queue reset_queue(cl_context context, cl_device_id device_id, cl_command_queue *queue, int *error)
+cl_command_queue reset_queue(cl_context context, cl_device_id device_id,
+                             cl_command_queue *queue, int *error)
 {
-  log_info("Invalid command queue. Releasing and recreating the command queue.\n");
-  clReleaseCommandQueue(*queue);
+    log_info(
+        "Invalid command queue. Releasing and recreating the command queue.\n");
+    clReleaseCommandQueue(*queue);
     *queue = clCreateCommandQueue(context, device_id, 0, error);
-  return *queue;
+    return *queue;
 }
 
-int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue, cl_event *event) {
-  //log_info("check_allocation_error context=%p device_id=%p error=%d *queue=%p\n", context, device_id, error, *queue);
-  if (error == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST && event != 0)
-  {
-    // check for errors from clWaitForEvents (e.g after clEnqueueWriteBuffer)
-    cl_int eventError;
-    error = clGetEventInfo(*event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(error), &eventError, 0);
-    if (CL_SUCCESS != error)
+int check_allocation_error(cl_context context, cl_device_id device_id,
+                           int error, cl_command_queue *queue, cl_event *event)
+{
+    // log_info("check_allocation_error context=%p device_id=%p error=%d
+    // *queue=%p\n", context, device_id, error, *queue);
+    if (error == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST && event != 0)
+    {
+        // check for errors from clWaitForEvents (e.g. after
+        // clEnqueueWriteBuffer)
+        cl_int eventError;
+        error = clGetEventInfo(*event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                               sizeof(error), &eventError, 0);
+        if (CL_SUCCESS != error)
+        {
+            log_error("Failed to get event execution status: %s\n",
+                      IGetErrorString(error));
+            return FAILED_ABORT;
+        }
+        if (eventError >= 0)
+        {
+            log_error("Non-negative event execution status after "
+                      "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: %s\n",
+                      IGetErrorString(error));
+            return FAILED_ABORT;
+        }
+        error = eventError;
+    }
+    if ((error == CL_MEM_OBJECT_ALLOCATION_FAILURE)
+        || (error == CL_OUT_OF_RESOURCES) || (error == CL_OUT_OF_HOST_MEMORY)
+        || (error == CL_INVALID_IMAGE_SIZE))
     {
-      log_error("Failed to get event execution status: %s\n", IGetErrorString(error));
-      return FAILED_ABORT;
+        return FAILED_TOO_BIG;
     }
-    if (eventError >= 0)
+    else if (error == CL_INVALID_COMMAND_QUEUE)
     {
-      log_error("Non-negative event execution status after CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: %s\n", IGetErrorString(error));
-      return FAILED_ABORT;
+        *queue = reset_queue(context, device_id, queue, &error);
+        if (CL_SUCCESS != error)
+        {
+            log_error(
+                "Failed to reset command queue after corrupted queue: %s\n",
+                IGetErrorString(error));
+            return FAILED_ABORT;
+        }
+        // Try again with smaller resources.
+        return FAILED_TOO_BIG;
     }
-    error = eventError;
-  }
-  if ((error == CL_MEM_OBJECT_ALLOCATION_FAILURE ) || (error == CL_OUT_OF_RESOURCES ) || (error == CL_OUT_OF_HOST_MEMORY) || (error == CL_INVALID_IMAGE_SIZE)) {
-    return FAILED_TOO_BIG;
-  } else if (error == CL_INVALID_COMMAND_QUEUE) {
-    *queue = reset_queue(context, device_id, queue, &error);
-    if (CL_SUCCESS != error)
+    else if (error != CL_SUCCESS)
     {
-      log_error("Failed to reset command queue after corrupted queue: %s\n", IGetErrorString(error));
-      return FAILED_ABORT;
+        log_error("Allocation failed with %s.\n", IGetErrorString(error));
+        return FAILED_ABORT;
     }
-    // Try again with smaller resources.
-    return FAILED_TOO_BIG;
-  } else if (error != CL_SUCCESS) {
-    log_error("Allocation failed with %s.\n", IGetErrorString(error));
-    return FAILED_ABORT;
-  }
-  return SUCCEEDED;
+    return SUCCEEDED;
 }
 
 
-double toMB(cl_ulong size_in) {
-  return (double)size_in/(1024.0*1024.0);
-}
-
-size_t get_actual_allocation_size(cl_mem mem) {
-  int error;
-  cl_mem_object_type type;
-  size_t size, width, height;
+double toMB(cl_ulong size_in) { return (double)size_in / (1024.0 * 1024.0); }
 
-  error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
-  if (error) {
-      print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
-    return 0;
-  }
+size_t get_actual_allocation_size(cl_mem mem)
+{
+    int error;
+    cl_mem_object_type type;
+    size_t size, width, height;
 
-  if (type == CL_MEM_OBJECT_BUFFER) {
-    error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
-    if (error) {
-      print_error(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
-      return 0;
+    error = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(type), &type, NULL);
+    if (error)
+    {
+        print_error(error, "clGetMemObjectInfo failed for CL_MEM_TYPE.");
+        return 0;
     }
-    return size;
-  } else if (type == CL_MEM_OBJECT_IMAGE2D) {
-    error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
-    if (error) {
-      print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
-      return 0;
+
+    if (type == CL_MEM_OBJECT_BUFFER)
+    {
+        error = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size), &size, NULL);
+        if (error)
+        {
+            print_error(error, "clGetMemObjectInfo failed for CL_MEM_SIZE.");
+            return 0;
+        }
+        return size;
     }
-    error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
-    if (error) {
-      print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
-      return 0;
+    else if (type == CL_MEM_OBJECT_IMAGE2D)
+    {
+        error =
+            clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
+        if (error)
+        {
+            print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
+            return 0;
+        }
+        error =
+            clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
+        if (error)
+        {
+            print_error(error,
+                        "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
+            return 0;
+        }
+        return width * height * 4 * sizeof(cl_uint);
     }
-    return width*height*4*sizeof(cl_uint);
-  }
 
-  log_error("Invalid CL_MEM_TYPE: %d\n", type);
-  return 0;
+    log_error("Invalid CL_MEM_TYPE: %d\n", type);
+    return 0;
 }
-
-
diff --git a/test_conformance/allocations/allocation_utils.h b/test_conformance/allocations/allocation_utils.h
index 2d165c1e05..241b139ced 100644
--- a/test_conformance/allocations/allocation_utils.h
+++ b/test_conformance/allocations/allocation_utils.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,7 +20,9 @@
 
 extern cl_uint checksum;
 
-int check_allocation_error(cl_context context, cl_device_id device_id, int error, cl_command_queue *queue, cl_event *event = 0);
+int check_allocation_error(cl_context context, cl_device_id device_id,
+                           int error, cl_command_queue *queue,
+                           cl_event *event = 0);
 double toMB(cl_ulong size_in);
 size_t get_actual_allocation_size(cl_mem mem);
 
diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp
index 6ef83c680d..65d2699f3b 100644
--- a/test_conformance/allocations/main.cpp
+++ b/test_conformance/allocations/main.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -31,66 +31,86 @@ int g_multiple_allocations = 0;
 int g_execute_kernel = 1;
 
 static size_t g_max_size;
-static RandomSeed g_seed( gRandomSeed );
+static RandomSeed g_seed(gRandomSeed);
 
 cl_long g_max_individual_allocation_size;
 cl_long g_global_mem_size;
 
 cl_uint checksum;
 
-static void printUsage( const char *execName );
+static void printUsage(const char *execName);
 
-test_status init_cl( cl_device_id device ) {
+test_status init_cl(cl_device_id device)
+{
     int error;
 
-    error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(g_max_individual_allocation_size), &g_max_individual_allocation_size, NULL );
-    if ( error ) {
-        print_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                            sizeof(g_max_individual_allocation_size),
+                            &g_max_individual_allocation_size, NULL);
+    if (error)
+    {
+        print_error(error,
+                    "clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE");
         return TEST_FAIL;
     }
-    error = clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(g_global_mem_size), &g_global_mem_size, NULL );
-    if ( error ) {
-        print_error( error, "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
+    error =
+        clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
+                        sizeof(g_global_mem_size), &g_global_mem_size, NULL);
+    if (error)
+    {
+        print_error(error,
+                    "clGetDeviceInfo failed for CL_DEVICE_GLOBAL_MEM_SIZE");
         return TEST_FAIL;
     }
 
-    log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
-             llu( g_max_individual_allocation_size ), toMB( g_max_individual_allocation_size ),
-             llu( g_global_mem_size ), toMB( g_global_mem_size ) );
+    log_info("Device reports CL_DEVICE_MAX_MEM_ALLOC_SIZE=%llu bytes (%gMB), "
+             "CL_DEVICE_GLOBAL_MEM_SIZE=%llu bytes (%gMB).\n",
+             llu(g_max_individual_allocation_size),
+             toMB(g_max_individual_allocation_size), llu(g_global_mem_size),
+             toMB(g_global_mem_size));
 
-    if( g_global_mem_size > (cl_ulong)SIZE_MAX )
+    if (g_global_mem_size > (cl_ulong)SIZE_MAX)
     {
         g_global_mem_size = (cl_ulong)SIZE_MAX;
     }
 
-    if( g_max_individual_allocation_size > g_global_mem_size )
+    if (g_max_individual_allocation_size > g_global_mem_size)
     {
-        log_error( "FAILURE:  CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n",
-                   llu( g_max_individual_allocation_size ), llu( g_global_mem_size ) );
+        log_error("FAILURE:  CL_DEVICE_MAX_MEM_ALLOC_SIZE (%llu) is greater "
+                  "than the CL_DEVICE_GLOBAL_MEM_SIZE (%llu)\n",
+                  llu(g_max_individual_allocation_size),
+                  llu(g_global_mem_size));
         return TEST_FAIL;
     }
 
-    // We may need to back off the global_mem_size on unified memory devices to leave room for application and operating system code
-    // and associated data in the working set, so we dont start pathologically paging.
-    // Check to see if we are a unified memory device
+    // We may need to back off the global_mem_size on unified memory devices to
+    // leave room for application and operating system code and associated data
+    // in the working set, so we don't start pathologically paging. Check to see
+    // if we are a unified memory device.
     cl_bool hasUnifiedMemory = CL_FALSE;
-    if( ( error = clGetDeviceInfo( device, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( hasUnifiedMemory ), &hasUnifiedMemory, NULL ) ) )
+    if ((error = clGetDeviceInfo(device, CL_DEVICE_HOST_UNIFIED_MEMORY,
+                                 sizeof(hasUnifiedMemory), &hasUnifiedMemory,
+                                 NULL)))
     {
-        print_error( error, "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
+        print_error(error,
+                    "clGetDeviceInfo failed for CL_DEVICE_HOST_UNIFIED_MEMORY");
         return TEST_FAIL;
     }
     // we share unified memory so back off to 1/2 the global memory size.
-    if( CL_TRUE == hasUnifiedMemory )
+    if (CL_TRUE == hasUnifiedMemory)
     {
-        g_global_mem_size -= g_global_mem_size /2;
-        log_info( "Device shares memory with the host, so backing off the maximum combined allocation size to be %gMB to avoid rampant paging.\n",
-                  toMB( g_global_mem_size ) );
+        g_global_mem_size -= g_global_mem_size / 2;
+        log_info(
+            "Device shares memory with the host, so backing off the maximum "
+            "combined allocation size to be %gMB to avoid rampant paging.\n",
+            toMB(g_global_mem_size));
     }
     else
     {
-        // Lets just use 60% of total available memory as framework/driver may not allow using all of it
-        // e.g. vram on GPU is used by window server and even for this test, we need some space for context,
-        // queue, kernel code on GPU.
+        // Let's just use 60% of total available memory as framework/driver may
+        // not allow using all of it, e.g. vram on GPU is used by window server
+        // and even for this test, we need some space for context, queue, kernel
+        // code on GPU.
         g_global_mem_size *= 0.60;
     }
     /* Cap the allocation size as the global size was deduced */
@@ -99,15 +119,16 @@ test_status init_cl( cl_device_id device ) {
         g_max_individual_allocation_size = g_global_mem_size;
     }
 
-    if( gReSeed )
+    if (gReSeed)
     {
-        g_seed = RandomSeed( gRandomSeed );
+        g_seed = RandomSeed(gRandomSeed);
     }
 
     return TEST_PASS;
 }
 
-int doTest( cl_device_id device, cl_context context, cl_command_queue queue, AllocType alloc_type )
+int doTest(cl_device_id device, cl_context context, cl_command_queue queue,
+           AllocType alloc_type)
 {
     int error;
     int failure_counts = 0;
@@ -116,117 +137,141 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
     cl_mem mems[MAX_NUMBER_TO_ALLOCATE];
     int number_of_mems_used;
     cl_ulong max_individual_allocation_size = g_max_individual_allocation_size;
-    cl_ulong global_mem_size = g_global_mem_size ;
+    cl_ulong global_mem_size = g_global_mem_size;
+    unsigned int number_of_work_itmes = 8192 * 32;
     const bool allocate_image =
         (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING);
 
-    static const char* alloc_description[] = {
-        "buffer(s)",
-        "read-only image(s)",
-        "write-only image(s)",
-        "buffer(s)",
-        "read-only image(s)",
-        "write-only image(s)",
+    static const char *alloc_description[] = {
+        "buffer(s)", "read-only image(s)", "write-only image(s)",
+        "buffer(s)", "read-only image(s)", "write-only image(s)",
     };
 
     // Skip image tests if we don't support images on the device
     if (allocate_image && checkForImageSupport(device))
     {
-        log_info( "Can not test image allocation because device does not support images.\n" );
+        log_info("Can not test image allocation because device does not "
+                 "support images.\n");
         return 0;
     }
 
     // This section was added in order to fix a bug in the test
-    // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT
-    // The test will fail in image allocations as the size requested for the allocation will be much grater than the maximum size allowed for image
+    // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much greater than
+    // CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT, the test will
+    // fail in image allocations as the size requested for the allocation will
+    // be much greater than the maximum size allowed for an image.
     if (allocate_image)
     {
         size_t max_width, max_height;
 
-        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL );
-        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH" );
+        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH,
+                                sizeof(max_width), &max_width, NULL);
+        test_error_abort(
+            error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_WIDTH");
 
-        error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( max_height ), &max_height, NULL );
-        test_error_abort( error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT" );
+        error = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+                                sizeof(max_height), &max_height, NULL);
+        test_error_abort(
+            error, "clGetDeviceInfo failed for CL_DEVICE_IMAGE2D_MAX_HEIGHT");
 
-        cl_ulong max_image2d_size = (cl_ulong)max_height * max_width * 4 * sizeof(cl_uint);
+        cl_ulong max_image2d_size =
+            (cl_ulong)max_height * max_width * 4 * sizeof(cl_uint);
 
-        if( max_individual_allocation_size > max_image2d_size )
+        if (max_individual_allocation_size > max_image2d_size)
         {
             max_individual_allocation_size = max_image2d_size;
         }
     }
 
-    // Pick the baseline size based on whether we are doing a single large or multiple allocations
-    g_max_size = g_multiple_allocations ? (size_t)global_mem_size : (size_t)max_individual_allocation_size;
+    // Pick the baseline size based on whether we are doing a single large or
+    // multiple allocations
+    g_max_size = g_multiple_allocations
+        ? (size_t)global_mem_size
+        : (size_t)max_individual_allocation_size;
 
     // Adjust based on the percentage
-    if( g_reduction_percentage != 100 )
+    if (g_reduction_percentage != 100)
     {
-        log_info( "NOTE: reducing max allocations to %d%%.\n", g_reduction_percentage );
-        g_max_size = (size_t)( (double)g_max_size * (double)g_reduction_percentage / 100.0 );
+        log_info("NOTE: reducing max allocations to %d%%.\n",
+                 g_reduction_percentage);
+        g_max_size = (size_t)((double)g_max_size
+                              * (double)g_reduction_percentage / 100.0);
+        number_of_work_itmes = 8192 * 2;
     }
 
     // Round to nearest MB.
     g_max_size &= (size_t)(0xFFFFFFFFFF00000ULL);
 
-    log_info( "** Target allocation size (rounded to nearest MB) is: %llu bytes (%gMB).\n", llu( g_max_size ), toMB( g_max_size ) );
-    log_info( "** Allocating %s to size %gMB.\n", alloc_description[alloc_type], toMB( g_max_size ) );
+    log_info("** Target allocation size (rounded to nearest MB) is: %llu bytes "
+             "(%gMB).\n",
+             llu(g_max_size), toMB(g_max_size));
+    log_info("** Allocating %s to size %gMB.\n", alloc_description[alloc_type],
+             toMB(g_max_size));
 
-    for( int count = 0; count < g_repetition_count; count++ )
+    for (int count = 0; count < g_repetition_count; count++)
     {
         current_test_size = g_max_size;
         error = FAILED_TOO_BIG;
-        log_info( "  => Allocation %d\n", count + 1 );
+        log_info("  => Allocation %d\n", count + 1);
 
-        while( ( error == FAILED_TOO_BIG ) && ( current_test_size > g_max_size / 8 ) )
+        while ((error == FAILED_TOO_BIG)
+               && (current_test_size > g_max_size / 8))
         {
             // Reset our checksum for each allocation
             checksum = 0;
 
             // Do the allocation
-            error = allocate_size( context, &queue, device, g_multiple_allocations, current_test_size, alloc_type,
-                                   mems, &number_of_mems_used, &final_size, g_write_allocations, g_seed );
+            error = allocate_size(context, &queue, device,
+                                  g_multiple_allocations, current_test_size,
+                                  alloc_type, mems, &number_of_mems_used,
+                                  &final_size, g_write_allocations, g_seed);
 
             // If we succeeded and we're supposed to execute a kernel, do so.
-            if( error == SUCCEEDED && g_execute_kernel )
+            if (error == SUCCEEDED && g_execute_kernel)
             {
-                log_info( "\tExecuting kernel with memory objects.\n" );
-                error = execute_kernel( context, &queue, device, alloc_type, mems, number_of_mems_used,
-                                        g_write_allocations );
+                log_info("\tExecuting kernel with memory objects.\n");
+                error =
+                    execute_kernel(context, &queue, device, alloc_type, mems,
+                                   number_of_mems_used, g_write_allocations,
+                                   number_of_work_itmes);
             }
 
-            // If we failed to allocate more than 1/8th of the requested amount return a failure.
-            if( final_size < (size_t)g_max_size / 8 )
+            // If we failed to allocate more than 1/8th of the requested amount
+            // return a failure.
+            if (final_size < (size_t)g_max_size / 8)
             {
-                log_error( "===> Allocation %d failed to allocate more than 1/8th of the requested size.\n", count + 1 );
+                log_error("===> Allocation %d failed to allocate more than "
+                          "1/8th of the requested size.\n",
+                          count + 1);
                 failure_counts++;
             }
 
             // Clean up.
-            for( int i = 0; i < number_of_mems_used; i++ )
+            for (int i = 0; i < number_of_mems_used; i++)
             {
-                clReleaseMemObject( mems[i] );
+                clReleaseMemObject(mems[i]);
             }
 
-            if( error == FAILED_ABORT )
+            if (error == FAILED_ABORT)
             {
-                log_error( "  => Allocation %d failed.\n", count + 1 );
+                log_error("  => Allocation %d failed.\n", count + 1);
                 failure_counts++;
             }
 
-            if( error == FAILED_TOO_BIG )
+            if (error == FAILED_TOO_BIG)
             {
                 current_test_size -= g_max_size / 16;
-                log_info( "\tFailed at this size; trying a smaller size of %gMB.\n", toMB( current_test_size ) );
+                log_info(
+                    "\tFailed at this size; trying a smaller size of %gMB.\n",
+                    toMB(current_test_size));
             }
         }
 
-        if( error == SUCCEEDED && current_test_size == g_max_size )
+        if (error == SUCCEEDED && current_test_size == g_max_size)
         {
             log_info("\tPASS: Allocation succeeded.\n");
         }
-        else if( error == SUCCEEDED && current_test_size > g_max_size / 8 )
+        else if (error == SUCCEEDED && current_test_size > g_max_size / 8)
         {
             log_info("\tPASS: Allocation succeeded at reduced size.\n");
         }
@@ -240,41 +285,47 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All
     return failure_counts;
 }
 
-int test_buffer(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_buffer(cl_device_id device, cl_context context, cl_command_queue queue,
+                int num_elements)
 {
-    return doTest( device, context, queue, BUFFER );
+    return doTest(device, context, queue, BUFFER);
 }
-int test_image2d_read(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_image2d_read(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements)
 {
-    return doTest( device, context, queue, IMAGE_READ );
+    return doTest(device, context, queue, IMAGE_READ);
 }
-int test_image2d_write(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_image2d_write(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int num_elements)
 {
-    return doTest( device, context, queue, IMAGE_WRITE );
+    return doTest(device, context, queue, IMAGE_WRITE);
 }
-int test_buffer_non_blocking(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_buffer_non_blocking(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements)
 {
-    return doTest( device, context, queue, BUFFER_NON_BLOCKING );
+    return doTest(device, context, queue, BUFFER_NON_BLOCKING);
 }
-int test_image2d_read_non_blocking(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_image2d_read_non_blocking(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
 {
-    return doTest( device, context, queue, IMAGE_READ_NON_BLOCKING );
+    return doTest(device, context, queue, IMAGE_READ_NON_BLOCKING);
 }
-int test_image2d_write_non_blocking(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_image2d_write_non_blocking(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
 {
-    return doTest( device, context, queue, IMAGE_WRITE_NON_BLOCKING );
+    return doTest(device, context, queue, IMAGE_WRITE_NON_BLOCKING);
 }
 
 test_definition test_list[] = {
-    ADD_TEST( buffer ),
-    ADD_TEST( image2d_read ),
-    ADD_TEST( image2d_write ),
-    ADD_TEST( buffer_non_blocking ),
-    ADD_TEST( image2d_read_non_blocking ),
-    ADD_TEST( image2d_write_non_blocking ),
+    ADD_TEST(buffer),
+    ADD_TEST(image2d_read),
+    ADD_TEST(image2d_write),
+    ADD_TEST(buffer_non_blocking),
+    ADD_TEST(image2d_read_non_blocking),
+    ADD_TEST(image2d_write_non_blocking),
 };
 
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
 
 int main(int argc, const char *argv[])
 {
@@ -287,11 +338,11 @@ int main(int argc, const char *argv[])
         return 1;
     }
 
-    const char ** argList = (const char **)calloc( argc, sizeof( char*) );
+    const char **argList = (const char **)calloc(argc, sizeof(char *));
 
-    if( NULL == argList )
+    if (NULL == argList)
     {
-        log_error( "Failed to allocate memory for argList array.\n" );
+        log_error("Failed to allocate memory for argList array.\n");
         return 1;
     }
 
@@ -299,38 +350,40 @@ int main(int argc, const char *argv[])
     size_t argCount = 1;
 
     // Parse arguments
-    for( int i = 1; i < argc; i++ )
+    for (int i = 1; i < argc; i++)
     {
-        if( strcmp( argv[i], "multiple" ) == 0 )
+        if (strcmp(argv[i], "multiple") == 0)
             g_multiple_allocations = 1;
-        else if( strcmp( argv[i], "single" ) == 0 )
+        else if (strcmp(argv[i], "single") == 0)
             g_multiple_allocations = 0;
 
-        else if( ( r = (int)strtol( argv[i], &endPtr, 10 ) ) && ( endPtr != argv[i] ) && ( *endPtr == 0 ) )
+        else if ((r = (int)strtol(argv[i], &endPtr, 10)) && (endPtr != argv[i])
+                 && (*endPtr == 0))
         {
-            // By spec, that means the entire string was an integer, so take it as a repetition count
+            // By spec, that means the entire string was an integer, so take it
+            // as a repetition count
             g_repetition_count = r;
         }
 
-        else if( strchr( argv[i], '%' ) != NULL )
+        else if (strchr(argv[i], '%') != NULL)
         {
             // Reduction percentage (let strtol ignore the percentage)
-            g_reduction_percentage = (int)strtol( argv[i], NULL, 10 );
+            g_reduction_percentage = (int)strtol(argv[i], NULL, 10);
         }
 
-        else if( strcmp( argv[i], "do_not_force_fill" ) == 0 )
+        else if (strcmp(argv[i], "do_not_force_fill") == 0)
         {
             g_write_allocations = 0;
         }
 
-        else if( strcmp( argv[i], "do_not_execute" ) == 0 )
+        else if (strcmp(argv[i], "do_not_execute") == 0)
         {
             g_execute_kernel = 0;
         }
 
-        else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 )
+        else if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-h") == 0)
         {
-            printUsage( argv[0] );
+            printUsage(argv[0]);
             free(argList);
             return -1;
         }
@@ -342,35 +395,42 @@ int main(int argc, const char *argv[])
         }
     }
 
-    int ret = runTestHarnessWithCheck( argCount, argList, test_num, test_list, false, 0, init_cl );
+    int ret = runTestHarnessWithCheck(argCount, argList, test_num, test_list,
+                                      false, 0, init_cl);
 
     free(argList);
     return ret;
 }
 
-void printUsage( const char *execName )
+void printUsage(const char *execName)
 {
-    const char *p = strrchr( execName, '/' );
-    if( p != NULL )
-        execName = p + 1;
-
-    log_info( "Usage: %s [options] [test_names]\n", execName );
-    log_info( "Options:\n" );
-    log_info( "\trandomize - Uses random seed\n" );
-    log_info( "\tsingle - Tests using a single allocation as large as possible\n" );
-    log_info( "\tmultiple - Tests using as many allocations as possible\n" );
-    log_info( "\n" );
-    log_info( "\tnumReps - Optional integer specifying the number of repetitions to run and average the result (defaults to 1)\n" );
-    log_info( "\treduction%% - Optional integer, followed by a %% sign, that acts as a multiplier for the target amount of memory.\n" );
-    log_info( "\t             Example: target amount of 512MB and a reduction of 75%% will result in a target of 384MB.\n" );
-    log_info( "\n" );
-    log_info( "\tdo_not_force_fill - Disable explicitly write data to all memory objects after creating them.\n" );
-    log_info( "\t                    Without this, the kernel execution can not verify its checksum.\n" );
-    log_info( "\tdo_not_execute - Disable executing a kernel that accesses all of the memory objects.\n" );
-    log_info( "\n" );
-    log_info( "Test names (Allocation Types):\n" );
-    for( int i = 0; i < test_num; i++ )
+    const char *p = strrchr(execName, '/');
+    if (p != NULL) execName = p + 1;
+
+    log_info("Usage: %s [options] [test_names]\n", execName);
+    log_info("Options:\n");
+    log_info("\trandomize - Uses random seed\n");
+    log_info(
+        "\tsingle - Tests using a single allocation as large as possible\n");
+    log_info("\tmultiple - Tests using as many allocations as possible\n");
+    log_info("\n");
+    log_info("\tnumReps - Optional integer specifying the number of "
+             "repetitions to run and average the result (defaults to 1)\n");
+    log_info("\treduction%% - Optional integer, followed by a %% sign, that "
+             "acts as a multiplier for the target amount of memory.\n");
+    log_info("\t             Example: target amount of 512MB and a reduction "
+             "of 75%% will result in a target of 384MB.\n");
+    log_info("\n");
+    log_info("\tdo_not_force_fill - Disable explicitly write data to all "
+             "memory objects after creating them.\n");
+    log_info("\t                    Without this, the kernel execution can not "
+             "verify its checksum.\n");
+    log_info("\tdo_not_execute - Disable executing a kernel that accesses all "
+             "of the memory objects.\n");
+    log_info("\n");
+    log_info("Test names (Allocation Types):\n");
+    for (int i = 0; i < test_num; i++)
     {
-        log_info( "\t%s\n", test_list[i].name );
+        log_info("\t%s\n", test_list[i].name);
     }
 }
diff --git a/test_conformance/allocations/testBase.h b/test_conformance/allocations/testBase.h
index b48efe5188..d320907522 100644
--- a/test_conformance/allocations/testBase.h
+++ b/test_conformance/allocations/testBase.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -39,9 +39,10 @@
 #define FAILED_CORRUPTED_QUEUE -2
 #define FAILED_ABORT -1
 #define FAILED_TOO_BIG 1
-// On Windows macro `SUCCEEDED' is defined in `WinError.h'. It causes compiler warnings. Let us avoid them.
-#if defined( _WIN32 ) && defined( SUCCEEDED )
-    #undef SUCCEEDED
+// On Windows macro `SUCCEEDED' is defined in `WinError.h'. It causes compiler
+// warnings. Let us avoid them.
+#if defined(_WIN32) && defined(SUCCEEDED)
+#undef SUCCEEDED
 #endif
 #define SUCCEEDED 0
 
@@ -55,11 +56,16 @@ enum AllocType
     IMAGE_WRITE_NON_BLOCKING,
 };
 
-#define test_error_abort(errCode,msg)    test_error_ret_abort(errCode,msg,errCode)
-#define test_error_ret_abort(errCode,msg,retValue)    { if( errCode != CL_SUCCESS ) { print_error( errCode, msg ); return FAILED_ABORT ; } }
+#define test_error_abort(errCode, msg)                                         \
+    test_error_ret_abort(errCode, msg, errCode)
+#define test_error_ret_abort(errCode, msg, retValue)                           \
+    {                                                                          \
+        if (errCode != CL_SUCCESS)                                             \
+        {                                                                      \
+            print_error(errCode, msg);                                         \
+            return FAILED_ABORT;                                               \
+        }                                                                      \
+    }
 
 
 #endif // _testBase_h
-
-
-
diff --git a/test_conformance/api/CMakeLists.txt b/test_conformance/api/CMakeLists.txt
index 5b1f491ce3..96d12f435d 100644
--- a/test_conformance/api/CMakeLists.txt
+++ b/test_conformance/api/CMakeLists.txt
@@ -3,6 +3,7 @@ set(MODULE_NAME API)
 set(${MODULE_NAME}_SOURCES
          main.cpp
          negative_platform.cpp
+         negative_queue.cpp
          test_api_consistency.cpp
          test_bool.cpp
          test_retain.cpp
diff --git a/test_conformance/api/main.cpp b/test_conformance/api/main.cpp
index e0900df07c..cdbf1f799a 100644
--- a/test_conformance/api/main.cpp
+++ b/test_conformance/api/main.cpp
@@ -152,6 +152,11 @@ test_definition test_list[] = {
     ADD_TEST(work_group_suggested_local_size_1D),
     ADD_TEST(work_group_suggested_local_size_2D),
     ADD_TEST(work_group_suggested_local_size_3D),
+
+    ADD_TEST(negative_create_command_queue),
+    ADD_TEST_VERSION(negative_create_command_queue_with_properties,
+                     Version(2, 0)),
+    ADD_TEST(negative_create_command_queue_with_properties_khr),
 };
 
 const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/api/negative_queue.cpp b/test_conformance/api/negative_queue.cpp
new file mode 100644
index 0000000000..f3b4fb2c90
--- /dev/null
+++ b/test_conformance/api/negative_queue.cpp
@@ -0,0 +1,174 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testBase.h"
+#include "harness/typeWrappers.h"
+
+int test_negative_create_command_queue(cl_device_id deviceID,
+                                       cl_context context,
+                                       cl_command_queue queue, int num_elements)
+{
+    cl_command_queue_properties device_props = 0;
+    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES,
+                                   sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional property of
+    // clCreateCommandQueue; CL_QUEUE_PROFILING_ENABLE is mandatory.
+    const bool out_of_order_device_support =
+        device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    if (out_of_order_device_support)
+    {
+        // Early return: the property is supported, so the error path for an
+        // unsupported property cannot be exercised.
+        return TEST_PASS;
+    }
+
+    // Try to create a command queue with the out-of-order property and check
+    // the returned error code.
+    cl_int test_error = CL_SUCCESS;
+    clCommandQueueWrapper test_queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &test_error);
+
+    test_failure_error_ret(
+        test_error, CL_INVALID_QUEUE_PROPERTIES,
+        "clCreateCommandQueue should return CL_INVALID_QUEUE_PROPERTIES if "
+        "values specified in properties are valid but are not supported by "
+        "the "
+        "device.",
+        TEST_FAIL);
+    return TEST_PASS;
+}
+
+int test_negative_create_command_queue_with_properties(cl_device_id deviceID,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements)
+{
+    cl_command_queue_properties device_props = 0;
+    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES,
+                                   sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    cl_command_queue_properties device_on_host_props = 0;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
+                            sizeof(device_on_host_props), &device_on_host_props,
+                            NULL);
+    test_error(error,
+               "clGetDeviceInfo for CL_DEVICE_QUEUE_ON_HOST_PROPERTIES failed");
+
+    if (device_on_host_props != device_props)
+    {
+        log_error(
+            "ERROR: CL_DEVICE_QUEUE_PROPERTIES and "
+            "CL_DEVICE_QUEUE_ON_HOST_PROPERTIES properties should match\n");
+        return TEST_FAIL;
+    }
+
+    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue
+    // property of clCreateCommandQueueWithProperties;
+    // CL_QUEUE_PROFILING_ENABLE is mandatory.
+    const bool out_of_order_device_support =
+        device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    if (out_of_order_device_support)
+    {
+        // Early return: the property is supported, so the error path for an
+        // unsupported property cannot be exercised.
+        return TEST_PASS;
+    }
+
+    // Zero-terminated {name, value} list in the element type the API expects,
+    // requesting the out-of-order property this device does not support.
+    cl_queue_properties queue_prop_def[] = {
+        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
+    };
+
+    cl_int test_error = CL_SUCCESS;
+    clCommandQueueWrapper test_queue = clCreateCommandQueueWithProperties(
+        context, deviceID, queue_prop_def, &test_error);
+
+    test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES,
+                           "clCreateCommandQueueWithProperties should "
+                           "return CL_INVALID_QUEUE_PROPERTIES if "
+                           "values specified in properties are valid but "
+                           "are not supported by the "
+                           "device.",
+                           TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+int test_negative_create_command_queue_with_properties_khr(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_create_command_queue"))
+    {
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    cl_platform_id platform;
+    cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM,
+                                   sizeof(cl_platform_id), &platform, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+    clCreateCommandQueueWithPropertiesKHR_fn
+        clCreateCommandQueueWithPropertiesKHR =
+            (clCreateCommandQueueWithPropertiesKHR_fn)
+                clGetExtensionFunctionAddressForPlatform(
+                    platform, "clCreateCommandQueueWithPropertiesKHR");
+    if (clCreateCommandQueueWithPropertiesKHR == NULL)
+    {
+        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
+        return TEST_FAIL;
+    }
+
+    cl_command_queue_properties device_props = 0;
+    error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_PROPERTIES,
+                            sizeof(device_props), &device_props, NULL);
+    test_error(error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+
+    // CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE is the only optional host-queue
+    // property of clCreateCommandQueueWithPropertiesKHR;
+    // CL_QUEUE_PROFILING_ENABLE is mandatory.
+    const bool out_of_order_device_support =
+        device_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
+    if (out_of_order_device_support)
+    {
+        // Early return: the property is supported, so the error path for an
+        // unsupported property cannot be exercised.
+        return TEST_PASS;
+    }
+
+    // Try to create a command queue with the out-of-order property and check
+    // the returned error code.
+    cl_queue_properties_khr queue_prop_def[] = {
+        CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0
+    };
+
+    cl_int test_error = CL_SUCCESS;
+    clCommandQueueWrapper test_khr_queue =
+        clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def,
+                                              &test_error);
+
+    test_failure_error_ret(test_error, CL_INVALID_QUEUE_PROPERTIES,
+                           "clCreateCommandQueueWithPropertiesKHR should "
+                           "return CL_INVALID_QUEUE_PROPERTIES if "
+                           "values specified in properties are valid but "
+                           "are not supported by the "
+                           "device.",
+                           TEST_FAIL);
+    return TEST_PASS;
+}
diff --git a/test_conformance/api/procs.h b/test_conformance/api/procs.h
index 320ad65a1e..22426be1b8 100644
--- a/test_conformance/api/procs.h
+++ b/test_conformance/api/procs.h
@@ -217,3 +217,14 @@ extern int test_work_group_suggested_local_size_3D(cl_device_id device,
                                                    cl_context context,
                                                    cl_command_queue queue,
                                                    int n_elems);
+
+extern int test_negative_create_command_queue(cl_device_id deviceID,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements);
+extern int test_negative_create_command_queue_with_properties(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_create_command_queue_with_properties_khr(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    int num_elements);
diff --git a/test_conformance/api/test_kernel_arg_multi_setup.cpp b/test_conformance/api/test_kernel_arg_multi_setup.cpp
index de3dc15e06..79294bd65c 100644
--- a/test_conformance/api/test_kernel_arg_multi_setup.cpp
+++ b/test_conformance/api/test_kernel_arg_multi_setup.cpp
@@ -27,8 +27,6 @@ const char *multi_arg_kernel_source_pattern =
 "    dst3[tid] = src3[tid];\n"
 "}\n";
 
-#define MAX_ERROR_TOLERANCE 0.0005f
-
 int test_multi_arg_set(cl_device_id device, cl_context context, cl_command_queue queue,
                        ExplicitType vec1Type, int vec1Size,
                        ExplicitType vec2Type, int vec2Size,
diff --git a/test_conformance/api/test_native_kernel.cpp b/test_conformance/api/test_native_kernel.cpp
index 50505e226a..d9c93628b4 100644
--- a/test_conformance/api/test_native_kernel.cpp
+++ b/test_conformance/api/test_native_kernel.cpp
@@ -46,12 +46,7 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
     }
 
     clMemWrapper streams[ 2 ];
-#if !(defined (_WIN32) && defined (_MSC_VER))
-    cl_int inBuffer[ n_elems ], outBuffer[ n_elems ];
-#else
-    cl_int* inBuffer  = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
-    cl_int* outBuffer = (cl_int *)_malloca( n_elems * sizeof(cl_int) );
-#endif
+    std::vector<cl_int> inBuffer(n_elems), outBuffer(n_elems);
     clEventWrapper finishEvent;
 
     struct arg_struct
@@ -63,11 +58,12 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
 
 
     // Create some input values
-    generate_random_data( kInt, n_elems, seed, inBuffer );
-
+    generate_random_data(kInt, n_elems, seed, inBuffer.data());
 
     // Create I/O streams
-    streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int), inBuffer, &error );
+    streams[0] =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, n_elems * sizeof(cl_int),
+                       inBuffer.data(), &error);
     test_error( error, "Unable to create I/O stream" );
     streams[ 1 ] = clCreateBuffer( context, 0, n_elems * sizeof(cl_int), NULL, &error );
     test_error( error, "Unable to create I/O stream" );
@@ -97,15 +93,18 @@ int test_native_kernel(cl_device_id device, cl_context context, cl_command_queue
     test_error(error, "clWaitForEvents failed");
 
     // Now read the results and verify
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, n_elems * sizeof(cl_int), outBuffer, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+                                n_elems * sizeof(cl_int), outBuffer.data(), 0,
+                                NULL, NULL);
     test_error( error, "Unable to read results" );
 
     for( int i = 0; i < n_elems; i++ )
     {
-        if( inBuffer[ i ] != outBuffer[ i ] )
+        if (inBuffer[i] != outBuffer[i])
         {
-            log_error( "ERROR: Data sample %d for native kernel did not validate (expected %d, got %d)\n",
-                      i, (int)inBuffer[ i ], (int)outBuffer[ i ] );
+            log_error("ERROR: Data sample %d for native kernel did not "
+                      "validate (expected %d, got %d)\n",
+                      i, (int)inBuffer[i], (int)outBuffer[i]);
             return 1;
         }
     }
diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index f07401077e..a1d8c0218e 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp
@@ -507,20 +507,6 @@ int test_get_context_info(cl_device_id deviceID, cl_context context, cl_command_
     return -1;
 }
 
-#define TEST_MEM_OBJECT_PARAM( mem, paramName, val, expected, name, type, cast )    \
-error = clGetMemObjectInfo( mem, paramName, sizeof( val ), &val, &size );        \
-test_error( error, "Unable to get mem object " name );                            \
-if( val != expected )                                                                \
-{                                                                                    \
-log_error( "ERROR: Mem object " name " did not validate! (expected " type ", got " type ")\n", (cast)(expected), (cast)val );    \
-return -1;                                                                        \
-}            \
-if( size != sizeof( val ) )                \
-{                                        \
-log_error( "ERROR: Returned size of mem object " name " does not validate! (expected %d, got %d)\n", (int)sizeof( val ), (int)size );    \
-return -1;    \
-}
-
 void CL_CALLBACK mem_obj_destructor_callback( cl_mem, void *data )
 {
     free( data );
diff --git a/test_conformance/api/test_wg_suggested_local_work_size.cpp b/test_conformance/api/test_wg_suggested_local_work_size.cpp
index 2b2a5404fd..6667ffda9a 100644
--- a/test_conformance/api/test_wg_suggested_local_work_size.cpp
+++ b/test_conformance/api/test_wg_suggested_local_work_size.cpp
@@ -24,19 +24,6 @@
 #include "procs.h"
 #include <CL/cl_ext.h>
 
-/** @brief Gets the number of elements of type s in a fixed length array of s */
-#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
-#define test_error_ret_and_free(errCode, msg, retValue, ptr)                   \
-    {                                                                          \
-        auto errCodeResult = errCode;                                          \
-        if (errCodeResult != CL_SUCCESS)                                       \
-        {                                                                      \
-            print_error(errCodeResult, msg);                                   \
-            free(ptr);                                                         \
-            return retValue;                                                   \
-        }                                                                      \
-    }
-
 const char* wg_scan_local_work_group_size = R"(
     bool is_zero_linear_id()
     {
@@ -107,7 +94,6 @@ bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); }
 
 bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); }
 
-#define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
 /* The value_range_nD contains numbers to be used for the experiments with 2D
    and 3D global work sizes. This is because we need smaller numbers so that the
    resulting number of work items is meaningful and does not become too large.
@@ -265,7 +251,7 @@ int do_test_work_group_suggested_local_size(
         // return error if no number is found due to the skip condition
         err = -1;
         unsigned int j = 0;
-        size_t num_elems = NELEMS(value_range_nD);
+        size_t num_elems = ARRAY_SIZE(value_range_nD);
         for (size_t i = start; i < end; i += incr)
         {
             if (skip_cond(i)) continue;
diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index 684a7d1d4b..bf1f3bd63a 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt
@@ -26,7 +26,6 @@ set(${MODULE_NAME}_SOURCES
     test_arrayreadwrite.cpp
     test_arraycopy.cpp
     test_imagearraycopy.cpp
-    test_imagearraycopy3d.cpp
     test_imagecopy.cpp
     test_imagerandomcopy.cpp
     test_arrayimagecopy.cpp
diff --git a/test_conformance/basic/test_if.cpp b/test_conformance/basic/test_if.cpp
index c92ec32218..f2a8fa8299 100644
--- a/test_conformance/basic/test_if.cpp
+++ b/test_conformance/basic/test_if.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,146 +21,119 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <algorithm>
+#include <vector>
 
 #include "procs.h"
 
-const char *conditional_kernel_code =
-"__kernel void test_if(__global int *src, __global int *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    if (src[tid] == 0)\n"
-"        dst[tid] = 0x12345678;\n"
-"    else if (src[tid] == 1)\n"
-"        dst[tid] = 0x23456781;\n"
-"    else if (src[tid] == 2)\n"
-"        dst[tid] = 0x34567812;\n"
-"    else if (src[tid] == 3)\n"
-"        dst[tid] = 0x45678123;\n"
-"    else if (src[tid] == 4)\n"
-"        dst[tid] = 0x56781234;\n"
-"    else if (src[tid] == 5)\n"
-"        dst[tid] = 0x67812345;\n"
-"    else if (src[tid] == 6)\n"
-"        dst[tid] = 0x78123456;\n"
-"    else if (src[tid] == 7)\n"
-"        dst[tid] = 0x81234567;\n"
-"    else\n"
-"        dst[tid] = 0x7FFFFFFF;\n"
-"\n"
-"}\n";
-
-const int results[] = {
-    0x12345678,
-    0x23456781,
-    0x34567812,
-    0x45678123,
-    0x56781234,
-    0x67812345,
-    0x78123456,
-    0x81234567,
-};
-
-int
-verify_if(int *inptr, int *outptr, int n)
+namespace {
+const char *conditional_kernel_code = R"(
+__kernel void test_if(__global int *src, __global int *dst)
 {
-    int     r, i;
+    int  tid = get_global_id(0);
+
+    if (src[tid] == 0)
+        dst[tid] = 0x12345678;
+    else if (src[tid] == 1)
+        dst[tid] = 0x23456781;
+    else if (src[tid] == 2)
+        dst[tid] = 0x34567812;
+    else if (src[tid] == 3)
+        dst[tid] = 0x45678123;
+    else if (src[tid] == 4)
+        dst[tid] = 0x56781234;
+    else if (src[tid] == 5)
+        dst[tid] = 0x67812345;
+    else if (src[tid] == 6)
+        dst[tid] = 0x78123456;
+    else if (src[tid] == 7)
+        dst[tid] = 0x81234567;
+    else
+        dst[tid] = 0x7FFFFFFF;
+}
+)";
 
-    for (i=0; i<n; i++)
-    {
-        if (inptr[i] <= 7)
-            r = results[inptr[i]];
+int verify_if(std::vector<cl_int> input, std::vector<cl_int> output)
+{
+    const cl_int results[] = {
+        0x12345678, 0x23456781, 0x34567812, 0x45678123,
+        0x56781234, 0x67812345, 0x78123456, 0x81234567,
+    };
+
+    auto predicate = [&results](cl_int a, cl_int b) {
+        if (a <= 7)
+            return b == results[a];
         else
-            r = 0x7FFFFFFF;
+            return b == 0x7FFFFFFF;
+    };
 
-        if (r != outptr[i])
-        {
-            log_error("IF test failed\n");
-            return -1;
-        }
+    if (!std::equal(input.begin(), input.end(), output.begin(), predicate))
+    {
+        log_error("IF test failed\n");
+        return -1;
     }
 
     log_info("IF test passed\n");
     return 0;
 }
 
-int test_if(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+void generate_random_inputs(std::vector<cl_int> &v)
 {
-    cl_mem streams[2];
-    cl_int *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
+    RandomSeed seed(gRandomSeed);
+
+    auto random_generator = [&seed]() {
+        return static_cast<cl_int>(get_random_float(0, 32, seed));
+    };
+
+    std::generate(v.begin(), v.end(), random_generator);
+}
+}
+int test_if(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elements)
+{
+    clMemWrapper streams[2];
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    int err;
 
     size_t length = sizeof(cl_int) * num_elements;
-    input_ptr  = (cl_int*)malloc(length);
-    output_ptr = (cl_int*)malloc(length);
 
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
+    std::vector<cl_int> input(num_elements);
+    std::vector<cl_int> output(num_elements);
 
-    for (i=0; i<num_elements; i++)
-        input_ptr[i] = (int)get_random_float(0, 32, d);
 
-    free_mtdata(d); d = NULL;
+    streams[0] =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err);
+    test_error(err, "clCreateBuffer failed.");
+    streams[1] =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err);
+    test_error(err, "clCreateBuffer failed.");
 
-  err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueWriteBuffer failed\n");
-    return -1;
-  }
+    generate_random_inputs(input);
 
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &conditional_kernel_code, "test_if" );
-  if (err)
-    return -1;
+    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length,
+                               input.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueWriteBuffer failed.");
 
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &conditional_kernel_code, "test_if");
+    test_error(err, "create_single_kernel_helper failed.");
 
-    threads[0] = (unsigned int)num_elements;
-  err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueNDRangeKernel failed\n");
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clReadArray failed\n");
-    return -1;
-  }
-
-  err = verify_if(input_ptr, output_ptr, num_elements);
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
+    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+    test_error(err, "clSetKernelArg failed.");
 
-    return err;
-}
+    size_t threads[] = { (size_t)num_elements };
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0,
+                                 nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed.");
 
+    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length,
+                              output.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed.");
 
+    err = verify_if(input, output);
+
+    return err;
+}
diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index e3f2fb6d87..f29eac31c3 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -14,134 +14,172 @@
 // limitations under the License.
 //
 #include "harness/compat.h"
+#include "harness/imageHelpers.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
 
 #include "procs.h"
 
-int test_imagearraycopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
+int test_imagearraycopy_single_format(cl_device_id device, cl_context context,
+                                      cl_command_queue queue,
+                                      cl_mem_flags flags,
+                                      cl_mem_object_type image_type,
+                                      const cl_image_format *format)
 {
-  cl_uchar    *imgptr, *bufptr;
-  clMemWrapper      image, buffer;
-  int        img_width = 512;
-  int        img_height = 512;
-  size_t    elem_size;
-  size_t    buffer_size;
-  int        i;
-  cl_int          err;
-  MTdata          d;
-  cl_event  copyevent;
-
-  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-
-  image = create_image_2d(context, CL_MEM_READ_WRITE, format, img_width,
-                          img_height, 0, NULL, &err);
-  test_error(err, "create_image_2d failed");
-
-  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
-  test_error(err, "clGetImageInfo failed");
-
-  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
-
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-  d = init_genrand( gRandomSeed );
-  imgptr = (cl_uchar*)malloc(buffer_size);
-  for (i=0; i<(int)buffer_size; i++) {
-     imgptr[i] = (cl_uchar)genrand_int32(d);
-  }
-  free_mtdata(d); d = NULL;
-
-  size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
-  err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
-  test_error(err, "clEnqueueWriteBuffer failed");
-
-  err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, &copyevent );
-  test_error(err, "clEnqueueCopyImageToBuffer failed");
-
-  bufptr = (cl_uchar*)malloc(buffer_size);
-
-  err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1, &copyevent, NULL);
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  err = clReleaseEvent(copyevent);
-  test_error(err, "clReleaseEvent failed");
-
-  if (memcmp(imgptr, bufptr, buffer_size) != 0) {
-    log_error( "ERROR: Results did not validate!\n" );
-    unsigned char * inchar = (unsigned char*)imgptr;
-    unsigned char * outchar = (unsigned char*)bufptr;
-    int failuresPrinted = 0;
-    int i;
-    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
-        int failed = 0;
-        int j;
-        for (j=0; j<(int)elem_size; j++)
-            if (inchar[i+j] != outchar[i+j])
-                failed = 1;
-        char values[4096];
-        values[0] = 0;
-        if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> expected [", i, i);
-            int j;
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != actual [");
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
-            sprintf(values + strlen(values), "]");
-            log_error("%s\n", values);
-            failuresPrinted++;
-        }
-        if (failuresPrinted > 5) {
-            log_error("Not printing further failures...\n");
-            break;
+    clMemWrapper buffer, image;
+    const int img_width = 512;
+    const int img_height = 512;
+    const int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
+    size_t elem_size;
+    size_t buffer_size;
+    cl_int err;
+    cl_event copyevent;
+
+    log_info("Testing %s %s\n",
+             GetChannelOrderName(format->image_channel_order),
+             GetChannelTypeName(format->image_channel_data_type));
+
+    if (CL_MEM_OBJECT_IMAGE2D == image_type)
+    {
+        image = create_image_2d(context, flags, format, img_width, img_height,
+                                0, nullptr, &err);
+    }
+    else
+    {
+        image = create_image_3d(context, flags, format, img_width, img_height,
+                                img_depth, 0, 0, nullptr, &err);
+    }
+    test_error(err, "create_image_xd failed");
+
+    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t),
+                         &elem_size, nullptr);
+    test_error(err, "clGetImageInfo failed");
+
+    buffer_size =
+        sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
+
+    buffer =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, nullptr, &err);
+    test_error(err, "clCreateBuffer failed");
+
+
+    RandomSeed seed(gRandomSeed);
+    cl_uchar *imgptr =
+        static_cast<cl_uchar *>(create_random_data(kUChar, seed, buffer_size));
+
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, img_depth };
+    err = clEnqueueWriteImage(queue, image, CL_TRUE, origin, region, 0, 0,
+                              imgptr, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueWriteImage failed");
+
+    err = clEnqueueCopyImageToBuffer(queue, image, buffer, origin, region, 0, 0,
+                                     nullptr, &copyevent);
+    test_error(err, "clEnqueueCopyImageToBuffer failed");
+
+    cl_uchar *bufptr = static_cast<cl_uchar *>(malloc(buffer_size));
+
+    err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 1,
+                              &copyevent, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    err = clReleaseEvent(copyevent);
+    test_error(err, "clReleaseEvent failed");
+
+    if (memcmp(imgptr, bufptr, buffer_size) != 0)
+    {
+        log_error("ERROR: Results did not validate!\n");
+        auto inchar = static_cast<unsigned char *>(imgptr);
+        auto outchar = static_cast<unsigned char *>(bufptr);
+        int failuresPrinted = 0;
+        for (size_t i = 0; i < buffer_size; i += elem_size)
+        {
+            if (memcmp(&inchar[i], &outchar[i], elem_size) != 0)
+            {
+                log_error("%zu(0x%zx) -> expected [", i, i);
+                for (size_t j = 0; j < elem_size; j++)
+                    log_error("0x%02x ", inchar[i + j]);
+                log_error("] != actual [");
+                for (size_t j = 0; j < elem_size; j++)
+                    log_error("0x%02x ", outchar[i + j]);
+                log_error("]\n");
+                failuresPrinted++;
+            }
+            if (failuresPrinted > 5)
+            {
+                log_error("Not printing further failures...\n");
+                break;
+            }
         }
+        err = -1;
     }
-    err = -1;
-  }
 
-  free(imgptr);
-  free(bufptr);
+    free(imgptr);
+    free(bufptr);
 
-  if (err)
-    log_error("IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
-              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
+    if (err)
+        log_error(
+            "IMAGE to ARRAY copy test failed for image_channel_order=0x%lx and "
+            "image_channel_data_type=0x%lx\n",
+            static_cast<unsigned long>(format->image_channel_order),
+            static_cast<unsigned long>(format->image_channel_data_type));
 
-  return err;
+    return err;
 }
 
-int test_imagearraycopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+int test_imagearraycommon(cl_device_id device, cl_context context,
+                          cl_command_queue queue, cl_mem_flags flags,
+                          cl_mem_object_type image_type)
 {
-  cl_int          err;
-  cl_image_format *formats;
-  cl_uint         num_formats;
-  cl_uint         i;
+    cl_int err;
+    cl_uint num_formats;
+
+    err = clGetSupportedImageFormats(context, flags, image_type, 0, nullptr,
+                                     &num_formats);
+    test_error(err, "clGetSupportedImageFormats failed");
 
-  PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+    std::vector<cl_image_format> formats(num_formats);
 
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
-  test_error(err, "clGetSupportedImageFormats failed");
+    err = clGetSupportedImageFormats(context, flags, image_type, num_formats,
+                                     formats.data(), nullptr);
+    test_error(err, "clGetSupportedImageFormats failed");
 
-  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
+    for (const auto &format : formats)
+    {
+        err |= test_imagearraycopy_single_format(device, context, queue, flags,
+                                                 image_type, &format);
+    }
 
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
-  test_error(err, "clGetSupportedImageFormats failed");
+    if (err)
+        log_error("IMAGE%s to ARRAY copy test failed\n",
+                  convert_image_type_to_string(image_type));
+    else
+        log_info("IMAGE%s to ARRAY copy test passed\n",
+                 convert_image_type_to_string(image_type));
 
-  for (i = 0; i < num_formats; i++) {
-    err |= test_imagearraycopy_single_format(device, context, queue, &formats[i]);
-  }
+    return err;
+}
 
-  free(formats);
-  if (err)
-    log_error("IMAGE to ARRAY copy test failed\n");
-  else
-    log_info("IMAGE to ARRAY copy test passed\n");
+int test_imagearraycopy(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements)
+{
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-  return err;
+    return test_imagearraycommon(device, context, queue, CL_MEM_READ_WRITE,
+                                 CL_MEM_OBJECT_IMAGE2D);
 }
+
+
+int test_imagearraycopy3d(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
+
+    return test_imagearraycommon(device, context, queue, CL_MEM_READ_ONLY,
+                                 CL_MEM_OBJECT_IMAGE3D);
+}
diff --git a/test_conformance/basic/test_imagearraycopy3d.cpp b/test_conformance/basic/test_imagearraycopy3d.cpp
deleted file mode 100644
index 60b8a58419..0000000000
--- a/test_conformance/basic/test_imagearraycopy3d.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-int test_imagearraycopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
-{
-  cl_uchar    *imgptr, *bufptr;
-  clMemWrapper      image, buffer;
-  int        img_width = 128;
-  int        img_height = 128;
-  int        img_depth = 32;
-  size_t    elem_size;
-  size_t    buffer_size;
-  int        i;
-  cl_int          err;
-  MTdata          d;
-
-  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-
-  image = create_image_3d(context, CL_MEM_READ_ONLY, format, img_width,
-                          img_height, img_depth, 0, 0, NULL, &err);
-  test_error(err, "create_image_3d failed");
-
-  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
-  test_error(err, "clGetImageInfo failed");
-
-  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
-
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-  d = init_genrand( gRandomSeed );
-  imgptr = (cl_uchar*)malloc(buffer_size);
-  for (i=0; i<(int)buffer_size; i++) {
-     imgptr[i] = (cl_uchar)genrand_int32(d);
-  }
-  free_mtdata(d); d = NULL;
-
-  size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
-  err = clEnqueueWriteImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 0, NULL, NULL );
-  test_error(err, "clEnqueueWriteBuffer failed");
-
-  err = clEnqueueCopyImageToBuffer( queue, image, buffer, origin, region, 0, 0, NULL, NULL );
-  test_error(err, "clEnqueueCopyImageToBuffer failed");
-
-  bufptr = (cl_uchar*)malloc(buffer_size);
-
-  err = clEnqueueReadBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  if (memcmp(imgptr, bufptr, buffer_size) != 0) {
-    log_error( "ERROR: Results did not validate!\n" );
-    unsigned char * inchar = (unsigned char*)imgptr;
-    unsigned char * outchar = (unsigned char*)bufptr;
-    int failuresPrinted = 0;
-    int i;
-    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
-        int failed = 0;
-        int j;
-        for (j=0; j<(int)elem_size; j++)
-            if (inchar[i+j] != outchar[i+j])
-                failed = 1;
-        char values[4096];
-        values[0] = 0;
-        if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> expected [", i, i);
-            int j;
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != actual [");
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
-            sprintf(values + strlen(values), "]");
-            log_error("%s\n", values);
-            failuresPrinted++;
-        }
-        if (failuresPrinted > 5) {
-            log_error("Not printing further failures...\n");
-            break;
-        }
-    }
-    err = -1;
-  }
-
-  free(imgptr);
-  free(bufptr);
-
-  if (err)
-    log_error("IMAGE3D to ARRAY copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
-              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
-
-  return err;
-}
-
-int test_imagearraycopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-  cl_int          err;
-  cl_image_format *formats;
-  cl_uint         num_formats;
-  cl_uint         i;
-
-  PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-  err = clGetSupportedImageFormats(
-      context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
-
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE3D, num_formats, formats,
-                                   NULL);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  for (i = 0; i < num_formats; i++) {
-    err |= test_imagearraycopy3d_single_format(device, context, queue, &formats[i]);
-  }
-
-  free(formats);
-  if (err)
-    log_error("IMAGE3D to ARRAY copy test failed\n");
-  else
-    log_info("IMAGE3D to ARRAY copy test passed\n");
-
-  return err;
-}
diff --git a/test_conformance/basic/test_imagedim.cpp b/test_conformance/basic/test_imagedim.cpp
index 008c88b6af..f979aa8bb1 100644
--- a/test_conformance/basic/test_imagedim.cpp
+++ b/test_conformance/basic/test_imagedim.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -21,504 +21,277 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <algorithm>
+#include <vector>
 
 #include "procs.h"
 
-static const char *image_dim_kernel_code =
-"\n"
-"__kernel void test_image_dim(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));\n"
-"     write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static unsigned char *generate_8888_image(size_t w, size_t h, MTdata d)
+namespace {
+const char *image_dim_kernel_code = R"(
+__kernel void test_image_dim(read_only image2d_t srcimg, write_only image2d_t dstimg, sampler_t sampler)
 {
-    unsigned char *ptr = new unsigned char[4 * w * h];
-    size_t i;
-
-    for (i = 0; i < w * h * 4; i++)
-    {
-        ptr[i] = (unsigned char)genrand_int32(d);
-    }
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    float4 color;
 
-    return ptr;
+    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y));
+    write_imagef(dstimg, (int2)(tid_x, tid_y), color);
 }
+)";
 
-static int verify_8888_image(unsigned char *image, unsigned char *outptr,
-                             size_t w, size_t h)
+void generate_random_inputs(std::vector<cl_uchar> &v)
 {
-    size_t i;
+    RandomSeed seed(gRandomSeed);
 
-    for (i = 0; i < w * h; i++)
-    {
-        if (outptr[i] != image[i])
-            return -1;
-    }
+    auto random_generator = [&seed]() { return genrand_int32(seed); };
 
-    return 0;
+    std::generate(v.begin(), v.end(), random_generator);
 }
 
-
-int
-test_imagedim_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+int get_max_image_dimensions(cl_device_id device, size_t &max_img_width,
+                             size_t &max_img_height)
 {
-    cl_mem streams[2];
-    cl_image_format img_format;
-    unsigned char *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[2];
+    int err = 0;
+
     cl_ulong max_mem_size;
-    size_t img_width, max_img_width;
-    size_t img_height, max_img_height;
-    size_t max_img_dim;
-    int i, j, i2, j2, err = 0;
     size_t max_image2d_width, max_image2d_height;
-    int total_errors = 0;
-    MTdata  d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
 
-    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
-    if (err)
-    {
-        log_error("create_program_and_kernel_with_sources failed\n");
-        return -1;
-    }
-
-    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
-    if (err)
-    {
-        log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
-        return -1;
-    }
-    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
-    if (err)
-    {
-        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
-        return -1;
-    }
-    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
-    if (err)
-    {
-        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
-        return -1;
-    }
-    log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
-           max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
-
-    if (max_mem_size > (cl_ulong)SIZE_MAX) {
-        max_mem_size = (cl_ulong)SIZE_MAX;
-    }
-
-    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-    test_error(err, "clCreateSampler failed");
-
-    max_img_width = max_image2d_width;
-    max_img_height = max_image2d_height;
-
-    // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
-  //  and we want to consume 1/4 of global memory (this is the minimum required to be
-  //  supported by the spec)
+    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
+                          sizeof(max_mem_size), &max_mem_size, nullptr);
+    test_error(err, "clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed");
+    err =
+        clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH,
+                        sizeof(max_image2d_width), &max_image2d_width, nullptr);
+    test_error(err, "clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed");
+    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
+                          sizeof(max_image2d_height), &max_image2d_height,
+                          nullptr);
+    test_error(err, "clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed");
+
+    log_info("Device reported max image sizes of %zu x %zu, and max mem size "
+             "of %gMB.\n",
+             max_image2d_width, max_image2d_height,
+             max_mem_size / (1024.0 * 1024.0));
+
+
+    max_mem_size = std::min(max_mem_size, (cl_ulong)SIZE_MAX);
+
+    // determine max image dim we can allocate - assume RGBA image, 4 bytes per
+    // pixel, and we want to consume 1/4 of global memory (this is the minimum
+    // required to be supported by the spec)
     max_mem_size /= 4; // use 1/4
     max_mem_size /= 4; // 4 bytes per pixel
-    max_img_dim = (size_t)sqrt((double)max_mem_size);
+
+    size_t max_img_dim =
+        static_cast<size_t>(sqrt(static_cast<double>(max_mem_size)));
     // convert to a power of 2
     {
-        unsigned int    n = (unsigned int)max_img_dim;
-        unsigned int    m = 0x80000000;
+        unsigned int n = static_cast<unsigned int>(max_img_dim);
+        unsigned int m = 0x80000000;
 
         // round-down to the nearest power of 2
-        while (m > n)
-            m >>= 1;
+        while (m > n) m >>= 1;
 
         max_img_dim = m;
     }
 
-    if (max_img_width > max_img_dim)
-        max_img_width = max_img_dim;
-    if (max_img_height > max_img_dim)
-        max_img_height = max_img_dim;
+    max_img_width = std::min(max_image2d_width, max_img_dim);
+    max_img_height = std::min(max_image2d_height, max_img_dim);
 
-    log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
-                max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
+    log_info("Adjusted maximum image size to test is %zu x %zu, which is a "
+             "max mem size of %gMB.\n",
+             max_img_width, max_img_height,
+             (max_img_width * max_img_height * 4) / (1024.0 * 1024.0));
+    return err;
+}
 
-    d = init_genrand( gRandomSeed );
-    input_ptr = generate_8888_image(max_img_width, max_img_height, d);
+int test_imagedim_common(cl_context context, cl_command_queue queue,
+                         cl_kernel kernel, size_t *local_threads,
+                         size_t img_width, size_t img_height)
+{
 
-    output_ptr = new unsigned char[4 * max_img_width * max_img_height];
+    int err;
+    int total_errors = 0;
 
-    // test power of 2 width, height starting at 1 to 4K
-    for (i = 1, i2 = 0; i <= max_img_height; i <<= 1, i2++)
-    {
-        img_height = (1 << i2);
-        for (j = 1, j2 = 0; j <= max_img_width; j <<= 1, j2++)
-        {
-            img_width = (1 << j2);
-
-            img_format.image_channel_order = CL_RGBA;
-            img_format.image_channel_data_type = CL_UNORM_INT8;
-            streams[0] =
-                create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                img_width, img_height, 0, NULL, NULL);
-            if (!streams[0])
-            {
-                log_error("create_image_2d failed.  width = %d, height = %d\n", img_width, img_height);
-                delete[] input_ptr;
-                delete[] output_ptr;
-                free_mtdata(d);
-                return -1;
-            }
-            img_format.image_channel_order = CL_RGBA;
-            img_format.image_channel_data_type = CL_UNORM_INT8;
-            streams[1] =
-                create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
-                                img_width, img_height, 0, NULL, NULL);
-            if (!streams[1])
-            {
-                log_error("create_image_2d failed.  width = %d, height = %d\n", img_width, img_height);
-                clReleaseMemObject(streams[0]);
-                delete[] input_ptr;
-                delete[] output_ptr;
-                free_mtdata(d);
-                return -1;
-            }
+    clMemWrapper streams[2];
 
-            size_t origin[3] = {0,0,0};
-            size_t region[3] = {img_width, img_height, 1};
-            err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-            if (err != CL_SUCCESS)
-            {
-                log_error("clWriteImage failed\n");
-                clReleaseMemObject(streams[0]);
-                clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
-                free_mtdata(d);
-                return -1;
-            }
+    std::vector<cl_uchar> input(4 * img_width * img_height);
+    std::vector<cl_uchar> output(4 * img_width * img_height);
 
-            err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-            err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-            err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-            if (err != CL_SUCCESS)
-            {
-                log_error("clSetKernelArgs failed\n");
-                clReleaseMemObject(streams[0]);
-                clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
-                free_mtdata(d);
-                return -1;
-            }
+    generate_random_inputs(input);
 
-            threads[0] = (size_t)img_width;
-            threads[1] = (size_t)img_height;
-            log_info("Testing image dimensions %d x %d with local threads NULL.\n", img_width, img_height);
-            err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
-            if (err != CL_SUCCESS)
-            {
-                log_error("clEnqueueNDRangeKernel failed\n");
-                log_error("Image Dimension test failed.  image width = %d, image height = %d, local NULL\n",
-                            img_width, img_height);
-                clReleaseMemObject(streams[0]);
-                clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
-                free_mtdata(d);
-                return -1;
-            }
-            err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-            if (err != CL_SUCCESS)
-            {
-                log_error("clReadImage failed\n");
-                log_error("Image Dimension test failed.  image width = %d, image height = %d, local NULL\n",
-                            img_width, img_height);
-                clReleaseMemObject(streams[0]);
-                clReleaseMemObject(streams[1]);
-                delete[] input_ptr;
-                delete[] output_ptr;
-                free_mtdata(d);
-                return -1;
-            }
-            err = verify_8888_image(input_ptr, output_ptr, img_width, img_height);
-            if (err)
-            {
-                total_errors++;
-                log_error("Image Dimension test failed.  image width = %d, image height = %d\n", img_width, img_height);
-            }
+    const cl_image_format img_format = { CL_RGBA, CL_UNORM_INT8 };
 
-            clReleaseMemObject(streams[0]);
-            clReleaseMemObject(streams[1]);
-        }
-    }
+    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
+                                 img_width, img_height, 0, nullptr, &err);
+    test_error(err, "create_image_2d failed");
+
+    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format,
+                                 img_width, img_height, 0, nullptr, &err);
+    test_error(err, "create_image_2d failed");
+
+    size_t origin[3] = { 0, 0, 0 };
+    size_t region[3] = { img_width, img_height, 1 };
+    err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0,
+                              input.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueWriteImage failed");
 
-    // cleanup
-    delete[] input_ptr;
-    delete[] output_ptr;
-    free_mtdata(d);
-    clReleaseSampler(sampler);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
+    clSamplerWrapper sampler = clCreateSampler(
+        context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
+    test_error(err, "clCreateSampler failed");
 
+    err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+    err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
+    test_error(err, "clSetKernelArg failed");
+
+    size_t threads[] = { img_width, img_height };
+    if (local_threads)
+        log_info("Testing image dimensions %zu x %zu with local threads "
+                 "%zu x %zu.\n",
+                 img_width, img_height, local_threads[0], local_threads[1]);
+    else
+        log_info(
+            "Testing image dimensions %zu x %zu with local threads nullptr.\n",
+            img_width, img_height);
+    err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, threads,
+                                 local_threads, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed");
+
+    err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0,
+                             output.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadImage failed");
+
+    if (0 != memcmp(input.data(), output.data(), 4 * img_width * img_height))
+    {
+        total_errors++;
+        log_error("Image Dimension test failed.  image width = %zu, "
+                  "image height = %zu\n",
+                  img_width, img_height);
+    }
     return total_errors;
 }
+}
 
+int test_imagedim_pow2(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int n_elems)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
+    size_t max_img_width;
+    size_t max_img_height;
 
-int
-test_imagedim_non_pow2(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem streams[2];
-    cl_image_format img_format;
-    unsigned char *input_ptr, *output_ptr;
-    cl_program program;
-    cl_kernel kernel;
-    size_t threads[2], local_threads[2];
-    cl_ulong max_mem_size;
-    size_t img_width, max_img_width;
-    size_t img_height, max_img_height;
-    size_t max_img_dim;
-    int i, j, i2, j2, err = 0;
-    size_t max_image2d_width, max_image2d_height;
+    int err = 0;
     int total_errors = 0;
-    size_t max_local_workgroup_size[3];
-    MTdata d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
 
-    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_dim_kernel_code, "test_image_dim" );
-    if (err)
-    {
-        log_error("create_program_and_kernel_with_sources failed\n");
-        return -1;
-    }
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-    size_t work_group_size = 0;
-    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(work_group_size), &work_group_size, NULL);
-    test_error(err, "clGetKerenlWorkgroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &image_dim_kernel_code, "test_image_dim");
+    test_error(err, "create_single_kernel_helper failed");
 
-    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
-    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+    err = get_max_image_dimensions(device, max_img_width, max_img_height);
+    test_error(err, "get_max_image_dimensions failed");
 
-    err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,sizeof(max_mem_size), &max_mem_size, NULL);
-    if (err)
-    {
-        log_error("clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed (%d)\n", err);
-        return -1;
-    }
-    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(max_image2d_width), &max_image2d_width, NULL);
-    if (err)
-    {
-        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_WIDTH failed (%d)\n", err);
-        return -1;
-    }
-    err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(max_image2d_width), &max_image2d_height, NULL);
-    if (err)
+    // test power of 2 width, height from 1 up to the max image dimensions
+    for (size_t i = 1, i2 = 0; i <= max_img_height; i <<= 1, i2++)
     {
-        log_error("clGetDeviceInfo for CL_DEVICE_IMAGE2D_MAX_HEIGHT failed (%d)\n", err);
-        return -1;
+        size_t img_height = (1 << i2);
+        for (size_t j = 1, j2 = 0; j <= max_img_width; j <<= 1, j2++)
+        {
+            size_t img_width = (1 << j2);
+
+            total_errors += test_imagedim_common(
+                context, queue, kernel, nullptr, img_width, img_height);
+        }
     }
-    log_info("Device reported max image sizes of %lu x %lu, and max mem size of %gMB.\n",
-           max_image2d_width, max_image2d_height, max_mem_size/(1024.0*1024.0));
 
-    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-    test_error(err, "clCreateSampler failed");
+    return total_errors;
+}
 
-    max_img_width = (int)max_image2d_width;
-    max_img_height = (int)max_image2d_height;
 
-  if (max_mem_size > (cl_ulong)SIZE_MAX) {
-    max_mem_size = (cl_ulong)SIZE_MAX;
-  }
+int test_imagedim_non_pow2(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int n_elems)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
-    // determine max image dim we can allocate - assume RGBA image, 4 bytes per pixel,
-    //  and we want to consume 1/4 of global memory (this is the minimum required to be
-    //  supported by the spec)
-    max_mem_size /= 4; // use 1/4
-    max_mem_size /= 4; // 4 bytes per pixel
-    max_img_dim = (int)sqrt((double)max_mem_size);
-    // convert to a power of 2
-    {
-        unsigned int    n = (unsigned int)max_img_dim;
-        unsigned int    m = 0x80000000;
+    size_t max_img_width;
+    size_t max_img_height;
+    size_t max_local_workgroup_size[3] = {};
+    size_t work_group_size = 0;
+    int err = 0;
+    int total_errors = 0;
 
-        // round-down to the nearest power of 2
-        while (m > n)
-            m >>= 1;
 
-        max_img_dim = (int)m;
-    }
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-    if (max_img_width > max_img_dim)
-        max_img_width = max_img_dim;
-    if (max_img_height > max_img_dim)
-        max_img_height = max_img_dim;
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &image_dim_kernel_code, "test_image_dim");
+    test_error(err, "create_single_kernel_helper failed");
 
-    log_info("Adjusted maximum image size to test is %d x %d, which is a max mem size of %gMB.\n",
-            max_img_width, max_img_height, (max_img_width*max_img_height*4)/(1024.0*1024.0));
+    err = get_max_image_dimensions(device, max_img_width, max_img_height);
+    test_error(err, "get_max_image_dimensions failed");
+
+    err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
+                                   sizeof(work_group_size), &work_group_size,
+                                   nullptr);
+    test_error(err,
+               "clGetKernelWorkgroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE");
+
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
+                          sizeof(max_local_workgroup_size),
+                          max_local_workgroup_size, nullptr);
+    test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
 
-    d = init_genrand( gRandomSeed );
-    input_ptr = generate_8888_image(max_img_width, max_img_height, d);
-    output_ptr = new unsigned char[4 * max_img_width * max_img_height];
+    // clamp max_local_workgroup_size to CL_KERNEL_WORK_GROUP_SIZE
+    for (auto &max_lws : max_local_workgroup_size)
+        max_lws = std::min(max_lws, work_group_size);
 
-    int plus_minus;
-    for (plus_minus = 0; plus_minus < 3; plus_minus++)
+    for (int plus_minus = 0; plus_minus < 3; plus_minus++)
     {
 
-    // test power of 2 width, height starting at 1 to 4K
-        for (i=2,i2=1; i<=max_img_height; i<<=1,i2++)
+        // powers of 2 from 2 up to the max; one or both dims get reduced by 1
+        for (size_t i = 2, i2 = 1; i <= max_img_height; i <<= 1, i2++)
         {
-            img_height = (1 << i2);
-            for (j=2,j2=1; j<=max_img_width; j<<=1,j2++)
+            size_t img_height = (1 << i2);
+            for (size_t j = 2, j2 = 1; j <= max_img_width; j <<= 1, j2++)
             {
-                img_width = (1 << j2);
+                size_t img_width = (1 << j2);
 
                 size_t effective_img_height = img_height;
                 size_t effective_img_width = img_width;
 
-                local_threads[0] = 1;
-                local_threads[1] = 1;
+                size_t local_threads[] = { 1, 1 };
 
-                switch (plus_minus) {
+                switch (plus_minus)
+                {
                     case 0:
-                      effective_img_height--;
-                      local_threads[0] = work_group_size > max_local_workgroup_size[0] ? max_local_workgroup_size[0] : work_group_size;
-                      while (img_width%local_threads[0] != 0)
-                        local_threads[0]--;
-                      break;
+                        effective_img_height--;
+                        local_threads[0] = max_local_workgroup_size[0];
+                        while (img_width % local_threads[0] != 0)
+                            local_threads[0]--;
+                        break;
                     case 1:
-                      effective_img_width--;
-                      local_threads[1] = work_group_size > max_local_workgroup_size[1] ? max_local_workgroup_size[1] : work_group_size;
-                      while (img_height%local_threads[1] != 0)
-                        local_threads[1]--;
-                      break;
+                        effective_img_width--;
+                        local_threads[1] = max_local_workgroup_size[1];
+                        while (img_height % local_threads[1] != 0)
+                            local_threads[1]--;
+                        break;
                     case 2:
-                      effective_img_width--;
-                      effective_img_height--;
-                      break;
-                    default:
-                      break;
-                }
-
-                img_format.image_channel_order = CL_RGBA;
-                img_format.image_channel_data_type = CL_UNORM_INT8;
-                streams[0] = create_image_2d(
-                    context, CL_MEM_READ_WRITE, &img_format,
-                    effective_img_width, effective_img_height, 0, NULL, NULL);
-                if (!streams[0])
-                {
-                    log_error("create_image_2d failed.  width = %d, height = %d\n", effective_img_width, effective_img_height);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
-                    free_mtdata(d);
-                    return -1;
-                }
-                img_format.image_channel_order = CL_RGBA;
-                img_format.image_channel_data_type = CL_UNORM_INT8;
-                streams[1] = create_image_2d(
-                    context, CL_MEM_READ_WRITE, &img_format,
-                    effective_img_width, effective_img_height, 0, NULL, NULL);
-                if (!streams[1])
-                {
-                    log_error("create_image_2d failed.  width = %d, height = %d\n", effective_img_width, effective_img_height);
-                    clReleaseMemObject(streams[0]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
-                    free_mtdata(d);
-                    return -1;
-                }
-
-                  size_t origin[3] = {0,0,0};
-                  size_t region[3] = {effective_img_width, effective_img_height, 1};
-                  err = clEnqueueWriteImage(queue, streams[0], CL_FALSE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-                if (err != CL_SUCCESS)
-                {
-                    log_error("clWriteImage failed\n");
-                    clReleaseMemObject(streams[0]);
-                    clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
-                    free_mtdata(d);
-                    return -1;
-                }
-
-                err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-                err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-                err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-                if (err != CL_SUCCESS)
-                {
-                    log_error("clSetKernelArgs failed\n");
-                    clReleaseMemObject(streams[0]);
-                    clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
-                    free_mtdata(d);
-                    return -1;
-                }
-
-                threads[0] = (size_t)effective_img_width;
-                threads[1] = (size_t)effective_img_height;
-                log_info("Testing image dimensions %d x %d with local threads %d x %d.\n",
-                            effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
-                err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, local_threads, 0, NULL, NULL );
-                if (err != CL_SUCCESS)
-                {
-                    log_error("clEnqueueNDRangeKernel failed\n");
-                    log_error("Image Dimension test failed.  image width = %d, image height = %d, local %d x %d\n",
-                                effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
-                    clReleaseMemObject(streams[0]);
-                    clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
-                    free_mtdata(d);
-                    return -1;
-                }
-                err = clEnqueueReadImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-                if (err != CL_SUCCESS)
-                {
-                    log_error("clReadImage failed\n");
-                    log_error("Image Dimension test failed.  image width = %d, image height = %d, local %d x %d\n",
-                                effective_img_width, effective_img_height, (int)local_threads[0], (int)local_threads[1]);
-                    clReleaseMemObject(streams[0]);
-                    clReleaseMemObject(streams[1]);
-                    delete[] input_ptr;
-                    delete[] output_ptr;
-                    free_mtdata(d);
-                    return -1;
-                }
-                err = verify_8888_image(input_ptr, output_ptr, effective_img_width, effective_img_height);
-                if (err)
-                {
-                    total_errors++;
-                    log_error("Image Dimension test failed.  image width = %d, image height = %d\n", effective_img_width, effective_img_height);
+                        effective_img_width--;
+                        effective_img_height--;
+                        break;
+                    default: break;
                 }
 
-                clReleaseMemObject(streams[0]);
-                clReleaseMemObject(streams[1]);
+                total_errors += test_imagedim_common(
+                    context, queue, kernel, local_threads, effective_img_width,
+                    effective_img_height);
             }
         }
+    }
 
-  }
-
-  // cleanup
-  delete[] input_ptr;
-  delete[] output_ptr;
-  free_mtdata(d);
-  clReleaseSampler(sampler);
-  clReleaseKernel(kernel);
-  clReleaseProgram(program);
-
-  return total_errors;
+    return total_errors;
 }
-
-
-
-
diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp
index fdbc891967..6bdf651dd1 100644
--- a/test_conformance/basic/test_vector_swizzle.cpp
+++ b/test_conformance/basic/test_vector_swizzle.cpp
@@ -516,8 +516,7 @@ static void makeReference(std::vector<T>& ref)
     // single channel lvalue
     for (size_t i = 0; i < N; i++)
     {
-        ref[dstIndex * S + i] = 0;
-        ++dstIndex;
+        ref[dstIndex++ * S + i] = 0;
     }
 
     // normal lvalue
diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp
index d6ab111e1d..f1f07f84a3 100644
--- a/test_conformance/buffers/test_sub_buffers.cpp
+++ b/test_conformance/buffers/test_sub_buffers.cpp
@@ -16,6 +16,7 @@
 #include "procs.h"
 
 #include <algorithm>
+#include <vector>
 
 // Design:
 // To test sub buffers, we first create one main buffer. We then create several sub-buffers and
@@ -413,16 +414,13 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context
     size_t param_size;
     error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, 0, NULL, &param_size );
     test_error( error, "Error obtaining device name" );
+    std::vector<char> device_name(param_size);
 
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    char device_name[param_size];
-#else
-    char* device_name = (char*)_malloca(param_size);
-#endif
     error = clGetDeviceInfo(otherDevice, CL_DEVICE_NAME, param_size, &device_name[0], NULL );
     test_error( error, "Error obtaining device name" );
 
-    log_info( "\tOther device obtained for dual device test is type %s\n", device_name );
+    log_info("\tOther device obtained for dual device test is type %s\n",
+             device_name.data());
 
     // Create a shared context for these two devices
     cl_device_id devices[ 2 ] = { deviceID, otherDevice };
@@ -453,7 +451,6 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context
     test_error( error, "Unable to get secondary device's address alignment" );
 
     cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8;
-
     // Finally time to run!
     return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 );
 }
diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
index f295387a1f..21db20dd8f 100644
--- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
@@ -96,6 +96,26 @@ void init_cl_vk_ext(cl_platform_id opencl_platform, cl_uint num_devices,
                                      "clReImportSemaphoreSyncFdKHR!");
         }
     }
+
+    clEnqueueAcquireExternalMemObjectsKHRptr =
+        (pfnclEnqueueAcquireExternalMemObjectsKHR)
+            clGetExtensionFunctionAddressForPlatform(
+                opencl_platform, "clEnqueueAcquireExternalMemObjectsKHR");
+    if (nullptr == clEnqueueAcquireExternalMemObjectsKHRptr)
+    {
+        throw std::runtime_error("Failed to get the function pointer of "
+                                 "clEnqueueAcquireExternalMemObjectsKHR!");
+    }
+
+    clEnqueueReleaseExternalMemObjectsKHRptr =
+        (pfnclEnqueueReleaseExternalMemObjectsKHR)
+            clGetExtensionFunctionAddressForPlatform(
+                opencl_platform, "clEnqueueReleaseExternalMemObjectsKHR");
+    if (nullptr == clEnqueueReleaseExternalMemObjectsKHRptr)
+    {
+        throw std::runtime_error("Failed to get the function pointer of "
+                                 "clEnqueueReleaseExternalMemObjectsKHR!");
+    }
 }
 
 cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width,
@@ -508,7 +528,8 @@ cl_int check_external_memory_handle_type(
 
 cl_int check_external_semaphore_handle_type(
     cl_device_id deviceID,
-    cl_external_semaphore_handle_type_khr requiredHandleType)
+    cl_external_semaphore_handle_type_khr requiredHandleType,
+    cl_device_info queryParamName)
 {
     unsigned int i;
     cl_external_semaphore_handle_type_khr *handle_type;
@@ -516,18 +537,26 @@ cl_int check_external_semaphore_handle_type(
     cl_int errNum = CL_SUCCESS;
 
     errNum =
-        clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
-                        0, NULL, &handle_type_size);
+        clGetDeviceInfo(deviceID, queryParamName, 0, NULL, &handle_type_size);
+
+    if (handle_type_size == 0)
+    {
+        log_error("Device does not support %s semaphore\n",
+                  queryParamName == CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR
+                      ? "importing"
+                      : "exporting");
+        return CL_INVALID_VALUE;
+    }
+
     handle_type =
         (cl_external_semaphore_handle_type_khr *)malloc(handle_type_size);
 
-    errNum =
-        clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR,
-                        handle_type_size, handle_type, NULL);
+    errNum = clGetDeviceInfo(deviceID, queryParamName, handle_type_size,
+                             handle_type, NULL);
 
     test_error(
         errNum,
-        "Unable to query CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR \n");
+        "Unable to query supported device semaphore handle types list\n");
 
     for (i = 0; i < handle_type_size; i++)
     {
@@ -897,6 +926,8 @@ clExternalExportableSemaphore::clExternalExportableSemaphore(
 
     cl_int err = 0;
     cl_device_id devList[] = { deviceId, NULL };
+    cl_external_semaphore_handle_type_khr clSemaphoreHandleType =
+        getCLSemaphoreTypeFromVulkanType(externalSemaphoreHandleType);
     m_externalHandleType = externalSemaphoreHandleType;
     m_externalSemaphore = nullptr;
     m_device = deviceId;
@@ -908,9 +939,7 @@ clExternalExportableSemaphore::clExternalExportableSemaphore(
     };
     sema_props.push_back(
         (cl_semaphore_properties_khr)CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR);
-    sema_props.push_back(
-        (cl_semaphore_properties_khr)getCLSemaphoreTypeFromVulkanType(
-            externalSemaphoreHandleType));
+    sema_props.push_back((cl_semaphore_properties_khr)clSemaphoreHandleType);
     sema_props.push_back((cl_semaphore_properties_khr)
                              CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR);
     sema_props.push_back(
@@ -928,6 +957,83 @@ clExternalExportableSemaphore::clExternalExportableSemaphore(
         throw std::runtime_error(
             "clCreateSemaphoreWithPropertiesKHRptr failed! ");
     }
+
+    switch (m_externalHandleType)
+    {
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: {
+            err = clGetSemaphoreHandleForTypeKHRptr(
+                m_externalSemaphore, m_device, clSemaphoreHandleType,
+                sizeof(int), &fd, nullptr);
+            if (err != CL_SUCCESS)
+            {
+                throw std::runtime_error("Failed to export OpenCL semaphore\n");
+            }
+
+            VkImportSemaphoreFdInfoKHR vkImportSemaphoreFdInfoKHR = {};
+            vkImportSemaphoreFdInfoKHR.sType =
+                VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR;
+            vkImportSemaphoreFdInfoKHR.semaphore = m_deviceSemaphore;
+            vkImportSemaphoreFdInfoKHR.fd = fd;
+            vkImportSemaphoreFdInfoKHR.pNext = nullptr;
+            vkImportSemaphoreFdInfoKHR.handleType =
+                VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+            vkImportSemaphoreFdInfoKHR.flags = 0;
+
+            if (vkImportSemaphoreFdKHR(m_deviceSemaphore.getDevice(),
+                                       &vkImportSemaphoreFdInfoKHR)
+                != VK_SUCCESS)
+            {
+                throw std::runtime_error(
+                    "Failed to import semaphore in Vulkan\n");
+            }
+            break;
+        }
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: {
+            err = clGetSemaphoreHandleForTypeKHRptr(
+                m_externalSemaphore, m_device, clSemaphoreHandleType,
+                sizeof(void *), (void *)&handle, nullptr);
+            if (err != CL_SUCCESS)
+            {
+                throw std::runtime_error("Failed to export OpenCL semaphore\n");
+            }
+
+#ifdef _WIN32
+            VkImportSemaphoreWin32HandleInfoKHR
+                vkImportSemaphoreWin32HandleInfoKHR = {};
+            vkImportSemaphoreWin32HandleInfoKHR.sType =
+                VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR;
+            vkImportSemaphoreWin32HandleInfoKHR.pNext = nullptr;
+            vkImportSemaphoreWin32HandleInfoKHR.semaphore = m_deviceSemaphore;
+            vkImportSemaphoreWin32HandleInfoKHR.flags = 0;
+            vkImportSemaphoreWin32HandleInfoKHR.handleType =
+                (VkExternalSemaphoreHandleTypeFlagBits)m_externalHandleType;
+            vkImportSemaphoreWin32HandleInfoKHR.handle = (HANDLE)handle;
+            vkImportSemaphoreWin32HandleInfoKHR.name = nullptr;
+
+            if (vkImportSemaphoreWin32HandleKHR(
+                    m_deviceSemaphore.getDevice(),
+                    &vkImportSemaphoreWin32HandleInfoKHR)
+                != VK_SUCCESS)
+            {
+                throw std::runtime_error(
+                    "Failed to import semaphore in Vulkan\n");
+            }
+#else
+            log_error(
+                "Opaque D3DKMT and NT handles are only supported on Windows\n");
+            ASSERT(0);
+#endif
+            break;
+        }
+        case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD:
+            // Do nothing, imported after each signal from OpenCL
+            break;
+        default:
+            log_error("Unsupported external semaphore handle type\n");
+            ASSERT(0);
+            break;
+    }
 }
 
 clExternalExportableSemaphore::~clExternalExportableSemaphore()
diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
index 16389c44c7..43912d5a7e 100644
--- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
@@ -26,6 +26,23 @@
 #include <OpenCL/cl_ext.h>
 #endif
 
+// Creates an exportable semaphore if requested and supported, else importable.
+#define CREATE_OPENCL_SEMAPHORE(clSemaphore, vkSemaphore, ctx, handleType,     \
+                                devIdx, createExportable)                      \
+    do                                                                         \
+    {                                                                          \
+        if ((createExportable)                                                 \
+            && check_external_semaphore_handle_type(                           \
+                   devIdx, getCLSemaphoreTypeFromVulkanType(handleType),       \
+                   CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR)                \
+                == CL_SUCCESS)                                                 \
+            clSemaphore = new clExternalExportableSemaphore(                   \
+                vkSemaphore, ctx, handleType, devIdx);                         \
+        else                                                                   \
+            clSemaphore = new clExternalImportableSemaphore(                   \
+                vkSemaphore, ctx, handleType, devIdx);                         \
+    } while (0)
+
 typedef cl_semaphore_khr (*pfnclCreateSemaphoreWithPropertiesKHR)(
     cl_context context, cl_semaphore_properties_khr *sema_props,
     cl_int *errcode_ret);
@@ -76,7 +93,9 @@ cl_int check_external_memory_handle_type(
     cl_external_memory_handle_type_khr requiredHandleType);
 cl_int check_external_semaphore_handle_type(
     cl_device_id deviceID,
-    cl_external_semaphore_handle_type_khr requiredHandleType);
+    cl_external_semaphore_handle_type_khr requiredHandleType,
+    cl_device_info queryParamName =
+        CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR);
 cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &width,
                              size_t &height);
 
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
index 70c0944ed4..412aa0a92d 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
@@ -103,7 +103,8 @@
     VK_FUNC_DECL(vkGetPhysicalDeviceExternalSemaphorePropertiesKHR)
 #define VK_WINDOWS_FUNC_LIST                                                   \
     VK_FUNC_DECL(vkGetMemoryWin32HandleKHR)                                    \
-    VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)
+    VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)                                 \
+    VK_FUNC_DECL(vkImportSemaphoreWin32HandleKHR)
 
 #define vkEnumerateInstanceVersion _vkEnumerateInstanceVersion
 #define vkEnumerateInstanceExtensionProperties                                 \
@@ -198,5 +199,6 @@
     _vkGetPhysicalDeviceExternalSemaphorePropertiesKHR
 #define vkGetMemoryWin32HandleKHR _vkGetMemoryWin32HandleKHR
 #define vkGetSemaphoreWin32HandleKHR _vkGetSemaphoreWin32HandleKHR
+#define vkImportSemaphoreWin32HandleKHR _vkImportSemaphoreWin32HandleKHR
 
 #endif //_vulkan_api_list_hpp_
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
index a5ca09018d..4b25e2cf18 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
@@ -268,6 +268,57 @@ VulkanBufferList::~VulkanBufferList()
     }
 }
 
+//////////////////////////////////////
+// VulkanImage1DList implementation //
+//////////////////////////////////////
+
+VulkanImage1DList::VulkanImage1DList(const VulkanImage1DList &) {}
+
+VulkanImage1DList::VulkanImage1DList(
+    size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
+    uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device,
+    VulkanFormat format, uint32_t width, uint32_t mipLevels,
+    VulkanImageTiling vulkanImageTiling,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+{
+    for (size_t n = 0; n < numImages; ++n)
+    {
+        // deviceMemory[n] backs image n at baseOffset + n * interImageOffset
+        VulkanImage1D *image1D = new VulkanImage1D(
+            device, format, width, vulkanImageTiling, mipLevels,
+            externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode);
+        add(*image1D);
+        deviceMemory[n]->bindImage(*image1D, baseOffset + n * interImageOffset);
+    }
+}
+
+VulkanImage1DList::VulkanImage1DList(
+    size_t numImages, const VulkanDevice &device, VulkanFormat format,
+    uint32_t width, VulkanImageTiling vulkanImageTiling, uint32_t mipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+{
+    // Only creates and adds the images; this overload never calls bindImage.
+    for (size_t imageIdx = 0; imageIdx < numImages; ++imageIdx)
+    {
+        add(*new VulkanImage1D(device, format, width, vulkanImageTiling,
+                               mipLevels, externalMemoryHandleType,
+                               imageCreateFlag, imageUsage, sharingMode));
+    }
+}
+
+VulkanImage1DList::~VulkanImage1DList()
+{
+    // Release each image referenced by m_wrapperList, in index order.
+    for (size_t imageIdx = 0; imageIdx < m_wrapperList.size(); ++imageIdx)
+    {
+        delete &static_cast<VulkanImage1D &>(m_wrapperList[imageIdx]);
+    }
+}
+
 //////////////////////////////////////
 // VulkanImage2DList implementation //
 //////////////////////////////////////
@@ -319,6 +370,57 @@ VulkanImage2DList::~VulkanImage2DList()
     }
 }
 
+//////////////////////////////////////
+// VulkanImage3DList implementation //
+//////////////////////////////////////
+
+VulkanImage3DList::VulkanImage3DList(const VulkanImage3DList &) {}
+
+VulkanImage3DList::VulkanImage3DList(
+    size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
+    uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device,
+    VulkanFormat format, uint32_t width, uint32_t height, uint32_t depth,
+    uint32_t mipLevels, VulkanImageTiling vulkanImageTiling,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+{
+    for (size_t n = 0; n < numImages; ++n)
+    {
+        // deviceMemory[n] backs image n at baseOffset + n * interImageOffset
+        VulkanImage3D *image3D = new VulkanImage3D(
+            device, format, width, height, depth, vulkanImageTiling, mipLevels,
+            externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode);
+        add(*image3D);
+        deviceMemory[n]->bindImage(*image3D, baseOffset + n * interImageOffset);
+    }
+}
+
+VulkanImage3DList::VulkanImage3DList(
+    size_t numImages, const VulkanDevice &device, VulkanFormat format,
+    uint32_t width, uint32_t height, uint32_t depth,
+    VulkanImageTiling vulkanImageTiling, uint32_t mipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+{
+    // Creates numImages images and adds them to the list; unlike the
+    // deviceMemory overload, this one never calls bindImage.
+    for (size_t n = 0; n < numImages; ++n)
+        add(*new VulkanImage3D(
+            device, format, width, height, depth, vulkanImageTiling, mipLevels,
+            externalMemoryHandleType, imageCreateFlag, imageUsage,
+            sharingMode));
+}
+
+VulkanImage3DList::~VulkanImage3DList()
+{
+    // Release each image referenced by m_wrapperList, in index order.
+    for (size_t imageIdx = 0; imageIdx < m_wrapperList.size(); ++imageIdx)
+    {
+        delete &static_cast<VulkanImage3D &>(m_wrapperList[imageIdx]);
+    }
+}
 ////////////////////////////////////////
 // VulkanImageViewList implementation //
 ////////////////////////////////////////
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
index 7dd099c090..2ec57256d7 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
@@ -202,6 +202,35 @@ class VulkanBufferList : public VulkanList<VulkanBuffer, VkBuffer> {
     virtual ~VulkanBufferList();
 };
 
+class VulkanImage1DList : public VulkanList<VulkanImage1D, VkImage> {
+protected: // the copy constructor is kept out of the public interface
+    VulkanImage1DList(const VulkanImage1DList &image1DList);
+
+public: // two overloads: with deviceMemory and offsets, or without them
+    VulkanImage1DList(
+        size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
+        uint64_t baseOffset, uint64_t interImageOffset,
+        const VulkanDevice &device, VulkanFormat format, uint32_t width,
+        uint32_t mipLevels, VulkanImageTiling vulkanImageTiling,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    VulkanImage1DList(
+        size_t numImages, const VulkanDevice &device, VulkanFormat format,
+        uint32_t width, VulkanImageTiling vulkanImageTiling,
+        uint32_t mipLevels = 1,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    virtual ~VulkanImage1DList(); // NOTE(review): body not shown in this hunk
+};
+
 class VulkanImage2DList : public VulkanList<VulkanImage2D, VkImage> {
 protected:
     VulkanImage2DList(const VulkanImage2DList &image2DList);
@@ -232,6 +261,36 @@ class VulkanImage2DList : public VulkanList<VulkanImage2D, VkImage> {
     virtual ~VulkanImage2DList();
 };
 
+class VulkanImage3DList : public VulkanList<VulkanImage3D, VkImage> {
+protected: // the copy constructor is kept out of the public interface
+    VulkanImage3DList(const VulkanImage3DList &image3DList);
+
+public: // 1st overload binds image i to deviceMemory[i]; 2nd binds nothing
+    VulkanImage3DList(
+        size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
+        uint64_t baseOffset, uint64_t interImageOffset,
+        const VulkanDevice &device, VulkanFormat format, uint32_t width,
+        uint32_t height, uint32_t depth, uint32_t mipLevels,
+        VulkanImageTiling vulkanImageTiling,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    VulkanImage3DList(
+        size_t numImages, const VulkanDevice &device, VulkanFormat format,
+        uint32_t width, uint32_t height, uint32_t depth,
+        VulkanImageTiling vulkanImageTiling, uint32_t mipLevels = 1,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    virtual ~VulkanImage3DList(); // deletes every image held by the list
+};
+
 class VulkanImageViewList : public VulkanList<VulkanImageView, VkImageView> {
 protected:
     VulkanImageViewList(const VulkanImageViewList &imageViewList);
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
index b187181f3f..d175ba6fb4 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
@@ -1810,6 +1810,36 @@ const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const
 
 VulkanImage::operator VkImage() const { return m_vkImage; }
 
+//////////////////////////////////
+// VulkanImage1D implementation //
+//////////////////////////////////
+
+VulkanImage1D::VulkanImage1D(const VulkanImage1D &image1D): VulkanImage(image1D)
+{} // only the VulkanImage base is copy-constructed from image1D
+
+VulkanImage1D::VulkanImage1D(
+    const VulkanDevice &device, VulkanFormat format, uint32_t width,
+    VulkanImageTiling imageTiling, uint32_t numMipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+    : VulkanImage(device, VULKAN_IMAGE_TYPE_1D, format,
+                  VulkanExtent3D(width, 1, 1), numMipLevels, 1,
+                  externalMemoryHandleType, imageCreateFlag, imageTiling,
+                  imageUsage, sharingMode)
+{} // 1D image: the extent handed to VulkanImage is (width, 1, 1)
+
+VulkanImage1D::~VulkanImage1D() {} // empty body: no cleanup of its own
+
+// Extent at mipLevel: width is shifted right by mipLevel (at least 1).
+VulkanExtent3D VulkanImage1D::getExtent3D(uint32_t mipLevel) const
+{
+    const uint32_t one = 1;
+    const uint32_t mipWidth = std::max(m_extent3D.getWidth() >> mipLevel, one);
+
+    return VulkanExtent3D(mipWidth, one, one); // height and depth stay 1
+}
+
 //////////////////////////////////
 // VulkanImage2D implementation //
 //////////////////////////////////
@@ -1840,6 +1870,37 @@ VulkanExtent3D VulkanImage2D::getExtent3D(uint32_t mipLevel) const
     return VulkanExtent3D(width, height, depth);
 }
 
+//////////////////////////////////
+// VulkanImage3D implementation //
+//////////////////////////////////
+
+VulkanImage3D::VulkanImage3D(const VulkanImage3D &image3D): VulkanImage(image3D)
+{} // only the VulkanImage base is copy-constructed from image3D
+
+VulkanImage3D::VulkanImage3D(
+    const VulkanDevice &device, VulkanFormat format, uint32_t width,
+    uint32_t height, uint32_t depth, VulkanImageTiling imageTiling,
+    uint32_t numMipLevels,
+    VulkanExternalMemoryHandleType externalMemoryHandleType,
+    VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
+    VulkanSharingMode sharingMode)
+    : VulkanImage(device, VULKAN_IMAGE_TYPE_3D, format,
+                  VulkanExtent3D(width, height, depth), numMipLevels, 1,
+                  externalMemoryHandleType, imageCreateFlag, imageTiling,
+                  imageUsage, sharingMode)
+{} // 3D image: the extent handed to VulkanImage is (width, height, depth)
+
+VulkanImage3D::~VulkanImage3D() {} // empty body: no cleanup of its own
+
+// Extent at mipLevel: each dimension is shifted right by mipLevel, min 1.
+VulkanExtent3D VulkanImage3D::getExtent3D(uint32_t mipLevel) const
+{
+    const uint32_t one = 1;
+    return VulkanExtent3D(std::max(m_extent3D.getWidth() >> mipLevel, one),
+                          std::max(m_extent3D.getHeight() >> mipLevel, one),
+                          std::max(m_extent3D.getDepth() >> mipLevel, one));
+}
+
 ////////////////////////////////////
 // VulkanImageView implementation //
 ////////////////////////////////////
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
index a520dceea7..f9f547b829 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
@@ -494,6 +494,47 @@ class VulkanImage {
     operator VkImage() const;
 };
 
+class VulkanImage1D : public VulkanImage {
+protected:
+    VkImageView m_vkImageView = VK_NULL_HANDLE; // the ctors never assign it
+
+public: // a 1D image is described by its width only
+    VulkanImage1D(
+        const VulkanDevice &device, VulkanFormat format, uint32_t width,
+        VulkanImageTiling imageTiling, uint32_t numMipLevels = 1,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    virtual ~VulkanImage1D();
+    virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const;
+
+    VulkanImage1D(const VulkanImage1D &image1D);
+};
+
+class VulkanImage3D : public VulkanImage {
+protected:
+    VkImageView m_vkImageView = VK_NULL_HANDLE; // the ctors never assign it
+
+public: // a 3D image is described by width, height and depth
+    VulkanImage3D(
+        const VulkanDevice &device, VulkanFormat format, uint32_t width,
+        uint32_t height, uint32_t depth, VulkanImageTiling imageTiling,
+        uint32_t numMipLevels = 1,
+        VulkanExternalMemoryHandleType externalMemoryHandleType =
+            VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+        VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
+        VulkanImageUsage imageUsage =
+            VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST,
+        VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
+    virtual ~VulkanImage3D();
+    virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const;
+
+    VulkanImage3D(const VulkanImage3D &image3D);
+};
+
 class VulkanImage2D : public VulkanImage {
 protected:
     VkImageView m_vkImageView;
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
index 86d7381322..5164ade5cc 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
@@ -44,7 +44,9 @@ class VulkanBuffer;
 class VulkanOffset3D;
 class VulkanExtent3D;
 class VulkanImage;
+class VulkanImage1D;
 class VulkanImage2D;
+class VulkanImage3D;
 class VulkanImageView;
 class VulkanDeviceMemory;
 class VulkanSemaphore;
@@ -59,7 +61,9 @@ class VulkanQueueList;
 class VulkanCommandBufferList;
 class VulkanDescriptorSetLayoutList;
 class VulkanBufferList;
+class VulkanImage1DList;
 class VulkanImage2DList;
+class VulkanImage3DList;
 class VulkanImageViewList;
 class VulkanDeviceMemoryList;
 class VulkanSemaphoreList;
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index 623c2dbfb7..ffd0d6a099 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -53,6 +53,7 @@ const char *known_extensions[] = {
     "cl_khr_extended_bit_ops",
     "cl_khr_integer_dot_product",
     "cl_khr_subgroup_rotate",
+    "cl_khr_kernel_clock",
     // API-only extensions after this point.  If you add above here, modify
     // first_API_extension below.
     "cl_khr_icd",
@@ -94,7 +95,7 @@ const char *known_extensions[] = {
 };
 
 size_t num_known_extensions = ARRAY_SIZE(known_extensions);
-size_t first_API_extension = 31;
+size_t first_API_extension = 32;
 
 const char *known_embedded_extensions[] = {
     "cles_khr_int64",
diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index e382b38e4c..478f19f4b9 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp
@@ -188,6 +188,7 @@ config_info config_infos[] = {
     CONFIG_INFO(2, 0, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint),
 
     CONFIG_INFO(1, 1, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint),
+    CONFIG_INFO(1, 1, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config),
     CONFIG_INFO(1, 1, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config),
     CONFIG_INFO(1, 1, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config),
     CONFIG_INFO(1, 1, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
@@ -1452,5 +1453,9 @@ int main(int argc, const char** argv)
         }
     }
 
-    return runTestHarness(argCount, argList, test_num, test_list, true, 0);
+    int error = runTestHarness(argCount, argList, test_num, test_list, true, 0);
+
+    free(argList);
+
+    return error;
 }
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 1020638af9..a44a363666 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -48,6 +48,7 @@
 
 #include <vector>
 #include <type_traits>
+#include <cmath>
 
 #include "basic_test_conversions.h"
 
@@ -86,9 +87,13 @@ int gWimpyReductionFactor = 128;
 int gSkipTesting = 0;
 int gForceFTZ = 0;
 int gIsRTZ = 0;
+int gForceHalfFTZ = 0;
+int gIsHalfRTZ = 0;
 uint32_t gSimdSize = 1;
 int gHasDouble = 0;
 int gTestDouble = 1;
+int gHasHalfs = 0;
+int gTestHalfs = 1;
 const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
 int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
 int gMinVectorSize = 0;
@@ -100,6 +105,8 @@ int argCount = 0;
 
 double SubtractTime(uint64_t endTime, uint64_t startTime);
 
+cl_half_rounding_mode DataInitInfo::halfRoundingMode = CL_HALF_RTE;
+cl_half_rounding_mode ConversionsTest::defaultHalfRoundingMode = CL_HALF_RTE;
 
 // clang-format off
 // for readability sake keep this section unformatted
@@ -256,8 +263,30 @@ std::vector<double> DataInitInfo::specialValuesDouble = {
     MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
     MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
 };
-// clang-format on
 
+// Raw cl_half bit patterns for the more difficult cases to get right
+std::vector<cl_half> DataInitInfo::specialValuesHalf = {
+    0xffff, /* NaN: exponent all ones, mantissa non-zero, sign set */
+    0x0000, /* +0 */
+    0x0001, /* smallest positive denormal */
+    0x7c00, /*INFINITY*/
+    0xfc00, /*-INFINITY*/
+    0x8000, /*-0*/
+    0x7bff, /*HALF_MAX*/
+    0x0400, /*HALF_MIN*/
+    0x03ff, /* Largest denormal */
+    0x3c00, /* 1 */
+    0xbc00, /* -1 */
+    0x3555, /*nearest value to 1/3*/
+    0x3bff, /*largest number less than one*/
+    0xc000, /* -2 */
+    0xfbff, /* -HALF_MAX */
+    0x8400, /* -HALF_MIN */
+    0x4248, /* M_PI_H */
+    0xc248, /* -M_PI_H */
+    0xbbff, /* Largest negative fraction */
+};
+// clang-format on
 
 // Windows (since long double got deprecated) sets the x87 to 53-bit precision
 // (that's x87 default state).  This causes problems with the tests that
@@ -282,15 +311,32 @@ static inline void Force64BitFPUPrecision(void)
 #endif
 }
 
-
-template <typename InType, typename OutType>
-int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
-                                                  int vectorSize)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+int CalcRefValsPat<InType, OutType, InFP, OutFP>::check_result(void *test,
+                                                               uint32_t count,
+                                                               int vectorSize)
 {
     const cl_uchar *a = (const cl_uchar *)gAllowZ;
 
-    if (std::is_integral<OutType>::value)
-    { // char/uchar/short/ushort/int/uint/long/ulong
+    if (is_half<OutType, OutFP>())
+    {
+        const cl_half *t = (const cl_half *)test;
+        const cl_half *c = (const cl_half *)gRef;
+
+        for (uint32_t i = 0; i < count; i++)
+            if (t[i] != c[i] &&
+                // Allow nan's to be binary different
+                !((t[i] & 0x7fff) > 0x7C00 && (c[i] & 0x7fff) > 0x7C00)
+                && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x8000)))
+            {
+                vlog(
+                    "\nError for vector size %d found at 0x%8.8x:  *%a vs %a\n",
+                    vectorSize, i, HTF(c[i]), HTF(t[i]));
+                return i + 1;
+            }
+    }
+    else if (std::is_integral<OutType>::value)
+    { // char/uchar/short/ushort/half/int/uint/long/ulong
         const OutType *t = (const OutType *)test;
         const OutType *c = (const OutType *)gRef;
         for (uint32_t i = 0; i < count; i++)
@@ -388,6 +434,20 @@ cl_int CustomConversionsTest::Run()
             continue;
         }
 
+        // skip half if we don't have it
+        if (!gTestHalfs && (inType == khalf || outType == khalf))
+        {
+            if (gHasHalfs)
+            {
+                vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
+                           gTypeNames[outType], gSaturationNames[sat],
+                           gRoundingModeNames[round], gTypeNames[inType]);
+                vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
+                     "off.\n");
+            }
+            continue;
+        }
+
         // skip longs on embedded
         if (!gHasLong
             && (inType == klong || outType == klong || inType == kulong
@@ -427,8 +487,8 @@ ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
                                  cl_command_queue queue)
     : context(context), device(device), queue(queue), num_elements(0),
       typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
-                     cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
-                     cl_ulong(0), cl_long(0) })
+                     cl_uint(0), cl_int(0), cl_half(0), cl_float(0),
+                     cl_double(0), cl_ulong(0), cl_long(0) })
 {}
 
 
@@ -445,11 +505,31 @@ cl_int ConversionsTest::Run()
 cl_int ConversionsTest::SetUp(int elements)
 {
     num_elements = elements;
+    // When cl_khr_fp16 is available, derive the default half rounding mode
+    // from the value get_default_rounding_mode() returns: round-to-nearest
+    // is checked first, then round-to-zero; anything else fails the setup.
+    if (is_extension_available(device, "cl_khr_fp16"))
+    {
+        const cl_device_fp_config fpConfigHalf =
+            get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
+        cl_half_rounding_mode mode;
+        if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
+            mode = CL_HALF_RTE;
+        else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
+            mode = CL_HALF_RTZ;
+        else
+        {
+            log_error("Error while acquiring half rounding mode\n");
+            return TEST_FAIL;
+        }
+        DataInitInfo::halfRoundingMode = mode;
+        ConversionsTest::defaultHalfRoundingMode = mode;
+    }
+
     return CL_SUCCESS;
 }
 
-
-template <typename InType, typename OutType>
+template <typename InType, typename OutType, bool InFP, bool OutFP>
 void ConversionsTest::TestTypesConversion(const Type &inType,
                                           const Type &outType, int &testNumber,
                                           int startMinVectorSize)
@@ -470,7 +550,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
          sat = (SaturationMode)(sat + 1))
     {
         // skip illegal saturated conversions to float type
-        if (kSaturated == sat && (outType == kfloat || outType == kdouble))
+        if (kSaturated == sat
+            && (outType == kfloat || outType == kdouble || outType == khalf))
         {
             continue;
         }
@@ -507,6 +588,20 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
                 continue;
             }
 
+            // skip half if we don't have it
+            if (!gTestHalfs && (inType == khalf || outType == khalf))
+            {
+                if (gHasHalfs)
+                {
+                    vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
+                               gTypeNames[outType], gSaturationNames[sat],
+                               gRoundingModeNames[round], gTypeNames[inType]);
+                    vlog("\t\tcl_khr_fp16 enabled, but half testing turned "
+                         "off.\n");
+                }
+                continue;
+            }
+
             // Skip the implicit converts if the rounding mode is
             // not default or test is saturated
             if (0 == startMinVectorSize)
@@ -517,7 +612,8 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
                     gMinVectorSize = 0;
             }
 
-            if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
+            if ((error = DoTest<InType, OutType, InFP, OutFP>(outType, inType,
+                                                              sat, round)))
             {
                 vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
                            "FAILED ** \n",
@@ -529,8 +625,7 @@ void ConversionsTest::TestTypesConversion(const Type &inType,
     }
 }
 
-
-template <typename InType, typename OutType>
+template <typename InType, typename OutType, bool InFP, bool OutFP>
 int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
                             RoundingMode round)
 {
@@ -541,7 +636,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
     cl_uint threads = GetThreadCount();
 
     DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
-    DataInfoSpec<InType, OutType> init_info(info);
+    DataInfoSpec<InType, OutType, InFP, OutFP> init_info(info);
     WriteInputBufferInfo writeInputBufferInfo;
     int vectorSize;
     int error = 0;
@@ -564,7 +659,7 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
     for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
     {
         writeInputBufferInfo.calcInfo[vectorSize].reset(
-            new CalcRefValsPat<InType, OutType>());
+            new CalcRefValsPat<InType, OutType, InFP, OutFP>());
         writeInputBufferInfo.calcInfo[vectorSize]->program =
             conv_test::MakeProgram(
                 outType, inType, sat, round, vectorSize,
@@ -597,6 +692,11 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
         if (round == kDefaultRoundingMode && gIsRTZ)
             init_info.round = round = kRoundTowardZero;
     }
+    else if (std::is_same<OutType, cl_half>::value && OutFP)
+    {
+        if (round == kDefaultRoundingMode && gIsHalfRTZ)
+            init_info.round = round = kRoundTowardZero;
+    }
 
     // Figure out how many elements are in a work block
     // we handle 64-bit types a bit differently.
@@ -764,6 +864,10 @@ int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
                         vlog("Input value: 0x%8.8x ",
                              ((unsigned int *)gIn)[error - 1]);
                         break;
+                    case khalf:
+                        vlog("Input value: %a ",
+                             HTF(((cl_half *)gIn)[error - 1]));
+                        break;
                     case kfloat:
                         vlog("Input value: %a ", ((float *)gIn)[error - 1]);
                         break;
@@ -901,16 +1005,6 @@ double SubtractTime(uint64_t endTime, uint64_t startTime)
 }
 #endif
 
-////////////////////////////////////////////////////////////////////////////////
-
-static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
-{
-    cl_uint i;
-    for (i = 0; i < count; ++i)
-        allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
-}
-
-
 void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
 
 void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
@@ -951,6 +1045,112 @@ void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
     // destroyed automatically soon after we exit.
 }
 
+// True when v is a NaN. A cl_half is tested through its raw bits (exponent
+// field 0x1F with a non-zero mantissa); other types use std::isnan/_isnan.
+template <typename T> static bool isnan_fp(const T &v)
+{
+    if (!std::is_same<T, cl_half>::value)
+    {
+#if defined(_WIN32)
+        return _isnan(v);
+#else
+        return std::isnan(v);
+#endif
+    }
+    const cl_half bits = (cl_half)v;
+    const uint16_t expField = (bits >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
+    const uint16_t mantField = bits & 0x3FF;
+    return expField == 0x1F && mantField != 0;
+}
+
+// Zeroes result j in mapped wherever input j (read from gIn) is a NaN.
+// NOTE(review): indexes gIn from its start, so mapped must line up with it.
+template <typename InType>
+void ZeroNanToIntCases(cl_uint count, void *mapped, Type outType)
+{
+    const InType *inp = (const InType *)gIn;
+    const size_t outSize = gTypeSizes[outType];
+    for (cl_uint j = 0; j < count; j++) // unsigned index, same type as count
+        if (isnan_fp<InType>(inp[j]))
+            memset((char *)mapped + j * outSize, 0, outSize);
+}
+
+// Where input j and result j are both NaNs, overwrites result j with one
+// fixed NaN (bits 0x7e00 for cl_half, NAN otherwise); other entries are kept.
+template <typename InType, typename OutType>
+void FixNanToFltConversions(InType *inp, OutType *outp, cl_uint count)
+{
+    const bool halfOut = std::is_same<OutType, cl_half>::value;
+    for (cl_uint j = 0; j < count; j++) // unsigned index, same type as count
+    {
+        if (!isnan_fp(inp[j]) || !isnan_fp(outp[j])) continue;
+        if (halfOut)
+            outp[j] = 0x7e00; // HALF_NAN
+        else
+            outp[j] = NAN;
+    }
+}
+
+// Patches the results in d that stem from NaN inputs; inputs come from gIn.
+//  - Integer outType: the result of every NaN input is zeroed.
+//  - float/double/half outType converted from a different floating-point
+//    inType: a NaN result of a NaN input is replaced by one fixed NaN.
+// Every other combination of types leaves d untouched.
+// count is the number of elements examined in gIn and d.
+//
+// NOTE(review): inputs are indexed from the start of gIn, while
+// PrepareReference passes a per-job slice of gRef as d; confirm that the
+// element indices of d and gIn line up for that caller.
+void FixNanConversions(Type outType, Type inType, void *d, cl_uint count)
+{
+    // Integer destinations are zeroed; floating-point ones get a fixed NaN.
+    const bool fpOut =
+        (outType == kfloat || outType == kdouble || outType == khalf);
+
+    switch (inType)
+    {
+        case kfloat: {
+            // float -> integer, double or half
+            float *src = (float *)gIn;
+            if (!fpOut)
+                ZeroNanToIntCases<float>(count, d, outType);
+            else if (outType == kdouble)
+                FixNanToFltConversions(src, (double *)d, count);
+            else if (outType == khalf)
+                FixNanToFltConversions(src, (cl_half *)d, count);
+            break;
+        }
+
+        case kdouble: {
+            // double -> integer, float or half
+            double *src = (double *)gIn;
+            if (!fpOut)
+                ZeroNanToIntCases<double>(count, d, outType);
+            else if (outType == kfloat)
+                FixNanToFltConversions(src, (float *)d, count);
+            else if (outType == khalf)
+                FixNanToFltConversions(src, (cl_half *)d, count);
+            break;
+        }
+
+        case khalf: {
+            // half -> integer, float or double
+            cl_half *src = (cl_half *)gIn;
+            if (!fpOut)
+                ZeroNanToIntCases<cl_half>(count, d, outType);
+            else if (outType == kfloat)
+                FixNanToFltConversions(src, (float *)d, count);
+            else if (outType == kdouble)
+                FixNanToFltConversions(src, (double *)d, count);
+            break;
+        }
+
+        default:
+            // No other input type is patched.
+            break;
+    }
+}
+
 
 void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
                                              void *data)
@@ -963,7 +1163,6 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
     Type outType =
         info->parent->outType; // the data type of the conversion result
     Type inType = info->parent->inType; // the data type of the conversion input
-    size_t j;
     cl_int error;
     cl_event doneBarrier = info->parent->doneBarrier;
 
@@ -985,51 +1184,7 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
 
     // Patch up NaNs conversions to integer to zero -- these can be converted to
     // any integer
-    if (outType != kfloat && outType != kdouble)
-    {
-        if (inType == kfloat)
-        {
-            float *inp = (float *)gIn;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j]))
-                    memset((char *)mapped + j * gTypeSizes[outType], 0,
-                           gTypeSizes[outType]);
-            }
-        }
-        if (inType == kdouble)
-        {
-            double *inp = (double *)gIn;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j]))
-                    memset((char *)mapped + j * gTypeSizes[outType], 0,
-                           gTypeSizes[outType]);
-            }
-        }
-    }
-    else if (inType == kfloat || inType == kdouble)
-    { // outtype and intype is float or double.  NaN conversions for float <->
-      // double can be any NaN
-        if (inType == kfloat && outType == kdouble)
-        {
-            float *inp = (float *)gIn;
-            double *outp = (double *)mapped;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
-            }
-        }
-        if (inType == kdouble && outType == kfloat)
-        {
-            double *inp = (double *)gIn;
-            float *outp = (float *)mapped;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
-            }
-        }
-    }
+    FixNanConversions(outType, inType, mapped, count);
 
     if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
         info->result =
@@ -1077,12 +1232,8 @@ void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
     // CalcReferenceValuesComplete exit.
 }
 
-//
-
 namespace conv_test {
 
-////////////////////////////////////////////////////////////////////////////////
-
 cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
 {
     DataInitBase *info = (DataInitBase *)p;
@@ -1092,8 +1243,6 @@ cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
     return CL_SUCCESS;
 }
 
-////////////////////////////////////////////////////////////////////////////////
-
 cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
 {
     DataInitBase *info = (DataInitBase *)p;
@@ -1102,7 +1251,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
     Type inType = info->inType;
     Type outType = info->outType;
     RoundingMode round = info->round;
-    size_t j;
 
     Force64BitFPUPrecision();
 
@@ -1110,7 +1258,6 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
     void *a = (cl_uchar *)gAllowZ + job_id * count;
     void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
 
-
     if (outType != inType)
     {
         // create the reference while we wait
@@ -1144,7 +1291,33 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
         qcom_sat = info->sat;
 #endif
 
-        RoundingMode oldRound = set_round(round, outType);
+        RoundingMode oldRound;
+        if (outType == khalf)
+        {
+            oldRound = set_round(kRoundToNearestEven, kfloat);
+            switch (round)
+            {
+                default:
+                case kDefaultRoundingMode:
+                    DataInitInfo::halfRoundingMode =
+                        ConversionsTest::defaultHalfRoundingMode;
+                    break;
+                case kRoundToNearestEven:
+                    DataInitInfo::halfRoundingMode = CL_HALF_RTE;
+                    break;
+                case kRoundUp:
+                    DataInitInfo::halfRoundingMode = CL_HALF_RTP;
+                    break;
+                case kRoundDown:
+                    DataInitInfo::halfRoundingMode = CL_HALF_RTN;
+                    break;
+                case kRoundTowardZero:
+                    DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
+                    break;
+            }
+        }
+        else
+            oldRound = set_round(round, outType);
 
         if (info->sat)
             info->conv_array_sat(d, s, count);
@@ -1156,10 +1329,13 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
         // Decide if we allow a zero result in addition to the correctly rounded
         // one
         memset(a, 0, count);
-        if (gForceFTZ)
+        if (gForceFTZ && (inType == kfloat || outType == kfloat))
         {
-            if (inType == kfloat || outType == kfloat)
-                setAllowZ((uint8_t *)a, (uint32_t *)s, count);
+            info->set_allow_zero_array((uint8_t *)a, d, s, count);
+        }
+        if (gForceHalfFTZ && (inType == khalf || outType == khalf))
+        {
+            info->set_allow_zero_array((uint8_t *)a, d, s, count);
         }
     }
     else
@@ -1170,55 +1346,11 @@ cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
 
     // Patch up NaNs conversions to integer to zero -- these can be converted to
     // any integer
-    if (info->outType != kfloat && info->outType != kdouble)
-    {
-        if (inType == kfloat)
-        {
-            float *inp = (float *)s;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j]))
-                    memset((char *)d + j * gTypeSizes[outType], 0,
-                           gTypeSizes[outType]);
-            }
-        }
-        if (inType == kdouble)
-        {
-            double *inp = (double *)s;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j]))
-                    memset((char *)d + j * gTypeSizes[outType], 0,
-                           gTypeSizes[outType]);
-            }
-        }
-    }
-    else if (inType == kfloat || inType == kdouble)
-    { // outtype and intype is float or double.  NaN conversions for float <->
-      // double can be any NaN
-        if (inType == kfloat && outType == kdouble)
-        {
-            float *inp = (float *)s;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j])) ((double *)d)[j] = NAN;
-            }
-        }
-        if (inType == kdouble && outType == kfloat)
-        {
-            double *inp = (double *)s;
-            for (j = 0; j < count; j++)
-            {
-                if (isnan(inp[j])) ((float *)d)[j] = NAN;
-            }
-        }
-    }
+    FixNanConversions(outType, inType, d, count);
 
     return CL_SUCCESS;
 }
 
-////////////////////////////////////////////////////////////////////////////////
-
 uint64_t GetTime(void)
 {
 #if defined(__APPLE__)
@@ -1233,8 +1365,6 @@ uint64_t GetTime(void)
 #endif
 }
 
-////////////////////////////////////////////////////////////////////////////////
-
 // Note: not called reentrantly
 void WriteInputBufferComplete(void *data)
 {
@@ -1295,8 +1425,6 @@ void WriteInputBufferComplete(void *data)
     // automatically soon after we exit.
 }
 
-////////////////////////////////////////////////////////////////////////////////
-
 cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
                        RoundingMode round, int vectorSize, cl_kernel *outKernel)
 {
@@ -1308,6 +1436,9 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
     if (outType == kdouble || inType == kdouble)
         source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
 
+    if (outType == khalf || inType == khalf)
+        source << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+
     // Create the program. This is a bit complicated because we are trying to
     // avoid byte and short stores.
     if (0 == vectorSize)
@@ -1408,7 +1539,7 @@ cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
     *outKernel = NULL;
 
     const char *flags = NULL;
-    if (gForceFTZ) flags = "-cl-denorms-are-zero";
+    if (gForceFTZ || gForceHalfFTZ) flags = "-cl-denorms-are-zero";
 
     // build it
     std::string sourceString = source.str();
diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h
index 2314ee748b..c7b24b7089 100644
--- a/test_conformance/conversions/basic_test_conversions.h
+++ b/test_conformance/conversions/basic_test_conversions.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2023 The Khronos Group Inc.
+// Copyright (c) 2023-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -30,6 +30,8 @@
     #include <CL/opencl.h>
 #endif
 
+#include <CL/cl_half.h>
+
 #include "harness/mt19937.h"
 #include "harness/testHarness.h"
 #include "harness/typeWrappers.h"
@@ -76,6 +78,8 @@ extern cl_mem gInBuffer;
 extern cl_mem gOutBuffers[];
 extern int gHasDouble;
 extern int gTestDouble;
+extern int gHasHalfs;
+extern int gTestHalfs;
 extern int gWimpyMode;
 extern int gWimpyReductionFactor;
 extern int gSkipTesting;
@@ -87,6 +91,8 @@ extern int gReportAverageTimes;
 extern int gStartTestNumber;
 extern int gEndTestNumber;
 extern int gIsRTZ;
+extern int gForceHalfFTZ;
+extern int gIsHalfRTZ;
 extern void *gIn;
 extern void *gRef;
 extern void *gAllowZ;
@@ -135,7 +141,7 @@ struct CalcRefValsBase
     cl_int result;
 };
 
-template <typename InType, typename OutType>
+template <typename InType, typename OutType, bool InFP, bool OutFP>
 struct CalcRefValsPat : CalcRefValsBase
 {
     int check_result(void *, uint32_t, int) override;
@@ -162,8 +168,12 @@ struct WriteInputBufferInfo
 };
 
 // Must be aligned with Type enums!
-using TypeIter = std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint,
-                            cl_int, cl_float, cl_double, cl_ulong, cl_long>;
+using TypeIter =
+    std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint, cl_int, cl_half,
+               cl_float, cl_double, cl_ulong, cl_long>;
+
+// hardcoded solution needed due to typeid confusing cl_ushort/cl_half
+constexpr bool isTypeFp[] = { 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0 };
 
 // Helper test fixture for constructing OpenCL objects used in testing
 // a variety of simple command-buffer enqueue scenarios.
@@ -179,13 +189,13 @@ struct ConversionsTest
     // Test body returning an OpenCL error code
     cl_int Run();
 
-    template <typename InType, typename OutType>
+    template <typename InType, typename OutType, bool InFP, bool OutFP>
     int DoTest(Type outType, Type inType, SaturationMode sat,
                RoundingMode round);
 
-    template <typename InType, typename OutType>
+    template <typename InType, typename OutType, bool InFP, bool OutFP>
     void TestTypesConversion(const Type &inType, const Type &outType, int &tn,
-                             const int smvs);
+                             int startMinVectorSize);
 
 protected:
     cl_context context;
@@ -195,6 +205,9 @@ struct ConversionsTest
     size_t num_elements;
 
     TypeIter typeIterator;
+
+public:
+    static cl_half_rounding_mode defaultHalfRoundingMode;
 };
 
 struct CustomConversionsTest : ConversionsTest
@@ -221,17 +234,18 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
 
 struct TestType
 {
-    template <typename T> bool testType(Type in)
+    template <typename T, bool FP> bool testType(Type in)
     {
         switch (in)
         {
             default: return false;
             case kuchar: return std::is_same<cl_uchar, T>::value;
             case kchar: return std::is_same<cl_char, T>::value;
-            case kushort: return std::is_same<cl_ushort, T>::value;
+            case kushort: return std::is_same<cl_ushort, T>::value && !FP;
             case kshort: return std::is_same<cl_short, T>::value;
             case kuint: return std::is_same<cl_uint, T>::value;
             case kint: return std::is_same<cl_int, T>::value;
+            case khalf: return std::is_same<cl_half, T>::value && FP;
             case kfloat: return std::is_same<cl_float, T>::value;
             case kdouble: return std::is_same<cl_double, T>::value;
             case kulong: return std::is_same<cl_ulong, T>::value;
@@ -263,13 +277,15 @@ struct IterOverTypes : public TestType
               typename InType>
     void iterate_in_type(const InType &t)
     {
-        if (!testType<InType>(inType)) vlog_error("Unexpected data type!\n");
+        if (!testType<InType, isTypeFp[In]>(inType))
+            vlog_error("Unexpected data type!\n");
 
-        if (!testType<OutType>(outType)) vlog_error("Unexpected data type!\n");
+        if (!testType<OutType, isTypeFp[Out]>(outType))
+            vlog_error("Unexpected data type!\n");
 
         // run the conversions
-        test.TestTypesConversion<InType, OutType>(inType, outType, testNumber,
-                                                  startMinVectorSize);
+        test.TestTypesConversion<InType, OutType, isTypeFp[In], isTypeFp[Out]>(
+            inType, outType, testNumber, startMinVectorSize);
         inType = (Type)(inType + 1);
     }
 
@@ -337,11 +353,13 @@ struct IterOverSelectedTypes : public TestType
               typename InType>
     void iterate_in_type(const InType &t)
     {
-        if (testType<InType>(inType) && testType<OutType>(outType))
+        if (testType<InType, isTypeFp[In]>(inType)
+            && testType<OutType, isTypeFp[Out]>(outType))
         {
             // run selected conversion
             // testing of the result will happen afterwards
-            test.DoTest<InType, OutType>(outType, inType, saturation, rounding);
+            test.DoTest<InType, OutType, isTypeFp[In], isTypeFp[Out]>(
+                outType, inType, saturation, rounding);
         }
     }
 
diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h
index d63bb07c8a..9d2cbc606c 100644
--- a/test_conformance/conversions/conversions_data_info.h
+++ b/test_conformance/conversions/conversions_data_info.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2023 The Khronos Group Inc.
+// Copyright (c) 2023-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -28,8 +28,12 @@ extern bool qcom_sat;
 extern roundingMode qcom_rm;
 #endif
 
+#include <CL/cl_half.h>
+
+#include "harness/conversions.h"
 #include "harness/mt19937.h"
 #include "harness/rounding_mode.h"
+#include "harness/typeWrappers.h"
 
 #include <vector>
 
@@ -60,11 +64,17 @@ struct DataInitInfo
     RoundingMode round;
     cl_uint threads;
 
+    static cl_half_rounding_mode halfRoundingMode;
     static std::vector<uint32_t> specialValuesUInt;
     static std::vector<float> specialValuesFloat;
     static std::vector<double> specialValuesDouble;
+    static std::vector<cl_half> specialValuesHalf;
 };
 
+#define HFF(num) cl_half_from_float(num, DataInitInfo::halfRoundingMode)
+#define HTF(num) cl_half_to_float(num)
+#define HFD(num) cl_half_from_double(num, DataInitInfo::halfRoundingMode)
+
 struct DataInitBase : public DataInitInfo
 {
     virtual ~DataInitBase() = default;
@@ -73,9 +83,12 @@ struct DataInitBase : public DataInitInfo
     virtual void conv_array(void *out, void *in, size_t n) {}
     virtual void conv_array_sat(void *out, void *in, size_t n) {}
     virtual void init(const cl_uint &, const cl_uint &) {}
+    virtual void set_allow_zero_array(uint8_t *allow, void *out, void *in,
+                                      size_t n)
+    {}
 };
 
-template <typename InType, typename OutType>
+template <typename InType, typename OutType, bool InFP, bool OutFP>
 struct DataInfoSpec : public DataInitBase
 {
     explicit DataInfoSpec(const DataInitInfo &agg);
@@ -90,6 +103,9 @@ struct DataInfoSpec : public DataInitBase
     void conv(OutType *out, InType *in);
     void conv_sat(OutType *out, InType *in);
 
+    // Decide if we allow a zero result in addition to the correctly rounded one
+    void set_allow_zero(uint8_t *allow, OutType *out, InType *in);
+
     // min/max ranges for output type of data
     std::pair<OutType, OutType> ranges;
 
@@ -98,6 +114,16 @@ struct DataInfoSpec : public DataInitBase
 
     std::vector<MTdataHolder> mdv;
 
+    constexpr bool is_in_half() const
+    {
+        return (std::is_same<InType, cl_half>::value && InFP);
+    }
+
+    constexpr bool is_out_half() const
+    {
+        return (std::is_same<OutType, cl_half>::value && OutFP);
+    }
+
     void conv_array(void *out, void *in, size_t n) override
     {
         for (size_t i = 0; i < n; i++)
@@ -111,6 +137,12 @@ struct DataInfoSpec : public DataInitBase
     }
 
     void init(const cl_uint &, const cl_uint &) override;
+    void set_allow_zero_array(uint8_t *allow, void *out, void *in,
+                              size_t n) override
+    {
+        for (size_t i = 0; i < n; i++)
+            set_allow_zero(&allow[i], &((OutType *)out)[i], &((InType *)in)[i]);
+    }
     InType clamp(const InType &);
     inline float fclamp(float lo, float v, float hi)
     {
@@ -125,19 +157,22 @@ struct DataInfoSpec : public DataInitBase
     }
 };
 
-template <typename InType, typename OutType>
-DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+DataInfoSpec<InType, OutType, InFP, OutFP>::DataInfoSpec(
+    const DataInitInfo &agg)
     : DataInitBase(agg), mdv(0)
 {
     if (std::is_same<cl_float, OutType>::value)
         ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
     else if (std::is_same<cl_double, OutType>::value)
         ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
+    else if (std::is_same<cl_half, OutType>::value && OutFP)
+        ranges = std::make_pair(HFF(CL_HALF_MIN), HFF(CL_HALF_MAX));
     else if (std::is_same<cl_uchar, OutType>::value)
         ranges = std::make_pair(0, CL_UCHAR_MAX);
     else if (std::is_same<cl_char, OutType>::value)
         ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
-    else if (std::is_same<cl_ushort, OutType>::value)
+    else if (std::is_same<cl_ushort, OutType>::value && !OutFP)
         ranges = std::make_pair(0, CL_USHRT_MAX);
     else if (std::is_same<cl_short, OutType>::value)
         ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
@@ -158,12 +193,12 @@ DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
         InType outMax = static_cast<InType>(ranges.second);
 
         InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
-        if (std::is_integral<OutType>::value)
+        if (std::is_integral<OutType>::value && !OutFP)
         { // to char/uchar/short/ushort/int/uint/long/ulong
             if (sizeof(OutType)<=sizeof(cl_short))
             { // to char/uchar/short/ushort
                 clamp_ranges=
-                {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
+                 {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
                   {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
                   {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
                   {outMin-0.0f, outMax - outMax * 0.5f * eps },
@@ -249,11 +284,55 @@ DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
             }
         }
     }
+    else if (is_in_half())
+    {
+        float outMin = static_cast<float>(ranges.first);
+        float outMax = static_cast<float>(ranges.second);
+        float eps = CL_HALF_EPSILON;
+        cl_half_rounding_mode prev_half_round = DataInitInfo::halfRoundingMode;
+        DataInitInfo::halfRoundingMode = CL_HALF_RTZ;
+
+        if (std::is_integral<OutType>::value)
+        { // to char/uchar/short/ushort/int/uint/long/ulong
+            if (sizeof(OutType)<=sizeof(cl_char) || std::is_same<OutType, cl_short>::value)
+            { // to char/uchar/short
+                clamp_ranges=
+                 {{HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
+                  {HFF(outMin-0.5f), HFF(outMax + 0.5f - outMax * 0.5f * eps)},
+                  {HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax-1.f)},
+                  {HFF(outMin-0.0f), HFF(outMax - outMax * 0.5f * eps) },
+                  {HFF(outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps), HFF(outMax - outMax * 0.5f * eps)}};
+            }
+            else
+            { // to ushort/int/uint/long/ulong
+                if (std::is_signed<OutType>::value)
+                {
+                    clamp_ranges=
+                    { {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
+                      {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
+                      {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
+                      {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)},
+                      {HFF(-CL_HALF_MAX), HFF(CL_HALF_MAX)}};
+                }
+                else
+                {
+                    clamp_ranges=
+                    { {HFF(outMin), HFF(CL_HALF_MAX)},
+                      {HFF(outMin), HFF(CL_HALF_MAX)},
+                      {HFF(outMin), HFF(CL_HALF_MAX)},
+                      {HFF(outMin), HFF(CL_HALF_MAX)},
+                      {HFF(outMin), HFF(CL_HALF_MAX)}};
+                }
+            }
+        }
+
+        DataInitInfo::halfRoundingMode = prev_half_round;
+    }
     // clang-format on
 }
 
-template <typename InType, typename OutType>
-float DataInfoSpec<InType, OutType>::round_to_int(float f)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+float DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int(float f)
 {
     static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
                                     -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
@@ -281,8 +360,9 @@ float DataInfoSpec<InType, OutType>::round_to_int(float f)
     return f;
 }
 
-template <typename InType, typename OutType>
-long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+long long
+DataInfoSpec<InType, OutType, InFP, OutFP>::round_to_int_and_clamp(double f)
 {
     static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
                                      MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
@@ -313,8 +393,8 @@ long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
     return (long long)f;
 }
 
-template <typename InType, typename OutType>
-OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+OutType DataInfoSpec<InType, OutType, InFP, OutFP>::absolute(const OutType &x)
 {
     union {
         cl_uint u;
@@ -331,17 +411,30 @@ OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
     return u.f;
 }
 
-template <typename InType, typename OutType>
-void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
+template <typename T, bool fp> constexpr bool is_half()
 {
-    if (std::is_same<cl_float, InType>::value)
+    return (std::is_same<cl_half, T>::value && fp);
+}
+
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+void DataInfoSpec<InType, OutType, InFP, OutFP>::conv(OutType *out, InType *in)
+{
+    if (std::is_same<cl_float, InType>::value || is_in_half())
     {
         cl_float inVal = *in;
+        if (std::is_same<cl_half, InType>::value)
+        {
+            inVal = HTF(*in);
+        }
 
         if (std::is_floating_point<OutType>::value)
         {
             *out = (OutType)inVal;
         }
+        else if (is_out_half())
+        {
+            *out = HFF(inVal); // inVal already holds the decoded half input
+        }
         else if (std::is_same<cl_ulong, OutType>::value)
         {
 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
@@ -376,6 +469,8 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
     {
         if (std::is_same<cl_float, OutType>::value)
             *out = (OutType)*in;
+        else if (is_out_half())
+            *out = static_cast<OutType>(HFD(*in));
         else
             *out = rint(*in);
     }
@@ -418,7 +513,7 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
             *out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
 #endif
         }
-        else if (std::is_same<cl_float, OutType>::value)
+        else if (std::is_same<cl_float, OutType>::value || is_out_half())
         {
             cl_float outVal = 0.f;
 
@@ -465,7 +560,9 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
 #endif
 #endif
 
-            *out = outVal;
+            *out = std::is_same<cl_half, OutType>::value
+                ? static_cast<OutType>(HFF(outVal))
+                : outVal;
         }
         else
         {
@@ -486,6 +583,8 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
             // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
             *out = (*in == 0 ? 0.0 : *in);
         }
+        else if (is_out_half())
+            *out = static_cast<OutType>(HFF(*in == 0 ? 0.f : *in));
         else
         {
             *out = (OutType)*in;
@@ -496,19 +595,26 @@ void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
 #define CLAMP(_lo, _x, _hi)                                                    \
     ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
 
-template <typename InType, typename OutType>
-void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+void DataInfoSpec<InType, OutType, InFP, OutFP>::conv_sat(OutType *out,
+                                                          InType *in)
 {
-    if (std::is_floating_point<InType>::value)
+    if (std::is_floating_point<InType>::value || is_in_half())
     {
-        if (std::is_floating_point<OutType>::value)
-        { // in float/double, out float/double
-            *out = (OutType)(*in);
+        cl_float inVal = *in;
+        if (is_in_half()) inVal = HTF(*in);
+
+        if (std::is_floating_point<OutType>::value || is_out_half())
+        { // in half/float/double, out half/float/double
+            if (is_out_half())
+                *out = static_cast<OutType>(HFF(inVal));
+            else
+                *out = (OutType)(is_in_half() ? inVal : *in);
         }
-        else if ((std::is_same<InType, cl_float>::value)
+        else if ((std::is_same<InType, cl_float>::value || is_in_half())
                  && std::is_same<cl_ulong, OutType>::value)
         {
-            cl_float x = round_to_int(*in);
+            cl_float x = round_to_int(is_in_half() ? HTF(*in) : *in);
 
 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
             // VS2005 (at least) on x86 uses fistp to store the float as a
@@ -536,47 +642,57 @@ void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
             }
 #else
             *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
-                ? 0xFFFFFFFFFFFFFFFFULL
-                : x < 0 ? 0 : (OutType)x;
+                ? (OutType)0xFFFFFFFFFFFFFFFFULL
+                : x < 0 ? 0
+                        : (OutType)x;
 #endif
         }
-        else if ((std::is_same<InType, cl_float>::value)
+        else if ((std::is_same<InType, cl_float>::value || is_in_half())
                  && std::is_same<cl_long, OutType>::value)
         {
-            cl_float f = round_to_int(*in);
+            cl_float f = round_to_int(is_in_half() ? HTF(*in) : *in);
             *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
-                ? 0x7FFFFFFFFFFFFFFFULL
+                ? (OutType)0x7FFFFFFFFFFFFFFFULL
                 : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
-                    ? 0x8000000000000000LL
-                    : (OutType)f;
+                ? (OutType)0x8000000000000000LL
+                : (OutType)f;
         }
         else if (std::is_same<InType, cl_double>::value
                  && std::is_same<cl_ulong, OutType>::value)
         {
             InType f = rint(*in);
             *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
-                ? 0xFFFFFFFFFFFFFFFFULL
-                : f < 0 ? 0 : (OutType)f;
+                ? (OutType)0xFFFFFFFFFFFFFFFFULL
+                : f < 0 ? 0
+                        : (OutType)f;
         }
         else if (std::is_same<InType, cl_double>::value
                  && std::is_same<cl_long, OutType>::value)
         {
             InType f = rint(*in);
             *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
-                ? 0x7FFFFFFFFFFFFFFFULL
+                ? (OutType)0x7FFFFFFFFFFFFFFFULL
                 : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
-                    ? 0x8000000000000000LL
-                    : (OutType)f;
+                ? (OutType)0x8000000000000000LL
+                : (OutType)f;
         }
         else
-        { // in float/double, out char/uchar/short/ushort/int/uint
-            *out =
-                CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second);
+        { // in half/float/double, out char/uchar/short/ushort/int/uint
+            *out = CLAMP(ranges.first,
+                         round_to_int_and_clamp(is_in_half() ? inVal : *in),
+                         ranges.second);
         }
     }
     else if (std::is_integral<InType>::value
              && std::is_integral<OutType>::value)
     {
+        if (is_out_half())
+        {
+            *out = std::is_signed<InType>::value
+                ? static_cast<OutType>(HFF((cl_float)*in))
+                : absolute(static_cast<OutType>(HFF((cl_float)*in)));
+        }
+        else
         {
             if ((std::is_signed<InType>::value
                  && std::is_signed<OutType>::value)
@@ -614,14 +730,64 @@ void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
     }
 }
 
-template <typename InType, typename OutType>
-void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
-                                         const cl_uint &thread_id)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+void DataInfoSpec<InType, OutType, InFP, OutFP>::set_allow_zero(uint8_t *allow,
+                                                                OutType *out,
+                                                                InType *in)
+{
+    // input side: set *allow when the source value is subnormal -- from double
+    if (std::is_same<InType, cl_double>::value)
+        *allow |= IsDoubleSubnormal(*in);
+    // from float
+    if (std::is_same<InType, cl_float>::value) *allow |= IsFloatSubnormal(*in);
+    // from half
+    if (is_in_half()) *allow |= IsHalfSubnormal(*in);
+
+    // output side: handle the cases that the converted result is subnormal
+    // to double
+    if (std::is_same<OutType, cl_double>::value)
+        *allow |= IsDoubleSubnormal(*out);
+    // to float
+    if (std::is_same<OutType, cl_float>::value)
+        *allow |= IsFloatSubnormal(*out);
+    // to half
+    if (is_out_half()) *allow |= IsHalfSubnormal(*out);
+}
+
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+void DataInfoSpec<InType, OutType, InFP, OutFP>::init(const cl_uint &job_id,
+                                                      const cl_uint &thread_id)
 {
     uint64_t ulStart = start;
     void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
 
-    if (std::is_integral<InType>::value)
+    if (is_in_half())
+    {
+        cl_half *o = (cl_half *)pIn;
+        int i;
+
+        if (gIsEmbedded)
+            for (i = 0; i < size; i++)
+                o[i] = (cl_half)genrand_int32(mdv[thread_id]);
+        else // sweep every 16-bit pattern; '% 0xffff' never produced 0xffff
+            for (i = 0; i < size; i++) o[i] = (cl_half)((i + ulStart) & 0xffff);
+
+        if (0 == ulStart)
+        {
+            size_t tableSize = specialValuesHalf.size()
+                * sizeof(decltype(specialValuesHalf)::value_type);
+            if (sizeof(InType) * size < tableSize)
+                tableSize = sizeof(InType) * size;
+            memcpy((char *)(o + i) - tableSize, &specialValuesHalf.front(),
+                   tableSize);
+        }
+
+        if (kUnsaturated == sat)
+        {
+            for (i = 0; i < size; i++) o[i] = clamp(o[i]);
+        }
+    }
+    else if (std::is_integral<InType>::value)
     {
         InType *o = (InType *)pIn;
         if (sizeof(InType) <= sizeof(cl_short))
@@ -776,10 +942,10 @@ void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
     }
 }
 
-template <typename InType, typename OutType>
-InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
+template <typename InType, typename OutType, bool InFP, bool OutFP>
+InType DataInfoSpec<InType, OutType, InFP, OutFP>::clamp(const InType &in)
 {
-    if (std::is_integral<OutType>::value)
+    if (std::is_integral<OutType>::value && !OutFP)
     {
         if (std::is_same<InType, cl_float>::value)
         {
@@ -791,6 +957,11 @@ InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
             return dclamp(clamp_ranges[round].first, in,
                           clamp_ranges[round].second);
         }
+        else if (std::is_same<InType, cl_half>::value && InFP)
+        {
+            return HFF(fclamp(HTF(clamp_ranges[round].first), HTF(in),
+                              HTF(clamp_ranges[round].second)));
+        }
     }
     return in;
 }
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index dab61dc500..122a841072 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -73,9 +73,9 @@ static void PrintUsage(void);
 test_status InitCL(cl_device_id device);
 
 
-const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short",
-                                       "uint",  "int",  "float",  "double",
-                                       "ulong", "long" };
+const char *gTypeNames[kTypeCount] = { "uchar",  "char",  "ushort", "short",
+                                       "uint",   "int",   "half",   "float",
+                                       "double", "ulong", "long" };
 
 const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
                                                        "_rtn", "_rtz" };
@@ -83,9 +83,9 @@ const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
 const char *gSaturationNames[2] = { "", "_sat" };
 
 size_t gTypeSizes[kTypeCount] = {
-    sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
-    sizeof(cl_uint),  sizeof(cl_int),  sizeof(cl_float),  sizeof(cl_double),
-    sizeof(cl_ulong), sizeof(cl_long),
+    sizeof(cl_uchar),  sizeof(cl_char),  sizeof(cl_ushort), sizeof(cl_short),
+    sizeof(cl_uint),   sizeof(cl_int),   sizeof(cl_half),   sizeof(cl_float),
+    sizeof(cl_double), sizeof(cl_ulong), sizeof(cl_long),
 };
 
 char appName[64] = "ctest";
@@ -221,13 +221,17 @@ static int ParseArgs(int argc, const char **argv)
                 switch (*arg)
                 {
                     case 'd': gTestDouble ^= 1; break;
+                    case 'h': gTestHalfs ^= 1; break;
                     case 'l': gSkipTesting ^= 1; break;
                     case 'm': gMultithread ^= 1; break;
                     case 'w': gWimpyMode ^= 1; break;
                     case '[':
                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
                         break;
-                    case 'z': gForceFTZ ^= 1; break;
+                    case 'z':
+                        gForceFTZ ^= 1;
+                        gForceHalfFTZ ^= 1;
+                        break;
                     case 't': gTimeResults ^= 1; break;
                     case 'a': gReportAverageTimes ^= 1; break;
                     case '1':
@@ -355,7 +359,6 @@ static void PrintUsage(void)
 }
 
 
-
 test_status InitCL(cl_device_id device)
 {
     int error, i;
@@ -412,6 +415,50 @@ test_status InitCL(cl_device_id device)
     }
     gTestDouble &= gHasDouble;
 
+    if (is_extension_available(device, "cl_khr_fp16"))
+    {
+        gHasHalfs = 1;
+
+        cl_device_fp_config floatCapabilities = 0;
+        if ((error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
+                                     sizeof(floatCapabilities),
+                                     &floatCapabilities, NULL)))
+            floatCapabilities = 0;
+
+        if (0 == (CL_FP_DENORM & floatCapabilities)) gForceHalfFTZ ^= 1;
+
+        if (0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST))
+        {
+            char profileStr[128] = "";
+            // Verify that we are an embedded profile device
+            if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE,
+                                         sizeof(profileStr), profileStr, NULL)))
+            {
+                vlog_error("FAILURE: Could not get device profile: error %d\n",
+                           error);
+                return TEST_FAIL;
+            }
+
+            if (strcmp(profileStr, "EMBEDDED_PROFILE"))
+            {
+                vlog_error(
+                    "FAILURE: non-embedded profile device does not support "
+                    "CL_FP_ROUND_TO_NEAREST\n");
+                return TEST_FAIL;
+            }
+
+            if (0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO))
+            {
+                vlog_error("FAILURE: embedded profile device supports neither "
+                           "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n");
+                return TEST_FAIL;
+            }
+
+            gIsHalfRTZ = 1;
+        }
+    }
+    gTestHalfs &= gHasHalfs;
+
     // detect whether profile of the device is embedded
     char profile[1024] = "";
     if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
@@ -492,8 +539,12 @@ test_status InitCL(cl_device_id device)
     vlog("\tSubnormal values supported for floats? %s\n",
          no_yes[0 != (CL_FP_DENORM & floatCapabilities)]);
     vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ]);
+    vlog("\tTesting with FTZ mode ON for halfs? %s\n",
+         no_yes[0 != gForceHalfFTZ]);
     vlog("\tTesting with default RTZ mode for floats? %s\n",
          no_yes[0 != gIsRTZ]);
+    vlog("\tTesting with default RTZ mode for halfs? %s\n",
+         no_yes[0 != gIsHalfRTZ]);
     vlog("\tHas Double? %s\n", no_yes[0 != gHasDouble]);
     if (gHasDouble) vlog("\tTest Double? %s\n", no_yes[0 != gTestDouble]);
     vlog("\tHas Long? %s\n", no_yes[0 != gHasLong]);
@@ -503,5 +554,3 @@ test_status InitCL(cl_device_id device)
     vlog("\n");
     return TEST_PASS;
 }
-
-
diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index 4efe1a6562..3cc183bd82 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -97,7 +97,7 @@ int test_event_enqueue_wait_for_events_run_test(
         error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids,
                                &number_returned);
         test_error(error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed.");
-        if (number_returned != 2)
+        if (number_returned < 2)
         {
             log_info("Failed to obtain two devices. Test can not run.\n");
             free(two_device_ids);
diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt
index 3187174f22..77aa2f18eb 100644
--- a/test_conformance/extensions/CMakeLists.txt
+++ b/test_conformance/extensions/CMakeLists.txt
@@ -2,6 +2,7 @@ add_subdirectory( cl_ext_cxx_for_opencl )
 add_subdirectory( cl_khr_command_buffer )
 add_subdirectory( cl_khr_dx9_media_sharing )
 add_subdirectory( cl_khr_semaphore )
+add_subdirectory( cl_khr_kernel_clock )
 if(VULKAN_IS_SUPPORTED)
     add_subdirectory( cl_khr_external_semaphore )
 endif()
diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
index 7825a24896..560938a2e6 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
@@ -17,9 +17,15 @@ set(${MODULE_NAME}_SOURCES
     command_buffer_test_event_info.cpp
     command_buffer_finalize.cpp
     negative_command_buffer_finalize.cpp
+    negative_command_buffer_svm_mem.cpp
+    negative_command_buffer_copy_image.cpp
     negative_command_buffer_retain_release.cpp
     negative_command_buffer_create.cpp
+    negative_command_buffer_copy.cpp
+    negative_command_nd_range_kernel.cpp
     negative_command_buffer_get_info.cpp
+    negative_command_buffer_barrier.cpp
+    negative_command_buffer_fill.cpp
     negative_command_buffer_enqueue.cpp
 )
 
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
index eef3e3558b..80fac2ada1 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
@@ -426,7 +426,7 @@ struct CommandBufferPrintfTest : public BasicCommandBufferTest
                 std::max(min_pattern_length, rand() % max_pattern_length);
 
             std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
-            pattern[pattern_length] = '\0';
+            pattern.back() = '\0';
             simul_passes[i] = { pattern,
                                 { cl_int(i * offset), cl_int(pattern_length) },
                                 std::vector<cl_char>(num_elements
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
index 314301f85d..ea8fddbe8b 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -64,10 +64,30 @@ test_definition test_list[] = {
     ADD_TEST(event_info_reference_count),
     ADD_TEST(finalize_invalid),
     ADD_TEST(finalize_empty),
+    // Command-buffer negative tests
     ADD_TEST(negative_retain_command_buffer_invalid_command_buffer),
     ADD_TEST(negative_release_command_buffer_invalid_command_buffer),
     ADD_TEST(negative_finalize_command_buffer_invalid_command_buffer),
     ADD_TEST(negative_finalize_command_buffer_not_recording_state),
+    ADD_TEST(negative_command_buffer_command_fill_buffer_queue_not_null),
+    ADD_TEST(negative_command_buffer_command_fill_buffer_context_not_same),
+    ADD_TEST(
+        negative_command_buffer_command_fill_buffer_sync_points_null_or_num_zero),
+    ADD_TEST(
+        negative_command_buffer_command_fill_buffer_invalid_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_fill_buffer_finalized_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_fill_buffer_mutable_handle_not_null),
+    ADD_TEST(negative_command_buffer_command_fill_image_queue_not_null),
+    ADD_TEST(negative_command_buffer_command_fill_image_context_not_same),
+    ADD_TEST(
+        negative_command_buffer_command_fill_image_sync_points_null_or_num_zero),
+    ADD_TEST(negative_command_buffer_command_fill_image_invalid_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_fill_image_finalized_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_fill_image_mutable_handle_not_null),
     ADD_TEST(negative_create_command_buffer_num_queues),
     ADD_TEST(negative_create_command_buffer_null_queues),
     ADD_TEST(negative_create_command_buffer_repeated_properties),
@@ -75,6 +95,34 @@ test_definition test_list[] = {
     ADD_TEST(negative_create_command_buffer_queue_without_min_properties),
     ADD_TEST(
         negative_create_command_buffer_device_does_not_support_out_of_order_queue),
+    ADD_TEST(negative_command_ndrange_queue_not_null),
+    ADD_TEST(negative_command_ndrange_kernel_with_different_context),
+    ADD_TEST(negative_command_ndrange_kernel_sync_points_null_or_num_zero),
+    ADD_TEST(negative_command_ndrange_kernel_invalid_command_buffer),
+    ADD_TEST(negative_command_ndrange_kernel_invalid_properties),
+    ADD_TEST(negative_command_ndrange_kernel_command_buffer_finalized),
+    ADD_TEST(negative_command_ndrange_kernel_mutable_handle_not_null),
+    ADD_TEST(negative_command_ndrange_kernel_not_support_printf),
+    ADD_TEST(negative_command_ndrange_kernel_with_enqueue_call),
+    ADD_TEST(negative_command_buffer_command_copy_buffer_queue_not_null),
+    ADD_TEST(negative_command_buffer_command_copy_buffer_different_contexts),
+    ADD_TEST(
+        negative_command_buffer_command_copy_buffer_sync_points_null_or_num_zero),
+    ADD_TEST(
+        negative_command_buffer_command_copy_buffer_invalid_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_copy_buffer_finalized_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_copy_buffer_mutable_handle_not_null),
+    ADD_TEST(negative_command_buffer_command_copy_image_queue_not_null),
+    ADD_TEST(negative_command_buffer_command_copy_image_different_contexts),
+    ADD_TEST(
+        negative_command_buffer_command_copy_image_sync_points_null_or_num_zero),
+    ADD_TEST(negative_command_buffer_command_copy_image_invalid_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_copy_image_finalized_command_buffer),
+    ADD_TEST(
+        negative_command_buffer_command_copy_image_mutable_handle_not_null),
     ADD_TEST(negative_get_command_buffer_info_invalid_command_buffer),
     ADD_TEST(negative_get_command_buffer_info_not_supported_param_name),
     ADD_TEST(negative_get_command_buffer_info_queues),
@@ -82,6 +130,22 @@ test_definition test_list[] = {
     ADD_TEST(negative_get_command_buffer_info_state),
     ADD_TEST(negative_get_command_buffer_info_prop_array),
     ADD_TEST(negative_get_command_buffer_info_context),
+    ADD_TEST(negative_command_buffer_command_svm_queue_not_null),
+    ADD_TEST(negative_command_buffer_command_svm_sync_points_null_or_num_zero),
+    ADD_TEST(negative_command_buffer_command_svm_invalid_command_buffer),
+    ADD_TEST(negative_command_buffer_command_svm_finalized_command_buffer),
+    ADD_TEST(negative_command_buffer_command_svm_mutable_handle_not_null),
+    ADD_TEST(negative_command_buffer_copy_image_queue_not_null),
+    ADD_TEST(negative_command_buffer_copy_image_context_not_same),
+    ADD_TEST(negative_command_buffer_copy_image_sync_points_null_or_num_zero),
+    ADD_TEST(negative_command_buffer_copy_image_invalid_command_buffer),
+    ADD_TEST(negative_command_buffer_copy_image_finalized_command_buffer),
+    ADD_TEST(negative_command_buffer_copy_image_mutable_handle_not_null),
+    ADD_TEST(negative_command_buffer_barrier_not_null_queue),
+    ADD_TEST(negative_command_buffer_barrier_invalid_command_buffer),
+    ADD_TEST(negative_command_buffer_barrier_buffer_finalized),
+    ADD_TEST(negative_command_buffer_barrier_mutable_handle_not_null),
+    ADD_TEST(negative_command_buffer_barrier_sync_points_null_or_num_zero),
     ADD_TEST(negative_enqueue_command_buffer_invalid_command_buffer),
     ADD_TEST(negative_enqueue_command_buffer_not_finalized),
     ADD_TEST(
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp
new file mode 100644
index 0000000000..6e682aa381
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_barrier.cpp
@@ -0,0 +1,201 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+#include <vector>
+
+//--------------------------------------------------------------------------
+namespace {
+
+// Passing a non-NULL command_queue must fail with CL_INVALID_COMMAND_QUEUE.
+struct CommandBufferBarrierNotNullQueue : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandBarrierWithWaitListKHR(
+            command_buffer, queue, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(status, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        return BasicCommandBufferTest::Skip()
+            || is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
+};
+
+// CL_INVALID_COMMAND_BUFFER_KHR if command_buffer is not a valid
+// command-buffer. The queue is NULL so that no other error can apply.
+struct CommandBufferBarrierInvalidCommandBuffer : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandBarrierWithWaitListKHR(
+            nullptr, nullptr, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Expects CL_INVALID_OPERATION when a barrier is recorded after finalization.
+struct CommandBufferBarrierBufferFinalized : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        // The barrier below uses a NULL queue and an empty wait list.
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Expects CL_INVALID_VALUE when a non-NULL mutable_handle is passed.
+struct CommandBufferBarrierMutableHandleNotNull : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_mutable_command_khr mutable_handle;
+        // Never read in this test; only its address is handed to the call.
+        cl_int error = clCommandBarrierWithWaitListKHR(
+            command_buffer, nullptr, 0, nullptr, nullptr, &mutable_handle);
+
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if sync_point_wait_list is NULL and
+// num_sync_points_in_wait_list is > 0, or sync_point_wait_list is not NULL and
+// num_sync_points_in_wait_list is 0, or if synchronization-point objects in
+// sync_point_wait_list are not valid synchronization-points.
+struct CommandBufferBarrierSyncPointsNullOrNumZero
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_sync_point_khr invalid_point = 0;
+        // Case 1: wait list names a sync-point (0) no command here produced.
+        cl_int error = clCommandBarrierWithWaitListKHR(
+            command_buffer, nullptr, 1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: NULL wait list combined with a non-zero count.
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 1,
+                                                nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: real sync-point from a recorded copy, but a zero count.
+        cl_sync_point_khr point;
+        error =
+            clCommandCopyBufferKHR(command_buffer, nullptr, in_mem, out_mem, 0,
+                                   0, data_size(), 0, nullptr, &point, nullptr);
+        test_error(error, "clCommandCopyBufferKHR failed");
+
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandBarrierWithWaitListKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+};
+
+int test_negative_command_buffer_barrier_not_null_queue(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    // Harness entry point; registered in main.cpp's test_list via ADD_TEST.
+    using Test = CommandBufferBarrierNotNullQueue;
+    return MakeAndRunTest<Test>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_barrier_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Test = CommandBufferBarrierInvalidCommandBuffer;
+    return MakeAndRunTest<Test>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_barrier_buffer_finalized(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Test = CommandBufferBarrierBufferFinalized;
+    return MakeAndRunTest<Test>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_barrier_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Test = CommandBufferBarrierMutableHandleNotNull;
+    return MakeAndRunTest<Test>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_barrier_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Test = CommandBufferBarrierSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Test>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp
new file mode 100644
index 0000000000..211ffc4d63
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy.cpp
@@ -0,0 +1,667 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+// Shared fixture for the negative copy tests defined in this file.
+template <bool check_image_support>
+struct CommandBufferCopyBaseTest : BasicCommandBufferTest
+{
+    CommandBufferCopyBaseTest(cl_device_id device, cl_context context,
+                              cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue)
+    {}
+    // Creates in_mem/out_mem and, when check_image_support, image and buffer.
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        in_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, nullptr,
+                                &error);
+        test_error(error, "clCreateBuffer failed");
+
+        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, nullptr,
+                                 &error);
+        test_error(error, "Unable to create buffer");
+
+        if (check_image_support)
+        {
+            image = create_image_2d(context, CL_MEM_READ_WRITE, &formats,
+                                    img_width, img_height, 0, NULL, &error);
+            test_error(error, "create_image_2d failed");
+
+            buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size,
+                                    nullptr, &error);
+            test_error(error, "Unable to create buffer");
+        }
+
+        return CL_SUCCESS;
+    }
+    // Skips on base Skip(), multi-device support or (image tests) no images.
+    bool Skip() override
+    {
+        bool command_buffer_multi_device = is_extension_available(
+            device, "cl_khr_command_buffer_multi_device");
+        if (check_image_support)
+        {
+            cl_bool image_support;
+
+            cl_int error =
+                clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
+                                sizeof(image_support), &image_support, nullptr);
+            test_error(error,
+                       "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+            // NOTE(review): test_error presumably returns nonzero, i.e. skip.
+            return (!image_support || BasicCommandBufferTest::Skip()
+                    || command_buffer_multi_device);
+        }
+        return BasicCommandBufferTest::Skip() || command_buffer_multi_device;
+    }
+
+protected:
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t origin[3] = { 0, 0, 0 };
+    const size_t region[3] = { img_width, img_height, 1 };
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+    clMemWrapper image;
+    clMemWrapper buffer;
+    const size_t data_size = img_width * img_height * sizeof(cl_char);
+    clMemWrapper in_mem;
+    clMemWrapper out_mem;
+};
+
+namespace {
+
+// Buffer copy commands must reject a non-NULL command_queue.
+struct CommandBufferCopyBufferQueueNotNull
+    : public CommandBufferCopyBaseTest<false>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+
+    cl_int Run() override
+    {
+        const cl_int copy_status =
+            clCommandCopyBufferKHR(command_buffer, queue, in_mem, out_mem, 0, 0,
+                                   data_size, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(copy_status, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+
+        const cl_int rect_status = clCommandCopyBufferRectKHR(
+            command_buffer, queue, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(rect_status, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Image-to-buffer copies must reject a non-NULL command_queue.
+struct CommandBufferCopyImageQueueNotNull
+    : public CommandBufferCopyBaseTest<true>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandCopyImageToBufferKHR(
+            command_buffer, queue, image, buffer, origin, region, 0, 0, nullptr,
+            nullptr, nullptr);
+
+        test_failure_error_ret(status, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_CONTEXT if the context associated with command_queue,
+// command_buffer, src_buffer, and dst_buffer are not the same.
+struct CommandBufferCopyBufferDifferentContexts
+    : public CommandBufferCopyBaseTest<false>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Adds a second context on the same device and two buffers created in it.
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = CommandBufferCopyBaseTest::SetUp(elements);
+        test_error(error, "CommandBufferCopyBaseTest::SetUp failed");
+
+        context1 = clCreateContext(0, 1, &device, nullptr, nullptr, &error);
+        test_error(error, "Failed to create context");
+
+        in_mem_ctx = clCreateBuffer(context1, CL_MEM_READ_ONLY, data_size,
+                                    nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        out_mem_ctx = clCreateBuffer(context1, CL_MEM_WRITE_ONLY, data_size,
+                                     nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        return CL_SUCCESS;
+    }
+    // Every call pairs one context1 buffer with one buffer from `context`.
+    cl_int Run() override
+    {
+        cl_int error = clCommandCopyBufferKHR(
+            command_buffer, nullptr, in_mem_ctx, out_mem, 0, 0, data_size, 0,
+            nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        // Same context1 source buffer, rectangular copy.
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem_ctx, out_mem, origin, origin,
+            region, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        // context1 destination buffer: linear copy, then rectangular copy.
+        error = clCommandCopyBufferKHR(command_buffer, nullptr, in_mem,
+                                       out_mem_ctx, 0, 0, data_size, 0, nullptr,
+                                       nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem_ctx, origin, origin,
+            region, 0, 0, 0, 0, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+    clMemWrapper in_mem_ctx = nullptr;
+    clMemWrapper out_mem_ctx = nullptr;
+    clContextWrapper context1 = nullptr;
+};
+
+// CL_INVALID_CONTEXT if the context associated with command_queue,
+// command_buffer, src_image, and dst_buffer are not the same.
+struct CommandBufferCopyImageDifferentContexts
+    : public CommandBufferCopyBaseTest<true>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Adds a second context on the same device plus an image and buffer in it.
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = CommandBufferCopyBaseTest::SetUp(elements);
+        test_error(error, "CommandBufferCopyBaseTest::SetUp failed");
+
+        context1 = clCreateContext(0, 1, &device, nullptr, nullptr, &error);
+        test_error(error, "Failed to create context");
+
+        image_ctx = create_image_2d(context1, CL_MEM_READ_WRITE, &formats,
+                                    img_width, img_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        buffer_ctx = clCreateBuffer(context1, CL_MEM_READ_WRITE, data_size,
+                                    nullptr, &error);
+        test_error(error, "Unable to create buffer");
+
+
+        return CL_SUCCESS;
+    }
+    // Each call swaps exactly one argument for its context1 counterpart.
+    cl_int Run() override
+    {
+        cl_int error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, image_ctx, buffer, origin, region, 0, 0,
+            nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        // Destination buffer from context1 this time.
+        error = clCommandCopyImageToBufferKHR(command_buffer, nullptr, image,
+                                              buffer_ctx, origin, region, 0, 0,
+                                              nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+    clMemWrapper image_ctx = nullptr;
+    clMemWrapper buffer_ctx = nullptr;
+    clContextWrapper context1 = nullptr;
+};
+
+// CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if sync_point_wait_list is NULL and
+// num_sync_points_in_wait_list is > 0,
+// or sync_point_wait_list is not NULL and num_sync_points_in_wait_list is 0,
+// or if synchronization-point objects in sync_point_wait_list are not valid
+// synchronization-points.
+struct CommandBufferCopyBufferSyncPointsNullOrNumZero
+    : public CommandBufferCopyBaseTest<false>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+
+    cl_int Run() override
+    {
+        cl_sync_point_khr invalid_point = 0;
+        // Case 1: wait list names a sync-point (0) no command here produced.
+        cl_int error = clCommandCopyBufferKHR(command_buffer, nullptr, in_mem,
+                                              out_mem, 0, 0, data_size, 1,
+                                              &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: NULL wait list combined with a non-zero count.
+        error =
+            clCommandCopyBufferKHR(command_buffer, nullptr, in_mem, out_mem, 0,
+                                   0, data_size, 1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: real sync-point from a recorded barrier, but a zero count.
+        cl_sync_point_khr point;
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, &point, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error =
+            clCommandCopyBufferKHR(command_buffer, nullptr, in_mem, out_mem, 0,
+                                   0, data_size, 0, &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 0, &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if sync_point_wait_list is NULL and
+// num_sync_points_in_wait_list is > 0,
+// or sync_point_wait_list is not NULL and num_sync_points_in_wait_list is 0,
+// or if synchronization-point objects in sync_point_wait_list are not valid
+// synchronization-points.
+struct CommandBufferCopyImageSyncPointsNullOrNumZero
+    : public CommandBufferCopyBaseTest<true>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Each copy below must return CL_INVALID_SYNC_POINT_WAIT_LIST_KHR.
+    cl_int Run() override
+    {
+        cl_sync_point_khr invalid_point = 0;
+        // Case 1: the wait list entry (0) was not returned by any command.
+        cl_int error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, image, buffer, origin, region, 0, 1,
+            &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: NULL wait list with num_sync_points_in_wait_list == 1.
+        error = clCommandCopyImageToBufferKHR(command_buffer, nullptr, image,
+                                              buffer, origin, region, 0, 1,
+                                              nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: non-NULL wait list with num_sync_points_in_wait_list == 0.
+        cl_sync_point_khr point;
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, &point, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error = clCommandCopyImageToBufferKHR(command_buffer, nullptr, image,
+                                              buffer, origin, region, 0, 0,
+                                              &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_COMMAND_BUFFER_KHR if command_buffer is not a valid
+// command-buffer.
+struct CommandBufferCopyBufferInvalidCommandBuffer
+    : public CommandBufferCopyBaseTest<false>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Records both buffer copies with nullptr in place of the command-buffer.
+    cl_int Run() override
+    {
+        cl_int error =
+            clCommandCopyBufferKHR(nullptr, nullptr, in_mem, out_mem, 0, 0,
+                                   data_size, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyBufferRectKHR(nullptr, nullptr, in_mem, out_mem,
+                                           origin, origin, region, 0, 0, 0, 0,
+                                           0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_COMMAND_BUFFER_KHR if command_buffer is not a valid
+// command-buffer.
+struct CommandBufferCopyImageInvalidCommandBuffer
+    : public CommandBufferCopyBaseTest<true>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Records the image copy with nullptr in place of the command-buffer.
+    cl_int Run() override
+    {
+        cl_int error = clCommandCopyImageToBufferKHR(
+            nullptr, nullptr, image, buffer, origin, region, 0, 0, nullptr,
+            nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_OPERATION if command_buffer has been finalized.
+struct CommandBufferCopyBufferFinalizedCommandBuffer
+    : public CommandBufferCopyBaseTest<false>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Finalizes the command-buffer first, then tries to record two copies.
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error =
+            clCommandCopyBufferKHR(command_buffer, nullptr, in_mem, out_mem, 0,
+                                   0, data_size, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_OPERATION if command_buffer has been finalized.
+struct CommandBufferCopyImageFinalizedCommandBuffer
+    : public CommandBufferCopyBaseTest<true>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Finalizes the command-buffer first, then tries to record a copy.
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+
+        error = clCommandCopyImageToBufferKHR(command_buffer, nullptr, image,
+                                              buffer, origin, region, 0, 0,
+                                              nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_VALUE if mutable_handle is not NULL.
+struct CommandBufferCopyBufferMutableHandleNotNull
+    : public CommandBufferCopyBaseTest<false>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Passes a non-NULL mutable_handle pointer to both buffer copies.
+    cl_int Run() override
+    {
+        cl_mutable_command_khr mutable_handle;
+        // The handle is left unset; only its address is passed below.
+        cl_int error = clCommandCopyBufferKHR(
+            command_buffer, nullptr, in_mem, out_mem, 0, 0, data_size, 0,
+            nullptr, nullptr, &mutable_handle);
+
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandCopyBufferKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 0, nullptr, nullptr, &mutable_handle);
+
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandCopyBufferRectKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_VALUE if mutable_handle is not NULL.
+struct CommandBufferCopyImageMutableHandleNotNull
+    : public CommandBufferCopyBaseTest<true>
+{
+    using CommandBufferCopyBaseTest::CommandBufferCopyBaseTest;
+    // Passes a non-NULL mutable_handle pointer to the image-to-buffer copy.
+    cl_int Run() override
+    {
+        cl_mutable_command_khr mutable_handle;
+        // The handle is left unset; only its address is passed below.
+        cl_int error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, image, buffer, origin, region, 0, 0,
+            nullptr, nullptr, &mutable_handle);
+
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+}
+
+int test_negative_command_buffer_command_copy_buffer_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyBufferQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_image_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_buffer_different_contexts(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyBufferDifferentContexts;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_image_different_contexts(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageDifferentContexts;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_buffer_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyBufferSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_image_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_buffer_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyBufferInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_image_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_buffer_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyBufferFinalizedCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_image_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageFinalizedCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_buffer_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyBufferMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_copy_image_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp
new file mode 100644
index 0000000000..80bb3b0245
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_copy_image.cpp
@@ -0,0 +1,426 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+#include <vector>
+
+//--------------------------------------------------------------------------
+
+struct CommandCopyBaseTest : BasicCommandBufferTest
+{
+    CommandCopyBaseTest(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue)
+    {}
+    // Runs the base set-up, then creates one source and one destination image.
+    cl_int SetUp(int elements) override
+    {
+        const int image_width = elements / 64, image_height = 64;
+        num_elements = elements;
+        origin[0] = origin[1] = origin[2] = 0;
+        region[0] = image_width;
+        region[1] = image_height;
+        region[2] = 1;
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+        // The images use the same width and height as the copy region.
+        src_image = create_image_2d(context, CL_MEM_READ_ONLY, &formats,
+                                    image_width, image_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        dst_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &formats,
+                                    image_width, image_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        return CL_SUCCESS;
+    }
+    // Skips when the device reports no image support or the base test skips.
+    bool Skip() override
+    {
+        cl_bool image_support;
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
+                            sizeof(image_support), &image_support, nullptr);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+
+        return !image_support || BasicCommandBufferTest::Skip();
+    }
+
+protected:
+    clMemWrapper src_image;
+    clMemWrapper dst_image;
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+    size_t origin[3];
+    size_t region[3];
+};
+
+namespace {
+
+// CL_INVALID_COMMAND_QUEUE if command_queue is not NULL.
+struct CommandBufferCopyImageQueueNotNull : public CommandCopyBaseTest
+{
+    using CommandCopyBaseTest::CommandCopyBaseTest;
+    // Passes the test queue as command_queue to both image copy commands.
+    cl_int Run() override
+    {
+        cl_int error = clCommandCopyImageKHR(command_buffer, queue, src_image,
+                                             dst_image, origin, origin, region,
+                                             0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(command_buffer, queue, src_image,
+                                              out_mem, origin, region, 0, 0,
+                                              nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+    // Additionally skipped when the multi-device extension is available.
+    bool Skip() override
+    {
+        return CommandCopyBaseTest::Skip()
+            || is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
+};
+
+// CL_INVALID_CONTEXT if the context associated with command_queue,
+// command_buffer, src_image, and dst_image are not the same.
+struct CommandBufferCopyImageContextNotSame : public CommandCopyBaseTest
+{
+    using CommandCopyBaseTest::CommandCopyBaseTest;
+    // Every command recorded in Run() must fail with CL_INVALID_CONTEXT.
+    cl_int Run() override
+    {
+        // Case 1: the source image was created in context1.
+        cl_int error = clCommandCopyImageKHR(
+            command_buffer, nullptr, src_image_ctx, dst_image, origin, origin,
+            region, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image_ctx, out_mem, origin, region, 0,
+            0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+        // Case 2: the destination image or buffer was created in context1.
+        error = clCommandCopyImageKHR(command_buffer, nullptr, src_image,
+                                      dst_image_ctx, origin, origin, region, 0,
+                                      nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, dst_buffer_ctx, origin, region,
+            0, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+        // Case 3: the command-buffer is created for queue1 (context1).
+        command_buffer = clCreateCommandBufferKHR(1, &queue1, 0, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        error = clCommandCopyImageKHR(command_buffer, nullptr, src_image,
+                                      dst_image, origin, origin, region, 0,
+                                      nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
+            nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_CONTEXT,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = CommandCopyBaseTest::SetUp(elements);
+        test_error(error, "CommandCopyBaseTest::SetUp failed");
+        // Objects below are created in a second context on the same device.
+        context1 = clCreateContext(0, 1, &device, nullptr, nullptr, &error);
+        test_error(error, "Failed to create context");
+
+        src_image_ctx = create_image_2d(context1, CL_MEM_READ_ONLY, &formats,
+                                        elements / 64, 64, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        dst_image_ctx = create_image_2d(context1, CL_MEM_WRITE_ONLY, &formats,
+                                        elements / 64, 64, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+        // A real buffer, so only the context is wrong in the to-buffer case.
+        const size_t buffer_bytes = num_elements * sizeof(cl_int);
+        dst_buffer_ctx = clCreateBuffer(context1, CL_MEM_WRITE_ONLY,
+                                        buffer_bytes, nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        queue1 = clCreateCommandQueue(context1, device, 0, &error);
+        test_error(error, "clCreateCommandQueue failed");
+
+        return CL_SUCCESS;
+    }
+
+    clContextWrapper context1;
+    clCommandQueueWrapper queue1;
+    clMemWrapper src_image_ctx;
+    clMemWrapper dst_image_ctx;
+    clMemWrapper dst_buffer_ctx;
+};
+
+// CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if sync_point_wait_list is NULL and
+// num_sync_points_in_wait_list is > 0, or sync_point_wait_list is not NULL and
+// num_sync_points_in_wait_list is 0, or if synchronization-point objects in
+// sync_point_wait_list are not valid synchronization-points.
+struct CommandBufferCopySyncPointsNullOrNumZero : public CommandCopyBaseTest
+{
+    using CommandCopyBaseTest::CommandCopyBaseTest;
+    // Each copy below must return CL_INVALID_SYNC_POINT_WAIT_LIST_KHR.
+    cl_int Run() override
+    {
+        cl_sync_point_khr invalid_point = 0;
+        // Case 1: the wait list entry (0) was not returned by any command.
+        cl_int error = clCommandCopyImageKHR(
+            command_buffer, nullptr, src_image, dst_image, origin, origin,
+            region, 1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 1,
+            &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: NULL wait list with num_sync_points_in_wait_list == 1.
+        error = clCommandCopyImageKHR(command_buffer, nullptr, src_image,
+                                      dst_image, origin, origin, region, 1,
+                                      nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 1,
+            nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: non-NULL wait list with num_sync_points_in_wait_list == 0.
+        cl_sync_point_khr point;
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, &point, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error = clCommandCopyImageKHR(command_buffer, nullptr, src_image,
+                                      dst_image, origin, origin, region, 0,
+                                      &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
+            &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_COMMAND_BUFFER_KHR if command_buffer is not a valid
+// command-buffer.
+struct CommandBufferCopyImageInvalidCommandBuffer : public CommandCopyBaseTest
+{
+    using CommandCopyBaseTest::CommandCopyBaseTest;
+    // Records both image copies with nullptr in place of the command-buffer.
+    cl_int Run() override
+    {
+        cl_int error = clCommandCopyImageKHR(nullptr, nullptr, src_image,
+                                             dst_image, origin, origin, region,
+                                             0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(nullptr, nullptr, src_image,
+                                              out_mem, origin, region, 0, 0,
+                                              nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_OPERATION if command_buffer has been finalized.
+struct CommandBufferCopyImageFinalizedCommandBuffer : public CommandCopyBaseTest
+{
+    using CommandCopyBaseTest::CommandCopyBaseTest;
+    // Finalizes the command-buffer first, then tries to record two copies.
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clCommandCopyImageKHR(command_buffer, nullptr, src_image,
+                                      dst_image, origin, origin, region, 0,
+                                      nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
+            nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_OPERATION,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_VALUE if mutable_handle is not NULL.
+struct CommandBufferCopyImageMutableHandleNotNull : public CommandCopyBaseTest
+{
+    using CommandCopyBaseTest::CommandCopyBaseTest;
+    // Passes a non-NULL mutable_handle pointer to both image copy commands.
+    cl_int Run() override
+    {
+        cl_mutable_command_khr mutable_handle;
+        cl_int error = clCommandCopyImageKHR(
+            command_buffer, nullptr, src_image, dst_image, origin, origin,
+            region, 0, nullptr, nullptr, &mutable_handle);
+
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandCopyImageKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+        error = clCommandCopyImageToBufferKHR(
+            command_buffer, nullptr, src_image, out_mem, origin, region, 0, 0,
+            nullptr, nullptr, &mutable_handle);
+
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandCopyImageToBufferKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+
+        return CL_SUCCESS;
+    }
+};
+}
+
+int test_negative_command_buffer_copy_image_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_copy_image_context_not_same(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageContextNotSame;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_copy_image_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopySyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_copy_image_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_copy_image_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageFinalizedCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_copy_image_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCopyImageMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_fill.cpp
new file mode 100644
index 0000000000..d8e54c22a9
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_fill.cpp
@@ -0,0 +1,557 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+#include <vector>
+
+//--------------------------------------------------------------------------
+template <bool check_image_support>
+struct CommandFillBaseTest : BasicCommandBufferTest
+{
+    CommandFillBaseTest(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue)
+    {}
+
+    // Base set-up first; the image fixtures additionally get two 512x512
+    // CL_RGBA / CL_UNSIGNED_INT8 images (src_image and dst_image).
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+        if (!check_image_support) return CL_SUCCESS;
+
+        src_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &formats, 512,
+                                    512, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        dst_image = create_image_2d(context, CL_MEM_READ_ONLY, &formats, 512,
+                                    512, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        return CL_SUCCESS;
+    }
+
+    // Skip when the base fixture asks to, or (image fixtures only) when
+    // CL_DEVICE_IMAGE_SUPPORT is reported as false.
+    bool Skip() override
+    {
+        if (!check_image_support) return BasicCommandBufferTest::Skip();
+
+        cl_bool image_support;
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
+                            sizeof(image_support), &image_support, nullptr);
+        test_error(error,
+                   "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+
+        return (!image_support || BasicCommandBufferTest::Skip());
+    }
+
+protected:
+    clMemWrapper src_image;
+    clMemWrapper dst_image;
+    const cl_uint fill_color_1[4] = { 0x05, 0x05, 0x05, 0x05 };
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+    const size_t origin[3] = { 0, 0, 0 };
+    const size_t region[3] = { 512, 512, 1 };
+    const cl_int pattern = 0xFF;
+};
+
+namespace {
+
+// Negative test: recording a fill-buffer command with a non-NULL
+// command_queue must fail with CL_INVALID_COMMAND_QUEUE.
+struct CommandBufferCommandFillBufferQueueNotNull
+    : public CommandFillBaseTest<false>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    bool Skip() override
+    {
+        if (CommandFillBaseTest::Skip()) return true;
+        return is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandFillBufferKHR(
+            command_buffer, queue, out_mem, &pattern, sizeof(cl_int), 0,
+            data_size(), 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            status, CL_INVALID_COMMAND_QUEUE,
+            "clCommandFillBufferKHR should return CL_INVALID_COMMAND_QUEUE",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: recording a fill-image command with a non-NULL
+// command_queue must fail with CL_INVALID_COMMAND_QUEUE.
+struct CommandBufferCommandFillImageQueueNotNull
+    : public CommandFillBaseTest<true>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    bool Skip() override
+    {
+        if (CommandFillBaseTest::Skip()) return true;
+        return is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandFillImageKHR(
+            command_buffer, queue, src_image, fill_color_1, origin, region, 0,
+            nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            status, CL_INVALID_COMMAND_QUEUE,
+            "clCommandFillImageKHR should return CL_INVALID_COMMAND_QUEUE",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: filling a buffer that was created in a different context
+// than the command-buffer must fail with CL_INVALID_CONTEXT.
+struct CommandBufferCommandFillBufferContextNotSame
+    : public CommandFillBaseTest<false>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    // Builds a second context on the same device plus a buffer created in it.
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = CommandFillBaseTest::SetUp(elements);
+        test_error(error, "CommandFillBaseTest::SetUp failed");
+
+        context1 = clCreateContext(0, 1, &device, nullptr, nullptr, &error);
+        test_error(error, "Failed to create context");
+
+        out_mem_ctx =
+            clCreateBuffer(context1, CL_MEM_WRITE_ONLY,
+                           sizeof(cl_int) * num_elements, nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandFillBufferKHR(
+            command_buffer, nullptr, out_mem_ctx, &pattern, sizeof(cl_int), 0,
+            data_size(), 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            status, CL_INVALID_CONTEXT,
+            "clCommandFillBufferKHR should return CL_INVALID_CONTEXT",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    // Second context and the buffer created in it.
+    clContextWrapper context1;
+    clMemWrapper out_mem_ctx;
+};
+
+// Negative test: filling an image that was created in a different context
+// than the command-buffer must fail with CL_INVALID_CONTEXT.
+struct CommandBufferCommandFillImageContextNotSame
+    : public CommandFillBaseTest<true>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    // Builds a second context on the same device plus an image created in it.
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = CommandFillBaseTest::SetUp(elements);
+        test_error(error, "CommandFillBaseTest::SetUp failed");
+
+        context1 = clCreateContext(0, 1, &device, nullptr, nullptr, &error);
+        test_error(error, "Failed to create context");
+
+        dst_image_ctx = create_image_2d(context1, CL_MEM_WRITE_ONLY, &formats,
+                                        512, 512, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandFillImageKHR(
+            command_buffer, nullptr, dst_image_ctx, fill_color_1, origin,
+            region, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            status, CL_INVALID_CONTEXT,
+            "clCommandFillImageKHR should return CL_INVALID_CONTEXT",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    clContextWrapper context1;
+    clMemWrapper dst_image_ctx;
+};
+
+// Negative test: every malformed sync-point wait list below must be rejected
+// with CL_INVALID_SYNC_POINT_WAIT_LIST_KHR:
+//   1. count 1 with a sync-point value (0) not obtained from any command,
+//   2. count 1 with a NULL list,
+//   3. count 0 with a non-NULL list.
+struct CommandBufferCommandFillBufferSyncPointsNullOrNumZero
+    : public CommandFillBaseTest<false>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        // Case 1: list entry that no command returned.
+        cl_sync_point_khr invalid_point = 0;
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, out_mem, &pattern, sizeof(cl_int), 0,
+            data_size(), 1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandFillBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: non-zero count but NULL list.
+        error = clCommandFillBufferKHR(
+            command_buffer, nullptr, out_mem, &pattern, sizeof(cl_int), 0,
+            data_size(), 1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandFillBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: zero count but non-NULL list (a sync-point from a barrier).
+        cl_sync_point_khr point;
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, &point, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error = clCommandFillBufferKHR(
+            command_buffer, nullptr, out_mem, &pattern, sizeof(cl_int), 0,
+            data_size(), 0, &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandFillBufferKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: every malformed sync-point wait list below must be rejected
+// with CL_INVALID_SYNC_POINT_WAIT_LIST_KHR:
+//   1. count 1 with a sync-point value (0) not obtained from any command,
+//   2. count 1 with a NULL list,
+//   3. count 0 with a non-NULL list.
+struct CommandBufferCommandFillImageSyncPointsNullOrNumZero
+    : public CommandFillBaseTest<true>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        // Case 1: list entry that no command returned.
+        cl_sync_point_khr invalid_point = 0;
+        cl_int error = clCommandFillImageKHR(
+            command_buffer, nullptr, dst_image, fill_color_1, origin, region,
+            1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandFillImageKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: non-zero count but NULL list.
+        error = clCommandFillImageKHR(
+            command_buffer, nullptr, dst_image, fill_color_1, origin, region,
+            1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandFillImageKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: zero count but non-NULL list (a sync-point from a barrier).
+        cl_sync_point_khr point;
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, &point, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error = clCommandFillImageKHR(
+            command_buffer, nullptr, dst_image, fill_color_1, origin, region,
+            0, &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandFillImageKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+
+// Negative test: a fill-buffer command recorded into an invalid
+// command-buffer handle must fail with CL_INVALID_COMMAND_BUFFER_KHR.
+struct CommandBufferCommandFillBufferInvalidCommandBuffer
+    : public CommandFillBaseTest<false>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        // nullptr is passed as the (invalid) command-buffer handle.
+        const cl_int status = clCommandFillBufferKHR(
+            nullptr, nullptr, out_mem, &pattern, sizeof(cl_int), 0, data_size(),
+            0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(status, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandFillBufferKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: a fill-image command recorded into an invalid
+// command-buffer handle must fail with CL_INVALID_COMMAND_BUFFER_KHR.
+struct CommandBufferCommandFillImageInvalidCommandBuffer
+    : public CommandFillBaseTest<true>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        // nullptr is passed as the (invalid) command-buffer handle.
+        const cl_int status = clCommandFillImageKHR(
+            nullptr, nullptr, dst_image, fill_color_1, origin, region, 0,
+            nullptr, nullptr, nullptr);
+        test_failure_error_ret(status, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandFillImageKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: once the command-buffer has been finalized, recording a
+// fill-buffer command must fail with CL_INVALID_OPERATION.
+struct CommandBufferCommandFillBufferFinalizedCommandBuffer
+    : public CommandFillBaseTest<false>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        // Attempt to record after finalization.
+        error = clCommandFillBufferKHR(
+            command_buffer, nullptr, out_mem, &pattern, sizeof(cl_int), 0,
+            data_size(), 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_OPERATION,
+            "clCommandFillBufferKHR should return CL_INVALID_OPERATION",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: once the command-buffer has been finalized, recording a
+// fill-image command must fail with CL_INVALID_OPERATION.
+struct CommandBufferCommandFillImageFinalizedCommandBuffer
+    : public CommandFillBaseTest<true>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        // Attempt to record after finalization.
+        error = clCommandFillImageKHR(
+            command_buffer, nullptr, dst_image, fill_color_1, origin, region,
+            0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_OPERATION,
+            "clCommandFillImageKHR should return CL_INVALID_OPERATION",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: passing a non-NULL mutable_handle to a fill-buffer command
+// must fail with CL_INVALID_VALUE.
+struct CommandBufferCommandFillBufferMutableHandleNotNull
+    : public CommandFillBaseTest<false>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        // Only the address is used below; this test never reads the handle.
+        cl_mutable_command_khr mutable_handle;
+
+        const cl_int status = clCommandFillBufferKHR(
+            command_buffer, nullptr, out_mem, &pattern, sizeof(cl_int), 0,
+            data_size(), 0, nullptr, nullptr, &mutable_handle);
+        test_failure_error_ret(
+            status, CL_INVALID_VALUE,
+            "clCommandFillBufferKHR should return CL_INVALID_VALUE",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: passing a non-NULL mutable_handle to a fill-image command
+// must fail with CL_INVALID_VALUE.
+struct CommandBufferCommandFillImageMutableHandleNotNull
+    : public CommandFillBaseTest<true>
+{
+    using CommandFillBaseTest::CommandFillBaseTest;
+
+    cl_int Run() override
+    {
+        cl_mutable_command_khr mutable_handle;
+
+        const cl_int status = clCommandFillImageKHR(
+            command_buffer, nullptr, dst_image, fill_color_1, origin, region,
+            0, nullptr, nullptr, &mutable_handle);
+        test_failure_error_ret(
+            status, CL_INVALID_VALUE,
+            "clCommandFillImageKHR should return CL_INVALID_VALUE",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+}
+
+int test_negative_command_buffer_command_fill_buffer_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillBufferQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_image_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillImageQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_buffer_context_not_same(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillBufferContextNotSame;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_image_context_not_same(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillImageContextNotSame;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+// Runs the fill-buffer sync-point wait-list negative fixture.
+int test_negative_command_buffer_command_fill_buffer_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillBufferSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_image_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillImageSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_buffer_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillBufferInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_image_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillImageInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_buffer_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillBufferFinalizedCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_image_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillImageFinalizedCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_buffer_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillBufferMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_fill_image_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandFillImageMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp
new file mode 100644
index 0000000000..b5d2355b2a
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_buffer_svm_mem.cpp
@@ -0,0 +1,282 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "svm_command_basic.h"
+#include "procs.h"
+
+
+//--------------------------------------------------------------------------
+namespace {
+
+// Negative test: SVM memcpy and SVM memfill commands recorded with a
+// non-NULL command_queue must fail with CL_INVALID_COMMAND_QUEUE.
+struct CommandBufferCommandSVMQueueNotNull : public BasicSVMCommandBufferTest
+{
+    using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+    bool Skip() override
+    {
+        return BasicSVMCommandBufferTest::Skip()
+            || is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandSVMMemcpyKHR(
+            command_buffer, queue, svm_out_mem(), svm_in_mem(), data_size(), 0,
+            nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_COMMAND_QUEUE,
+            "clCommandSVMMemcpyKHR should return CL_INVALID_COMMAND_QUEUE",
+            TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(command_buffer, queue, svm_in_mem(),
+                                       &pattern_1, sizeof(cl_char), data_size(),
+                                       0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_COMMAND_QUEUE,
+            "clCommandSVMMemFillKHR should return CL_INVALID_COMMAND_QUEUE",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    // One-byte pattern handed to the memfill call.
+    const cl_char pattern_1 = 0x14;
+};
+
+// Negative test: for both SVM memcpy and SVM memfill, every malformed
+// sync-point wait list below must give CL_INVALID_SYNC_POINT_WAIT_LIST_KHR:
+//   1. count 1 with a sync-point value (0) not obtained from any command,
+//   2. count 1 with a NULL list,
+//   3. count 0 with a non-NULL list.
+struct CommandBufferCommandSVMSyncPointsNullOrNumZero
+    : public BasicSVMCommandBufferTest
+{
+    using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+    cl_int Run() override
+    {
+        // Case 1: list entry that no command returned.
+        cl_sync_point_khr invalid_point = 0;
+        cl_int error = clCommandSVMMemcpyKHR(
+            command_buffer, nullptr, svm_out_mem(), svm_in_mem(), data_size(),
+            1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandSVMMemcpyKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(
+            command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+            data_size(), 1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandSVMMemFillKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: non-zero count but NULL list.
+        error = clCommandSVMMemcpyKHR(
+            command_buffer, nullptr, svm_out_mem(), svm_in_mem(), data_size(),
+            1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandSVMMemcpyKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(
+            command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+            data_size(), 1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandSVMMemFillKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: zero count but non-NULL list (a sync-point from a barrier).
+        cl_sync_point_khr point;
+        error = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                                nullptr, &point, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error = clCommandSVMMemcpyKHR(
+            command_buffer, nullptr, svm_out_mem(), svm_in_mem(), data_size(),
+            0, &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandSVMMemcpyKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(
+            command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+            data_size(), 0, &point, nullptr, nullptr);
+
+        test_failure_error_ret(error, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandSVMMemFillKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    const cl_char pattern_1 = 0x14;
+};
+
+// Negative test: SVM memcpy and SVM memfill commands recorded into an
+// invalid command-buffer handle must fail with CL_INVALID_COMMAND_BUFFER_KHR.
+struct CommandBufferCommandSVMInvalidCommandBuffer
+    : public BasicSVMCommandBufferTest
+{
+    using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+    cl_int Run() override
+    {
+        // nullptr is passed as the (invalid) command-buffer handle.
+        cl_int error = clCommandSVMMemcpyKHR(
+            nullptr, nullptr, svm_out_mem(), svm_in_mem(), data_size(), 0,
+            nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_COMMAND_BUFFER_KHR,
+            "clCommandSVMMemcpyKHR should return CL_INVALID_COMMAND_BUFFER_KHR",
+            TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(
+            nullptr, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+            data_size(), 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandSVMMemFillKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    // One-byte pattern handed to the memfill call.
+    const cl_char pattern_1 = 0x14;
+};
+
+// Negative test: once the command-buffer has been finalized, recording SVM
+// memcpy or SVM memfill commands must fail with CL_INVALID_OPERATION.
+struct CommandBufferCommandSVMFinalizedCommandBuffer
+    : public BasicSVMCommandBufferTest
+{
+    using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        // Attempt to record both SVM commands after finalization.
+        error = clCommandSVMMemcpyKHR(
+            command_buffer, nullptr, svm_out_mem(), svm_in_mem(), data_size(),
+            0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_OPERATION,
+            "clCommandSVMMemcpyKHR should return CL_INVALID_OPERATION",
+            TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(
+            command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+            data_size(), 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(
+            error, CL_INVALID_OPERATION,
+            "clCommandSVMMemFillKHR should return CL_INVALID_OPERATION",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    const cl_char pattern_1 = 0x14;
+};
+
+// Negative test: passing a non-NULL mutable_handle to SVM memcpy or SVM
+// memfill commands must fail with CL_INVALID_VALUE.
+struct CommandBufferCommandSVMMutableHandleNotNull
+    : public BasicSVMCommandBufferTest
+{
+    using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+    cl_int Run() override
+    {
+        // Only the address is used below; this test never reads the handle.
+        cl_mutable_command_khr mutable_handle;
+
+        cl_int error = clCommandSVMMemcpyKHR(
+            command_buffer, nullptr, svm_out_mem(), svm_in_mem(), data_size(),
+            0, nullptr, nullptr, &mutable_handle);
+        test_failure_error_ret(
+            error, CL_INVALID_VALUE,
+            "clCommandSVMMemcpyKHR should return CL_INVALID_VALUE", TEST_FAIL);
+
+        error = clCommandSVMMemFillKHR(
+            command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+            data_size(), 0, nullptr, nullptr, &mutable_handle);
+        test_failure_error_ret(
+            error, CL_INVALID_VALUE,
+            "clCommandSVMMemFillKHR should return CL_INVALID_VALUE", TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+
+    const cl_char pattern_1 = 0x14;
+};
+}
+
+int test_negative_command_buffer_command_svm_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandSVMQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_svm_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandSVMSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_svm_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandSVMInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_svm_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandSVMFinalizedCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_buffer_command_svm_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandBufferCommandSVMMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp b/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp
new file mode 100644
index 0000000000..ccbefd85f8
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/negative_command_nd_range_kernel.cpp
@@ -0,0 +1,530 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+#include "harness/featureHelpers.h"
+
+//--------------------------------------------------------------------------
+namespace {
+
+// Negative test: recording an ND-range kernel command with a non-NULL
+// command_queue must be rejected with CL_INVALID_COMMAND_QUEUE.
+struct CommandNDRangeKernelQueueNotNull : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    // Not run when cl_khr_command_buffer_multi_device is reported.
+    bool Skip() override
+    {
+        return BasicCommandBufferTest::Skip()
+            || is_extension_available(device,
+                                      "cl_khr_command_buffer_multi_device");
+    }
+
+    cl_int Run() override
+    {
+        const cl_int status = clCommandNDRangeKernelKHR(
+            command_buffer, queue, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(status, CL_INVALID_COMMAND_QUEUE,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_COMMAND_QUEUE",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: CL_INVALID_CONTEXT is expected when the kernel was created in
+// a context other than the one used by the command-buffer.
+struct CommandNDRangeKernelKernelWithDifferentContext
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    // Separate context in which the replacement program is created.
+    clContextWrapper new_context;
+
+    // Creates new_context, then builds the "copy" program and kernel in it.
+    cl_int CreateKernelWithDifferentContext()
+    {
+        cl_int err = CL_SUCCESS;
+        new_context =
+            clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
+        test_error(err, "Failed to create context");
+
+        const char* source = R"(
+      __kernel void copy(__global int* in, __global int* out, __global int* offset) {
+          size_t id = get_global_id(0);
+          int ind = offset[0] + id;
+          out[ind] = in[ind];
+      })";
+
+        err = create_single_kernel_helper_create_program(
+            new_context, &program, 1, &source);
+        test_error(err, "Failed to create program with source");
+
+        err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+        test_error(err, "Failed to build program");
+
+        kernel = clCreateKernel(program, "copy", &err);
+        test_error(err, "Failed to create copy kernel");
+
+        return CL_SUCCESS;
+    }
+
+    cl_int Run() override
+    {
+        cl_int err = CreateKernelWithDifferentContext();
+        test_error(err, "Failed to create kernel");
+
+        err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(err, CL_INVALID_CONTEXT,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_CONTEXT",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: clCommandNDRangeKernelKHR must report
+// CL_INVALID_SYNC_POINT_WAIT_LIST_KHR when sync_point_wait_list is NULL while
+// num_sync_points_in_wait_list is > 0, when the list is not NULL while the
+// count is 0, or when the list holds an invalid synchronization-point.
+struct CommandNDRangeKerneSyncPointsNullOrNumZero
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        // Case 1: the list has one entry, but that entry (0) was not obtained
+        // from any command recorded by this test.
+        cl_sync_point_khr invalid_point = 0;
+        cl_int err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 1, &invalid_point, nullptr, nullptr);
+
+        test_failure_error_ret(err, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 2: a non-zero count paired with a NULL wait list.
+        err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 1, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(err, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        // Case 3: a barrier is recorded to obtain a sync point, which is then
+        // passed in a non-NULL list together with a count of zero.
+        cl_sync_point_khr sync_point;
+        err = clCommandBarrierWithWaitListKHR(command_buffer, nullptr, 0,
+                                              nullptr, &sync_point, nullptr);
+        test_error(err, "clCommandBarrierWithWaitListKHR failed");
+
+        err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, &sync_point, nullptr, nullptr);
+
+        test_failure_error_ret(err, CL_INVALID_SYNC_POINT_WAIT_LIST_KHR,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_SYNC_POINT_WAIT_LIST_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: a NULL command_buffer is not a valid command-buffer, so
+// recording must fail with CL_INVALID_COMMAND_BUFFER_KHR.
+struct CommandNDRangeKernelInvalidCommandBuffer : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        const cl_command_buffer_khr null_command_buffer = nullptr;
+        const cl_int status = clCommandNDRangeKernelKHR(
+            null_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(status, CL_INVALID_COMMAND_BUFFER_KHR,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_COMMAND_BUFFER_KHR",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_VALUE if values specified in properties are not valid. Both
+// lists below end with 0, as required for a property list.
+struct CommandNDRangeKernelInvalidProperties : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        // An all-ones property name is not expected to match any property.
+        cl_ndrange_kernel_command_properties_khr props_bad_name[3] = {
+            ~cl_ndrange_kernel_command_properties_khr(0), 0, 0
+        };
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props_bad_name, kernel, 1, nullptr,
+            &num_elements, nullptr, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+        // A property name followed by a CL_MEM_* constant as its value.
+        cl_ndrange_kernel_command_properties_khr props_invalid[3] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MEM_USE_CACHED_CPU_MEMORY_IMG, 0
+        };
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props_invalid, kernel, 1, nullptr,
+            &num_elements, nullptr, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: once the command-buffer has been finalized, recording an
+// ND-range kernel command must fail with CL_INVALID_OPERATION.
+struct CommandNDRangeKernelCommandBufferFinalized
+    : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int err = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(err, "clFinalizeCommandBufferKHR failed");
+
+        // Recording is attempted after the finalize call above succeeded.
+        err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_failure_error_ret(err, CL_INVALID_OPERATION,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+        return CL_SUCCESS;
+    }
+};
+
+// CL_INVALID_VALUE if mutable_handle is not NULL. Skipped when the device
+// reports cl_khr_command_buffer_mutable_dispatch.
+struct CommandNDRangeKernelMutableHandleNotNull : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    bool Skip() override
+    {
+        return BasicCommandBufferTest::Skip()
+            || is_extension_available(device,
+                                      "cl_khr_command_buffer_mutable_dispatch");
+    }
+
+    cl_int Run() override
+    {
+        // Start from a known value rather than an indeterminate handle.
+        cl_mutable_command_khr mutable_handle = nullptr;
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, &mutable_handle);
+        test_failure_error_ret(error, CL_INVALID_VALUE,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_VALUE",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: CL_INVALID_OPERATION is expected when the kernel contains a
+// printf call and the device does not report
+// CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR.
+struct CommandNDRangeKernelNotSupportPrintf : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    // Skips when the device reports the printf capability, because the error
+    // under test applies only to devices without it.
+    bool Skip() override
+    {
+        cl_device_command_buffer_capabilities_khr caps;
+        cl_int err =
+            clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR,
+                            sizeof(caps), &caps, nullptr);
+        test_error(err,
+                   "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
+
+        // True when the capability bit is set, which means "skip".
+        const bool printf_supported =
+            (caps & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR) != 0;
+        return printf_supported;
+    }
+
+    // Builds the program and kernel from a source whose only statement is a
+    // printf call.
+    cl_int SetUpKernel() override
+    {
+        const char* source = R"(
+__kernel void printf_kernel() {
+    printf("Hello World\n");
+  }
+)";
+
+        const cl_int err = build_program_create_kernel_helper(
+            context, &program, &kernel, 1, &source, "printf_kernel", nullptr);
+        test_error(err, "build_program_create_kernel_helper failed");
+
+        return CL_SUCCESS;
+    }
+
+    // Initializes the extension entry points, stores the element count,
+    // builds the printf kernel and creates the command-buffer on the test
+    // queue.
+    cl_int SetUp(int elements) override
+    {
+        cl_int err = init_extension_functions();
+        if (err != CL_SUCCESS) return err;
+
+        // Non-positive element counts are rejected.
+        if (elements <= 0) return CL_INVALID_VALUE;
+        num_elements = static_cast<size_t>(elements);
+
+        // Build the program and kernel used by Run().
+        err = SetUpKernel();
+        test_error(err, "SetUpKernel failed");
+
+        // A single queue and no properties.
+        command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &err);
+        test_error(err, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    // Records the printf kernel and checks that the call is refused.
+    cl_int Run() override
+    {
+        const cl_int err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(err, CL_INVALID_OPERATION,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Negative test: CL_INVALID_OPERATION is expected when the kernel contains a
+// kernel-enqueue call and the device does not report
+// CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR.
+struct CommandNDRangeKernelWithKernelEnqueueCall : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    // "-cl-std" build option chosen in Skip() and consumed by SetUpKernel().
+    std::string cl_std = "";
+
+    // Skips unless the device reports OpenCL C 2.0 or newer together with the
+    // device-enqueue feature while NOT reporting the command-buffer
+    // device-side-enqueue capability.
+    bool Skip() override
+    {
+        OpenCLCFeatures features;
+        get_device_cl_c_features(device, features);
+
+        const Version clc_version = get_device_latest_cl_c_version(device);
+        const bool is_clc_3 = clc_version >= Version(3, 0);
+        const bool is_clc_2 = !is_clc_3 && clc_version >= Version(2, 0);
+
+        if (is_clc_3)
+        {
+            cl_std = "-cl-std=CL3.0";
+        }
+        else if (is_clc_2)
+        {
+            cl_std = "-cl-std=CL2.0";
+        }
+
+        // Device enqueue is only considered for OpenCL C 2.0 and newer.
+        const bool can_enqueue = (is_clc_3 || is_clc_2)
+            && features.supports__opencl_c_device_enqueue;
+
+        cl_device_command_buffer_capabilities_khr caps;
+        cl_int err =
+            clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR,
+                            sizeof(caps), &caps, nullptr);
+        test_error(err,
+                   "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
+
+        if (!can_enqueue) return true;
+
+        return (caps & CL_COMMAND_BUFFER_CAPABILITY_DEVICE_SIDE_ENQUEUE_KHR)
+            != 0;
+    }
+
+    // Builds a program whose entry kernel calls enqueue_kernel.
+    cl_int SetUpKernel() override
+    {
+        const char* source = R"(
+__kernel void enqueue_call_func() {
+  }
+
+__kernel void enqueue_call_kernel() {
+queue_t def_q = get_default_queue();
+ndrange_t ndrange = ndrange_1D(1);
+enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange,
+                   ^{enqueue_call_func();});
+  }
+)";
+
+        // The option string is a single space followed by cl_std.
+        const std::string build_options = " " + cl_std;
+
+        const cl_int err = build_program_create_kernel_helper(
+            context, &program, &kernel, 1, &source, "enqueue_call_kernel",
+            build_options.c_str());
+        test_error(err, "build_program_create_kernel_helper failed");
+
+        return CL_SUCCESS;
+    }
+
+    // Initializes the extension entry points, stores the element count,
+    // builds the enqueue kernel and creates the command-buffer on the queue.
+    cl_int SetUp(int elements) override
+    {
+        cl_int err = init_extension_functions();
+        if (err != CL_SUCCESS) return err;
+
+        // Non-positive element counts are rejected.
+        if (elements <= 0) return CL_INVALID_VALUE;
+        num_elements = static_cast<size_t>(elements);
+
+        err = SetUpKernel();
+        test_error(err, "SetUpKernel failed");
+
+        command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &err);
+        test_error(err, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    // Records the enqueue kernel and checks that the call is refused.
+    cl_int Run() override
+    {
+        const cl_int err = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+
+        test_failure_error_ret(err, CL_INVALID_OPERATION,
+                               "clCommandNDRangeKernelKHR should return "
+                               "CL_INVALID_OPERATION",
+                               TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+};
+
+int test_negative_command_ndrange_queue_not_null(cl_device_id device,
+                                                 cl_context context,
+                                                 cl_command_queue queue,
+                                                 int num_elements)
+{
+    using Fixture = CommandNDRangeKernelQueueNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_with_different_context(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelKernelWithDifferentContext;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKerneSyncPointsNullOrNumZero;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelInvalidCommandBuffer;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_invalid_properties(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelInvalidProperties;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_command_buffer_finalized(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelCommandBufferFinalized;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelMutableHandleNotNull;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_not_support_printf(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelNotSupportPrintf;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+int test_negative_command_ndrange_kernel_with_enqueue_call(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    using Fixture = CommandNDRangeKernelWithKernelEnqueueCall;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 793678198f..b2bd7c6c62 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -153,6 +153,50 @@ extern int test_negative_finalize_command_buffer_invalid_command_buffer(
 extern int test_negative_finalize_command_buffer_not_recording_state(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
+extern int test_negative_command_buffer_command_fill_buffer_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_fill_buffer_context_not_same(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_buffer_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_buffer_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_buffer_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_fill_image_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_buffer_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_fill_image_context_not_same(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_image_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_image_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_image_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_fill_image_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
 extern int test_negative_create_command_buffer_num_queues(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
@@ -172,6 +216,78 @@ extern int
 test_negative_create_command_buffer_device_does_not_support_out_of_order_queue(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
+extern int test_negative_command_ndrange_queue_not_null(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements);
+extern int test_negative_command_ndrange_kernel_with_different_context(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_invalid_properties(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_command_buffer_finalized(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_not_support_printf(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_ndrange_kernel_with_enqueue_call(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_copy_buffer_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_copy_buffer_different_contexts(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_buffer_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_buffer_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_buffer_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_buffer_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_copy_image_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_copy_image_different_contexts(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_image_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_image_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_image_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_copy_image_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
 extern int test_negative_get_command_buffer_info_invalid_command_buffer(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
@@ -196,6 +312,37 @@ extern int test_negative_get_command_buffer_info_context(cl_device_id device,
                                                          cl_context context,
                                                          cl_command_queue queue,
                                                          int num_elements);
+extern int test_negative_command_buffer_command_svm_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int
+test_negative_command_buffer_command_svm_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_svm_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_svm_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_command_svm_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_barrier_not_null_queue(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_barrier_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_barrier_buffer_finalized(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_barrier_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_barrier_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
 extern int test_negative_enqueue_command_buffer_invalid_command_buffer(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
@@ -213,6 +360,24 @@ extern int
 test_negative_enqueue_command_buffer_num_queues_not_zero_different_while_buffer_creation(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
+extern int test_negative_command_buffer_copy_image_queue_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_copy_image_context_not_same(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_copy_image_sync_points_null_or_num_zero(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_copy_image_invalid_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_copy_image_finalized_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_negative_command_buffer_copy_image_mutable_handle_not_null(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
 extern int test_negative_enqueue_command_buffer_not_valid_queue_in_queues(
     cl_device_id device, cl_context context, cl_command_queue queue,
     int num_elements);
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
index 6e02ba97e5..df136004cd 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
@@ -15,12 +15,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 
 include_directories (${CLConform_INCLUDE_DIR})
 
-# needed by Vulkan wrapper to link
-if(WIN32)
-    list(APPEND CLConform_LIBRARIES vulkan-1 vulkan_wrapper)
-else(WIN32)
-    list(APPEND CLConform_LIBRARIES vulkan dl vulkan_wrapper)
-endif(WIN32)
+list(APPEND CLConform_LIBRARIES vulkan_wrapper)
 set(CMAKE_CXX_FLAGS "-fpermissive")
 
 include_directories("../../common/vulkan_wrapper")
diff --git a/test_conformance/extensions/cl_khr_kernel_clock/CMakeLists.txt b/test_conformance/extensions/cl_khr_kernel_clock/CMakeLists.txt
new file mode 100644
index 0000000000..066ebb65d9
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_kernel_clock/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Module name; also the prefix of the source-list variable below.
+set(MODULE_NAME CL_KHR_KERNEL_CLOCK)
+
+# Sources listed for this module.
+set(${MODULE_NAME}_SOURCES main.cpp kernel_clock.cpp)
+
+# Common CMake logic located two directories up.
+include(../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_kernel_clock/kernel_clock.cpp b/test_conformance/extensions/cl_khr_kernel_clock/kernel_clock.cpp
new file mode 100644
index 0000000000..744083a93d
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_kernel_clock/kernel_clock.cpp
@@ -0,0 +1,195 @@
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "harness/typeWrappers.h"
+
+namespace {
+
+// write 1 to the output if the clock went backwards (time1 > time2)
+static const char *kernel_sources[2] = {
+    R"(__kernel void SampleClock(__global uint* buf)
+    {
+        ulong time1, time2;
+        time1 = clock_read_%s();
+        time2 = clock_read_%s();
+        if(time1 > time2)
+        {
+            buf[0] = 1;
+        }
+    })",
+    R"(__kernel void SampleClock(__global uint* buf)
+    {
+       uint2 time1, time2;
+       time1 = clock_read_hilo_%s();
+       time2 = clock_read_hilo_%s();
+       if(time1.hi > time2.hi || (time1.hi == time2.hi && time1.lo > 
+         time2.lo))
+       {
+            buf[0] = 1;
+       }
+    })",
+};
+
+class KernelClockTest {
+    // Runs the two clock_read kernels for one clock scope (capability bit).
+public:
+    KernelClockTest(cl_device_id device, cl_context context,
+                    cl_command_queue queue,
+                    cl_device_kernel_clock_capabilities_khr capability)
+        : device(device), context(context), queue(queue), capability(capability)
+    {}
+    // Returns true when the device does not report the scope under test.
+    bool Skip()
+    {
+        cl_device_kernel_clock_capabilities_khr capabilities;
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR,
+                            sizeof(cl_device_kernel_clock_capabilities_khr),
+                            &capabilities, NULL);
+        test_error(error,
+                   "Unable to query "
+                   "CL_DEVICE_KERNEL_CLOCK_CAPABILITIES_KHR");
+        // NOTE(review): a failed query returns here as "skip" - confirm intent.
+        // Skip if capability is not supported
+        return capability != (capabilities & capability);
+    }
+    // Returns TEST_FAIL when a sampled clock goes backwards (time1 > time2).
+    cl_int RunTest()
+    {
+        size_t global_size = 1;
+        cl_uint buf = 0;
+        char kernel_src[512];
+        const char *ptr;
+        cl_int error;
+
+        // 2 built-ins for each scope
+        for (size_t i = 0; i < 2; i++)
+        {
+            buf = 0;
+            clProgramWrapper program;
+            clKernelWrapper kernel;
+            clMemWrapper out_mem;
+
+            if (i == 0 && !gHasLong)
+            {
+                log_info("The device does not support ulong. Testing hilo "
+                         "built-ins only\n");
+                continue;
+            }
+
+            switch (capability)
+            {
+                case CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR: {
+                    sprintf(kernel_src, kernel_sources[i], "device", "device");
+                    break;
+                }
+                case CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR: {
+                    sprintf(kernel_src, kernel_sources[i], "work_group",
+                            "work_group");
+                    break;
+                }
+                case CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR: {
+                    sprintf(kernel_src, kernel_sources[i], "sub_group",
+                            "sub_group");
+                    break;
+                }
+            }
+
+            ptr = kernel_src;
+
+            error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                                &ptr, "SampleClock");
+            test_error(error, "Failed to create program with source");
+            // Zero-fill the buffer: the kernel writes only when it fails.
+            out_mem = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                     sizeof(cl_uint), &buf, &error);
+            test_error(error, "clCreateBuffer failed");
+
+            error = clSetKernelArg(kernel, 0, sizeof(out_mem), &out_mem);
+            test_error(error, "clSetKernelArg failed");
+
+            error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global_size,
+                                           NULL, 0, NULL, NULL);
+            test_error(error, "clEnqueueNDRangeKernel failed");
+
+            error = clEnqueueReadBuffer(queue, out_mem, CL_BLOCKING, 0,
+                                        sizeof(cl_uint), &buf, 0, NULL, NULL);
+            test_error(error, "clEnqueueReadBuffer failed");
+
+            if (buf == 1)
+            {
+                log_error(
+                    "Sampling the clock returned bad values, time1 > time2.\n");
+                return TEST_FAIL;
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+private:
+    cl_device_id device;
+    cl_context context;
+    cl_command_queue queue;
+    cl_device_kernel_clock_capabilities_khr capability;
+};
+
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue,
+                   cl_device_kernel_clock_capabilities_khr capability)
+{
+    if (!is_extension_available(device, "cl_khr_kernel_clock"))
+    {
+        log_info(
+            "The device does not support the cl_khr_kernel_clock extension.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    KernelClockTest test_fixture =
+        KernelClockTest(device, context, queue, capability);
+    // Skip() is true when the requested clock scope is not reported.
+    if (test_fixture.Skip())
+    {
+        return TEST_SKIPPED_ITSELF;
+    }
+    // RunTest yields CL_SUCCESS, an OpenCL error code or TEST_FAIL.
+    cl_int error = test_fixture.RunTest();
+    test_error_ret(error, "Test Failed", TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+}
+
+int test_device_scope(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest(device, context, queue,
+                          CL_DEVICE_KERNEL_CLOCK_SCOPE_DEVICE_KHR);
+}
+
+int test_workgroup_scope(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest(device, context, queue,
+                          CL_DEVICE_KERNEL_CLOCK_SCOPE_WORK_GROUP_KHR);
+}
+
+int test_subgroup_scope(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest(device, context, queue,
+                          CL_DEVICE_KERNEL_CLOCK_SCOPE_SUB_GROUP_KHR);
+}
diff --git a/test_conformance/extensions/cl_khr_kernel_clock/main.cpp b/test_conformance/extensions/cl_khr_kernel_clock/main.cpp
new file mode 100644
index 0000000000..8a2d98554b
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_kernel_clock/main.cpp
@@ -0,0 +1,29 @@
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "harness/testHarness.h"
+
+test_definition test_list[] = {
+    ADD_TEST(device_scope),
+    ADD_TEST(workgroup_scope),
+    ADD_TEST(subgroup_scope),
+};
+
+
+int main(int argc, const char *argv[])
+{
+    return runTestHarness(argc, argv, ARRAY_SIZE(test_list), test_list, false,
+                          0);
+}
diff --git a/test_conformance/extensions/cl_khr_kernel_clock/procs.h b/test_conformance/extensions/cl_khr_kernel_clock/procs.h
new file mode 100644
index 0000000000..a82564bc81
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_kernel_clock/procs.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef CL_KHR_KERNEL_CLOCK_PROCS_H
+#define CL_KHR_KERNEL_CLOCK_PROCS_H
+
+#include <CL/cl.h>
+
+int test_device_scope(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements);
+int test_workgroup_scope(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements);
+int test_subgroup_scope(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements);
+
+#endif /*CL_KHR_KERNEL_CLOCK_PROCS_H*/
diff --git a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
index fcdbf6b3e0..5ebda6f2f8 100644
--- a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
@@ -3,6 +3,9 @@ set(MODULE_NAME CL_KHR_SEMAPHORE)
 set(${MODULE_NAME}_SOURCES
          main.cpp
          test_semaphores.cpp
+         test_semaphores_negative_release_retain.cpp
+         test_semaphores_negative_getinfo.cpp
+         test_semaphores_negative_wait.cpp
          semaphore_base.h
 )
 
diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp
index 0ae7206a0d..7215624580 100644
--- a/test_conformance/extensions/cl_khr_semaphore/main.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp
@@ -35,6 +35,20 @@ test_definition test_list[] = {
     ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_queries, Version(1, 2)),
     ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_get_info_invalid_semaphore,
+                     Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_get_info_invalid_value, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_command_queue,
+                     Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_value, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_semaphore, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_context, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_event_wait_list,
+                     Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_wait_invalid_event_status,
+                     Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_release, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_negative_retain, Version(1, 2)),
 };
 
 const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h
index f7c1aaa301..cbe3993db5 100644
--- a/test_conformance/extensions/cl_khr_semaphore/procs.h
+++ b/test_conformance/extensions/cl_khr_semaphore/procs.h
@@ -45,3 +45,37 @@ extern int test_semaphores_import_export_fd(cl_device_id deviceID,
                                             cl_context context,
                                             cl_command_queue queue,
                                             int num_elements);
+extern int test_semaphores_negative_get_info_invalid_semaphore(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_get_info_invalid_value(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_command_queue(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_value(cl_device_id device,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements);
+extern int test_semaphores_negative_wait_invalid_semaphore(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_context(cl_device_id device,
+                                                         cl_context context,
+                                                         cl_command_queue queue,
+                                                         int num_elements);
+extern int test_semaphores_negative_wait_invalid_event_wait_list(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_wait_invalid_event_status(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_semaphores_negative_release(cl_device_id device,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_semaphores_negative_retain(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
index 6832e96a94..1d07cf1351 100644
--- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
@@ -20,6 +20,7 @@
 #include <system_error>
 #include <thread>
 #include <chrono>
+#include <vector>
 
 #include "semaphore_base.h"
 
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_getinfo.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_getinfo.cpp
new file mode 100644
index 0000000000..0cf8bb0faf
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_getinfo.cpp
@@ -0,0 +1,130 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "semaphore_base.h"
+
+#include "harness/errorHelpers.h"
+
+namespace {
+
+// sema_object is not a valid semaphore.
+
+struct GetInfoInvalidSemaphore : public SemaphoreTestBase
+{
+    GetInfoInvalidSemaphore(cl_device_id device, cl_context context,
+                            cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Query info through a null semaphore handle; it must be rejected
+        cl_semaphore_type_khr type = 0;
+        size_t ret_size = 0;
+        cl_int err = clGetSemaphoreInfoKHR(nullptr, CL_SEMAPHORE_TYPE_KHR,
+                                           sizeof(cl_semaphore_type_khr), &type,
+                                           &ret_size);
+        test_failure_error(err, CL_INVALID_SEMAPHORE_KHR,
+                           "Unexpected clGetSemaphoreInfoKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// 1) param_name is not one of the attributes defined in the Semaphore Queries
+// table
+
+// 2) param_value_size is less than the size of Return Type of the corresponding
+// param_name attribute as defined in the Semaphore Queries table.
+
+struct GetInfoInvalidValue : public SemaphoreTestBase
+{
+    GetInfoInvalidValue(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create a binary semaphore with an explicit device handle list
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR),
+            (cl_semaphore_properties_khr)device,
+            CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR,
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // (1) param_name that is not a valid semaphore query
+        cl_semaphore_info_khr param_name = ~0;
+        err = clGetSemaphoreInfoKHR(semaphore, param_name, 0, nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_VALUE,
+                           "Unexpected clGetSemaphoreInfoKHR return");
+
+        // (2) undersized param_value_size; first fetch the real property size
+        size_t size = 0;
+        err = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR, 0,
+                                    nullptr, &size);
+        test_error(err, "Could not query semaphore");
+
+        // sanity-check the size so the one-element buffer below is too small
+        if (size != sizeof(sema_props))
+            test_fail("Error: expected size %zu, returned %zu\n",
+                      sizeof(sema_props), size);
+
+        // first test with non-zero property size but not enough
+        cl_semaphore_properties_khr ret_props = 0;
+        err = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_PROPERTIES_KHR,
+                                    sizeof(ret_props), &ret_props, nullptr);
+        test_failure_error(err, CL_INVALID_VALUE,
+                           "Unexpected clGetSemaphoreInfoKHR return");
+
+        // second test with zero property size
+        cl_semaphore_type_khr type = 0;
+        err = clGetSemaphoreInfoKHR(semaphore, CL_SEMAPHORE_TYPE_KHR, 0, &type,
+                                    nullptr);
+        test_failure_error(err, CL_INVALID_VALUE,
+                           "Unexpected clGetSemaphoreInfoKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+}
+
+int test_semaphores_negative_get_info_invalid_semaphore(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements)
+{
+    return MakeAndRunTest<GetInfoInvalidSemaphore>(device, context, queue);
+}
+
+int test_semaphores_negative_get_info_invalid_value(cl_device_id device,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements)
+{
+    return MakeAndRunTest<GetInfoInvalidValue>(device, context, queue);
+}
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_release_retain.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_release_retain.cpp
new file mode 100644
index 0000000000..ea6139de28
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_release_retain.cpp
@@ -0,0 +1,89 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "semaphore_base.h"
+
+#include "harness/errorHelpers.h"
+#include <chrono>
+#include <system_error>
+#include <thread>
+#include <vector>
+
+namespace {
+
+// sema_object is not a valid semaphore object
+
+struct ReleaseInvalidSemaphore : public SemaphoreTestBase
+{
+    ReleaseInvalidSemaphore(cl_device_id device, cl_context context,
+                            cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Release a null handle; only CL_INVALID_SEMAPHORE_KHR is accepted
+        cl_int err = CL_SUCCESS;
+        err = clReleaseSemaphoreKHR(nullptr);
+        if (err != CL_INVALID_SEMAPHORE_KHR)
+        {
+            log_error("Unexpected clReleaseSemaphoreKHR result, expected "
+                      "CL_INVALID_SEMAPHORE_KHR, get %s\n",
+                      IGetErrorString(err));
+            return TEST_FAIL;
+        }
+
+        return TEST_PASS;
+    }
+};
+
+struct RetainInvalidSemaphore : public SemaphoreTestBase
+{
+    RetainInvalidSemaphore(cl_device_id device, cl_context context,
+                           cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Retain a null handle; only CL_INVALID_SEMAPHORE_KHR is accepted
+        cl_int err = CL_SUCCESS;
+        err = clRetainSemaphoreKHR(nullptr);
+        if (err != CL_INVALID_SEMAPHORE_KHR)
+        {
+            log_error("Unexpected clRetainSemaphoreKHR result, expected "
+                      "CL_INVALID_SEMAPHORE_KHR, get %s\n",
+                      IGetErrorString(err));
+            return TEST_FAIL;
+        }
+
+        return TEST_PASS;
+    }
+};
+
+}
+
+int test_semaphores_negative_release(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<ReleaseInvalidSemaphore>(device, context, queue);
+}
+
+int test_semaphores_negative_retain(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<RetainInvalidSemaphore>(device, context, queue);
+}
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp
new file mode 100644
index 0000000000..dab28d96cc
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores_negative_wait.cpp
@@ -0,0 +1,395 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "semaphore_base.h"
+
+#include "harness/errorHelpers.h"
+#include <chrono>
+#include <system_error>
+#include <thread>
+#include <vector>
+
+namespace {
+
+// the device associated with command_queue is not same as one of the devices
+// specified by CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR at the time of creating one
+// or more of sema_objects.
+
+struct WaitInvalidCommandQueue : public SemaphoreTestBase
+{
+    WaitInvalidCommandQueue(cl_device_id device, cl_context context,
+                            cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create a binary semaphore restricted to the harness device
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_DEVICE_HANDLE_LIST_KHR),
+            (cl_semaphore_properties_khr)device,
+            CL_SEMAPHORE_DEVICE_HANDLE_LIST_END_KHR,
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // look for a device on a platform other than the harness platform
+        cl_platform_id platform_id = 0;
+        // find out what platform the harness is using.
+        err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
+                              sizeof(cl_platform_id), &platform_id, nullptr);
+        test_error(err, "clGetDeviceInfo failed");
+
+        cl_uint num_platforms = 0;
+        err = clGetPlatformIDs(0, nullptr, &num_platforms);
+        test_error(err, "clGetPlatformIDs failed");
+
+        std::vector<cl_platform_id> platforms(num_platforms);
+
+        err = clGetPlatformIDs(num_platforms, platforms.data(), &num_platforms);
+        test_error(err, "clGetPlatformIDs failed");
+
+        cl_device_id device_sec = nullptr;
+        cl_uint num_devices = 0;
+        for (int p = 0; p < (int)num_platforms; p++)
+        {
+            if (platform_id == platforms[p]) continue;
+
+            err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, 0, nullptr,
+                                 &num_devices);
+            if (err == CL_DEVICE_NOT_FOUND) continue; // platform has no devices
+            test_error(err, "clGetDeviceIDs failed");
+            std::vector<cl_device_id> devices(num_devices);
+            err = clGetDeviceIDs(platforms[p], CL_DEVICE_TYPE_ALL, num_devices,
+                                 devices.data(), nullptr);
+            test_error(err, "clGetDeviceIDs failed");
+
+            device_sec = devices.front();
+            break;
+        }
+
+        if (device_sec == nullptr)
+        {
+            log_info("Can't find needed resources. Skipping the test.\n");
+            return TEST_SKIPPED_ITSELF;
+        }
+
+        // Create secondary context
+        clContextWrapper context_sec =
+            clCreateContext(0, 1, &device_sec, nullptr, nullptr, &err);
+        test_error(err, "Failed to create context");
+
+        // Create secondary queue
+        clCommandQueueWrapper queue_sec =
+            clCreateCommandQueue(context_sec, device_sec, 0, &err);
+        test_error(err, "Could not create command queue");
+
+        // Signal semaphore on the harness queue
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // Wait on the secondary queue, whose device is not in the handle list
+        err = clEnqueueWaitSemaphoresKHR(queue_sec, 1, semaphore, nullptr, 0,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_COMMAND_QUEUE,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return TEST_PASS;
+    }
+};
+
+
+// num_sema_objects is 0.
+
+struct WaitInvalidValue : public SemaphoreTestBase
+{
+    WaitInvalidValue(cl_device_id device, cl_context context,
+                     cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Enqueue a wait with a semaphore count of 0; it must be rejected
+        cl_int err = CL_SUCCESS;
+        err = clEnqueueWaitSemaphoresKHR(queue, 0, semaphore, nullptr, 0,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_VALUE,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// any of the semaphore objects specified by sema_objects is not valid.
+
+struct WaitInvalidSemaphore : public SemaphoreTestBase
+{
+    WaitInvalidSemaphore(cl_device_id device, cl_context context,
+                         cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Enqueue a wait on a list made only of null semaphore handles
+        cl_semaphore_khr sema_objects[] = { nullptr, nullptr, nullptr };
+        cl_int err = CL_SUCCESS;
+        err = clEnqueueWaitSemaphoresKHR(
+            queue, sizeof(sema_objects) / sizeof(sema_objects[0]), sema_objects,
+            nullptr, 0, nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_SEMAPHORE_KHR,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// 1) the context associated with command_queue and any of the semaphore objects
+// in sema_objects are not the same, or
+// 2) the context associated with command_queue and that associated with events
+// in event_wait_list are not the same.
+
+struct WaitInvalidContext : public SemaphoreTestBase
+{
+    WaitInvalidContext(cl_device_id device, cl_context context,
+                       cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create a binary semaphore in the harness context
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // Create a secondary context on the same device
+        clContextWrapper context_sec =
+            clCreateContext(0, 1, &device, nullptr, nullptr, &err);
+        test_error(err, "Failed to create context");
+
+        // Create a queue that belongs to the secondary context
+        clCommandQueueWrapper queue_sec =
+            clCreateCommandQueue(context_sec, device, 0, &err);
+        test_error(err, "Could not create command queue");
+
+        // Signal semaphore on the harness queue
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // (1) Wait on the secondary-context queue for the harness semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue_sec, 1, semaphore, nullptr, 0,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_CONTEXT,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        // Create a user event owned by the secondary context
+        clEventWrapper user_event = clCreateUserEvent(context_sec, &err);
+        test_error(err, "Could not create user event");
+
+        // (2) Wait on the harness queue with the foreign-context event
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 1,
+                                         &user_event, nullptr);
+
+        cl_int signal_error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(signal_error, "clSetUserEventStatus failed");
+
+        test_failure_error(err, CL_INVALID_CONTEXT,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return TEST_PASS;
+    }
+};
+
+// (1) event_wait_list is NULL and num_events_in_wait_list is not 0, or
+// (2) event_wait_list is not NULL and num_events_in_wait_list is 0, or
+// (3) event objects in event_wait_list are not valid events.
+
+struct WaitInvalidEventWaitList : public SemaphoreTestBase
+{
+    WaitInvalidEventWaitList(cl_device_id device, cl_context context,
+                             cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create a binary semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+
+        // Signal semaphore
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // (1) NULL event_wait_list with num_events_in_wait_list == 1
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 1,
+                                         nullptr, nullptr);
+        test_failure_error(err, CL_INVALID_EVENT_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        // Create the user event used as the non-NULL wait list in case (2)
+        clEventWrapper user_event = clCreateUserEvent(context, &err);
+        test_error(err, "Could not create user event");
+
+        // (2) Non-NULL event_wait_list with num_events_in_wait_list == 0
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                         &user_event, nullptr);
+
+        cl_int signal_error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(signal_error, "clSetUserEventStatus failed");
+
+        test_failure_error(err, CL_INVALID_EVENT_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        // (3) event_wait_list holding only NULL (invalid) event handles
+        cl_event wait_list[] = { nullptr, nullptr, nullptr };
+        err = clEnqueueWaitSemaphoresKHR(
+            queue, 1, semaphore, nullptr,
+            sizeof(wait_list) / sizeof(wait_list[0]), wait_list, nullptr);
+        test_failure_error(err, CL_INVALID_EVENT_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+// CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: the execution status of any
+// of the events in event_wait_list is a negative integer value.
+
+struct WaitInvalidEventStatus : public SemaphoreTestBase
+{
+    WaitInvalidEventStatus(cl_device_id device, cl_context context,
+                           cl_command_queue queue)
+        : SemaphoreTestBase(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        // Create a binary semaphore
+        cl_semaphore_properties_khr sema_props[] = {
+            static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+            static_cast<cl_semaphore_properties_khr>(
+                CL_SEMAPHORE_TYPE_BINARY_KHR),
+            0
+        };
+
+        cl_int err = CL_SUCCESS;
+        semaphore =
+            clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+        test_error(err, "Could not create semaphore");
+
+        // Signal semaphore
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, semaphore, nullptr, 0,
+                                           nullptr, nullptr);
+        test_error(err, "Could not signal semaphore");
+
+        // Create user event
+        clEventWrapper user_event = clCreateUserEvent(context, &err);
+        test_error(err, "Could not create user event");
+
+        // Put the user event into a failed state (negative execution status)
+        err = clSetUserEventStatus(user_event, -1);
+        test_error(err, "Unable to set event status");
+
+        // Wait semaphore with the failed event in its wait list
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, semaphore, nullptr, 1,
+                                         &user_event, nullptr);
+        test_failure_error(err, CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST,
+                           "Unexpected clEnqueueWaitSemaphoresKHR return");
+
+        return CL_SUCCESS;
+    }
+};
+
+}
+
+int test_semaphores_negative_wait_invalid_command_queue(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidCommandQueue>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_value(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue,
+                                                int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidValue>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_semaphore(cl_device_id device,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidSemaphore>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_context(cl_device_id device,
+                                                  cl_context context,
+                                                  cl_command_queue queue,
+                                                  int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidContext>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_event_wait_list(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidEventWaitList>(device, context, queue);
+}
+
+int test_semaphores_negative_wait_invalid_event_status(cl_device_id device,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements)
+{
+    return MakeAndRunTest<WaitInvalidEventStatus>(device, context, queue);
+}
diff --git a/test_conformance/generic_address_space/CMakeLists.txt b/test_conformance/generic_address_space/CMakeLists.txt
index e74bcf4a57..951c5ab4ed 100644
--- a/test_conformance/generic_address_space/CMakeLists.txt
+++ b/test_conformance/generic_address_space/CMakeLists.txt
@@ -2,6 +2,7 @@ set(MODULE_NAME GENERIC_ADDRESS_SPACE)
 
 set(${MODULE_NAME}_SOURCES
     advanced_tests.cpp
+    atomic_tests.cpp
     basic_tests.cpp
     main.cpp
     stress_tests.cpp
diff --git a/test_conformance/generic_address_space/atomic_tests.cpp b/test_conformance/generic_address_space/atomic_tests.cpp
new file mode 100644
index 0000000000..a24c6ae2fc
--- /dev/null
+++ b/test_conformance/generic_address_space/atomic_tests.cpp
@@ -0,0 +1,224 @@
+//
+// Copyright (c) 2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <vector>
+
+#ifdef __APPLE__
+#include <OpenCL/opencl.h>
+#else
+#include <CL/cl.h>
+#endif
+
+#include "errorHelpers.h"
+#include "typeWrappers.h"
+
+namespace {
+// In this source, each workgroup will generate one value.
+// Even-numbered workgroups perform the atomic operation through
+// the local pointer, odd-numbered ones through the global pointer.
+const char* KernelSourceInvariant = R"OpenCLC(
+kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr) {
+    int gid = get_global_id(0);
+    int tid = get_local_id(0);
+    int wgid = get_group_id(0);
+    int wgsize = get_local_size(0);
+
+    if (tid == 0) atomic_store(localPtr, 0);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Initialise the generic pointer to
+    // the global.
+    generic atomic_int* ptr = globalPtr + wgid;
+
+    // In a workgroup-invariant way, select a localPtr instead.
+    if ((wgid % 2) == 0)
+        ptr = localPtr;
+
+    int inc = atomic_fetch_add(ptr, 1);
+
+    // In the cases where the local memory ptr was used,
+    // save off the final value.
+    if ((wgid % 2) == 0 && inc == (wgsize-1))
+        atomic_store(&globalPtr[wgid], inc);
+}
+)OpenCLC";
+
+// In this source, each workgroup will generate two values.
+// Even-numbered work items in the workgroup select the local
+// memory pointer and odd-numbered ones the global memory pointer,
+// then each performs an atomic operation on its selection.
+const char* KernelSourceVariant = R"OpenCLC(
+kernel void testKernel(global atomic_int* globalPtr, local atomic_int* localPtr) {
+    int gid = get_global_id(0);
+    int tid = get_local_id(0);
+    int wgid = get_group_id(0);
+    int wgsize = get_local_size(0);
+
+    if (tid == 0) atomic_store(localPtr, 0);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // Initialise the generic pointer to
+    // the global.  Two values are written per WG.
+    generic atomic_int* ptr = globalPtr + (wgid * 2);
+
+    // In a workgroup-invariant way, select a localPtr instead.
+    if ((tid % 2) == 0)
+        ptr = localPtr;
+
+    atomic_fetch_add(ptr, 1);
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // In the cases where the local memory ptr was used,
+    // save off the final value.
+    if (tid == 0)
+        atomic_store(&globalPtr[(wgid * 2) + 1], atomic_load(localPtr));
+}
+)OpenCLC";
+}
+
+int test_generic_atomics_invariant(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int)
+{
+    const auto version = get_device_cl_version(deviceID);
+
+    if (version < Version(2, 0)) return TEST_SKIPPED_ITSELF;
+
+    cl_int err = CL_SUCCESS;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &KernelSourceInvariant, "testKernel");
+    test_error(err, "Failed to create test kernel");
+
+    size_t wgSize = 0;
+    // Attempt to find the simd unit size for the device.
+    err = clGetKernelWorkGroupInfo(kernel, deviceID,
+                                   CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+                                   sizeof(wgSize), &wgSize, nullptr);
+    test_error(err, "clGetKernelWorkGroupInfo failed");
+
+    // How many workgroups to run for the test.
+    const int numWGs = 2;
+    const size_t bufferSize = numWGs * sizeof(cl_uint);
+    // Read-write: the kernel's atomic_fetch_add also reads the buffer.
+    clMemWrapper buffer =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, nullptr, &err);
+    test_error(err, "clCreateBuffer failed");
+    const cl_int zero = 0;
+    err = clEnqueueFillBuffer(queue, buffer, &zero, sizeof(zero), 0, bufferSize,
+                              0, nullptr, nullptr);
+    test_error(err, "clEnqueueFillBuffer failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
+    test_error(err, "clSetKernelArg failed");
+    err = clSetKernelArg(kernel, 1, bufferSize, nullptr);
+    test_error(err, "clSetKernelArg failed");
+
+    const size_t globalSize = wgSize * numWGs;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &globalSize,
+                                 &wgSize, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed");
+
+    std::vector<cl_int> results(numWGs);
+    err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize,
+                              results.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    // Even groups store their last fetch_add result, odd groups count all.
+    for (size_t i = 0; i < results.size(); ++i)
+    {
+        const cl_int expected = ((i % 2) == 0) ? wgSize - 1 : wgSize;
+        if (results[i] != expected)
+        {
+            log_error("Verification on device failed at index %zu\n", i);
+            return TEST_FAIL;
+        }
+    }
+
+    return CL_SUCCESS;
+}
+
+int test_generic_atomics_variant(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue queue, int)
+{
+    const auto version = get_device_cl_version(deviceID);
+
+    if (version < Version(2, 0)) return TEST_SKIPPED_ITSELF;
+
+    cl_int err = CL_SUCCESS;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &KernelSourceVariant, "testKernel");
+    test_error(err, "Failed to create test kernel");
+
+    size_t wgSize = 0;
+    // Attempt to find the simd unit size for the device.
+    err = clGetKernelWorkGroupInfo(kernel, deviceID,
+                                   CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
+                                   sizeof(wgSize), &wgSize, nullptr);
+    test_error(err, "clGetKernelWorkGroupInfo failed");
+
+    // How many workgroups to run for the test.
+    const int numWGs = 2;
+    const size_t bufferSize = numWGs * sizeof(cl_uint) * 2;
+    // Read-write: the kernel's atomic_fetch_add also reads the buffer.
+    clMemWrapper buffer =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, nullptr, &err);
+    test_error(err, "clCreateBuffer failed");
+    const cl_int zero = 0;
+    err = clEnqueueFillBuffer(queue, buffer, &zero, sizeof(zero), 0, bufferSize,
+                              0, nullptr, nullptr);
+    test_error(err, "clEnqueueFillBuffer failed");
+
+    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
+    test_error(err, "clSetKernelArg failed");
+    err = clSetKernelArg(kernel, 1, bufferSize, nullptr);
+    test_error(err, "clSetKernelArg failed");
+
+    const size_t globalSize = wgSize * numWGs;
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, &globalSize,
+                                 &wgSize, 0, nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed");
+
+    std::vector<cl_int> results(numWGs * 2);
+    err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize,
+                              results.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueReadBuffer failed");
+
+    // Odd work-items add to the global slot, even ones to the local counter.
+    const cl_int expectedGlobal = static_cast<cl_int>(wgSize / 2);
+    const cl_int expectedLocal = static_cast<cl_int>((wgSize + 1) / 2);
+    for (size_t i = 0; i < results.size(); i += 2)
+    {
+        if (results[i] != expectedGlobal)
+        {
+            log_error("Verification on device failed at index %zu\n", i);
+            return TEST_FAIL;
+        }
+        if (results[i + 1] != expectedLocal)
+        {
+            log_error("Verification on device failed at index %zu\n", i + 1);
+            return TEST_FAIL;
+        }
+    }
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/generic_address_space/main.cpp b/test_conformance/generic_address_space/main.cpp
index 0114758390..a7897367ba 100644
--- a/test_conformance/generic_address_space/main.cpp
+++ b/test_conformance/generic_address_space/main.cpp
@@ -40,31 +40,39 @@ extern int test_generic_advanced_casting(cl_device_id deviceID, cl_context conte
 extern int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_generic_ptr_to_host_mem_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_max_number_of_params(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+// atomic tests
+int test_generic_atomics_invariant(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+int test_generic_atomics_variant(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue queue, int num_elements);
 
 test_definition test_list[] = {
     // basic tests
-    ADD_TEST( function_get_fence ),
-    ADD_TEST( function_to_address_space ),
-    ADD_TEST( variable_get_fence ),
-    ADD_TEST( variable_to_address_space ),
-    ADD_TEST( casting ),
-    ADD_TEST( conditional_casting ),
-    ADD_TEST( chain_casting ),
-    ADD_TEST( ternary_operator_casting ),
-    ADD_TEST( language_struct ),
-    ADD_TEST( language_union ),
-    ADD_TEST( multiple_calls_same_function ),
-    ADD_TEST( compare_pointers ),
+    ADD_TEST(function_get_fence),
+    ADD_TEST(function_to_address_space),
+    ADD_TEST(variable_get_fence),
+    ADD_TEST(variable_to_address_space),
+    ADD_TEST(casting),
+    ADD_TEST(conditional_casting),
+    ADD_TEST(chain_casting),
+    ADD_TEST(ternary_operator_casting),
+    ADD_TEST(language_struct),
+    ADD_TEST(language_union),
+    ADD_TEST(multiple_calls_same_function),
+    ADD_TEST(compare_pointers),
     // advanced tests
-    ADD_TEST( library_function ),
-    ADD_TEST( generic_variable_volatile ),
-    ADD_TEST( generic_variable_const ),
-    ADD_TEST( generic_variable_gentype ),
-    ADD_TEST( builtin_functions ),
-    ADD_TEST( generic_advanced_casting ),
-    ADD_TEST( generic_ptr_to_host_mem ),
-    ADD_TEST( generic_ptr_to_host_mem_svm ),
-    ADD_TEST( max_number_of_params ),
+    ADD_TEST(library_function),
+    ADD_TEST(generic_variable_volatile),
+    ADD_TEST(generic_variable_const),
+    ADD_TEST(generic_variable_gentype),
+    ADD_TEST(builtin_functions),
+    ADD_TEST(generic_advanced_casting),
+    ADD_TEST(generic_ptr_to_host_mem),
+    ADD_TEST(generic_ptr_to_host_mem_svm),
+    ADD_TEST(max_number_of_params),
+    // atomic tests
+    ADD_TEST(generic_atomics_invariant),
+    ADD_TEST(generic_atomics_variant),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp
index c61610d090..73701fb018 100644
--- a/test_conformance/gl/test_buffers.cpp
+++ b/test_conformance/gl/test_buffers.cpp
@@ -126,15 +126,10 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue,
     clProgramWrapper program;
     clKernelWrapper kernel;
     clMemWrapper streams[3];
-    size_t dataSize = numElements * 16 * sizeof(cl_long);
-#if !(defined(_WIN32) && defined(_MSC_VER))
-    cl_long inData[numElements * 16], outDataCL[numElements * 16],
-        outDataGL[numElements * 16];
-#else
-    cl_long *inData = (cl_long *)_malloca(dataSize);
-    cl_long *outDataCL = (cl_long *)_malloca(dataSize);
-    cl_long *outDataGL = (cl_long *)_malloca(dataSize);
-#endif
+    size_t dataSize = numElements * 16;
+    std::vector<cl_long> inData(dataSize), outDataCL(dataSize),
+        outDataGL(dataSize);
+
     glBufferWrapper inGLBuffer, outGLBuffer;
     int i;
     size_t bufferSize;
@@ -168,21 +163,19 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue,
     bufferSize = numElements * vecSize * get_explicit_type_size(vecType);
 
     /* Generate some almost-random input data */
-    gen_input_data(vecType, vecSize * numElements, d, inData);
-    memset(outDataCL, 0, dataSize);
-    memset(outDataGL, 0, dataSize);
+    gen_input_data(vecType, vecSize * numElements, d, inData.data());
 
     /* Generate some GL buffers to go against */
     glGenBuffers(1, &inGLBuffer);
     glGenBuffers(1, &outGLBuffer);
 
     glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer);
-    glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW);
+    glBufferData(GL_ARRAY_BUFFER, bufferSize, inData.data(), GL_STATIC_DRAW);
 
     // Note: we need to bind the output buffer, even though we don't care about
     // its values yet, because CL needs it to get the buffer size
     glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
-    glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW);
+    glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL.data(), GL_STATIC_DRAW);
 
     glBindBuffer(GL_ARRAY_BUFFER, 0);
     glFinish();
@@ -257,16 +250,16 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue,
     // Get the results from both CL and GL and make sure everything looks
     // correct
     error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize,
-                                outDataCL, 0, NULL, NULL);
+                                outDataCL.data(), 0, NULL, NULL);
     test_error(error, "Unable to read output CL array!");
 
     glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer);
     void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
-    memcpy(outDataGL, glMem, bufferSize);
+    memcpy(outDataGL.data(), glMem, bufferSize);
     glUnmapBuffer(GL_ARRAY_BUFFER);
 
-    char *inP = (char *)inData, *glP = (char *)outDataGL,
-         *clP = (char *)outDataCL;
+    char *inP = (char *)inData.data(), *glP = (char *)outDataGL.data(),
+         *clP = (char *)outDataCL.data();
     error = 0;
     for (size_t i = 0; i < numElements * vecSize; i++)
     {
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 69d00a1ada..65e3b23a10 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -37,6 +37,7 @@ static const char *kernelpattern_image_write_1D =
 "}\n";
 
 static const char *kernelpattern_image_write_1D_half =
+"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
 "__kernel void sample_test( __global half4 *source, write_only image1d_t dest )\n"
 "{\n"
 "    uint index = get_global_id(0);\n"
@@ -52,6 +53,7 @@ static const char *kernelpattern_image_write_1D_buffer =
 "}\n";
 
 static const char *kernelpattern_image_write_1D_buffer_half =
+"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
 "__kernel void sample_test( __global half4 *source, write_only image1d_buffer_t dest )\n"
 "{\n"
 "    uint index = get_global_id(0);\n"
@@ -69,6 +71,7 @@ static const char *kernelpattern_image_write_2D =
 "}\n";
 
 static const char *kernelpattern_image_write_2D_half =
+"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
 "__kernel void sample_test( __global half4 *source, write_only image2d_t dest )\n"
 "{\n"
 "    int  tidX = get_global_id(0);\n"
@@ -88,6 +91,7 @@ static const char *kernelpattern_image_write_1Darray =
 "}\n";
 
 static const char *kernelpattern_image_write_1Darray_half =
+"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
 "__kernel void sample_test( __global half4 *source, write_only image1d_array_t dest )\n"
 "{\n"
 "    int  tidX = get_global_id(0);\n"
@@ -111,6 +115,7 @@ static const char *kernelpattern_image_write_3D =
 "}\n";
 
 static const char *kernelpattern_image_write_3D_half =
+"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
 "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"
 "__kernel void sample_test( __global half4 *source, write_only image3d_t dest )\n"
 "{\n"
@@ -137,6 +142,7 @@ static const char *kernelpattern_image_write_2Darray =
 "}\n";
 
 static const char *kernelpattern_image_write_2Darray_half =
+"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
 "__kernel void sample_test( __global half4 *source, write_only image2d_array_t dest )\n"
 "{\n"
 "    int  tidX   = get_global_id(0);\n"
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index d7e7eded46..9b83f79abe 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -16,6 +16,7 @@ set(${MODULE_NAME}_SOURCES
     test_write_3D.cpp
     test_cl_ext_image_requirements_info.cpp
     test_cl_ext_image_from_buffer.cpp
+    test_cl_ext_image_raw10_raw12.cpp
     ../common.cpp
 )
 
diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp
index debbdf18a4..8c4f555767 100644
--- a/test_conformance/images/kernel_read_write/main.cpp
+++ b/test_conformance/images/kernel_read_write/main.cpp
@@ -89,6 +89,8 @@ extern int image_from_buffer_fill_positive(cl_device_id device,
 extern int image_from_buffer_read_positive(cl_device_id device,
                                            cl_context context,
                                            cl_command_queue queue);
+extern int ext_image_raw10_raw12(cl_device_id device, cl_context context,
+                                 cl_command_queue queue);
 
 /** read_write images only support sampler-less read buildt-ins which require special settings
   * for some global parameters. This pair of functions temporarily overwrite those global parameters
@@ -367,6 +369,12 @@ int test_image_from_buffer_read_positive(cl_device_id device,
     return image_from_buffer_read_positive(device, context, queue);
 }
 
+int test_cl_ext_image_raw10_raw12(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    return ext_image_raw10_raw12(device, context, queue);
+}
+
 test_definition test_list[] = {
     ADD_TEST(1D),
     ADD_TEST(2D),
@@ -385,6 +393,7 @@ test_definition test_list[] = {
     ADD_TEST_VERSION(image_from_small_buffer_negative, Version(3, 0)),
     ADD_TEST_VERSION(image_from_buffer_fill_positive, Version(3, 0)),
     ADD_TEST_VERSION(image_from_buffer_read_positive, Version(3, 0)),
+    ADD_TEST_VERSION(cl_ext_image_raw10_raw12, Version(1, 2)),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 7b4860dbcb..2dcc1827e4 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -494,10 +494,11 @@ int image_from_buffer_alignment_negative(cl_device_id device,
                 test_error(err, "Unable to create buffer");
 
                 /* Test Row pitch images */
-                if (imageType == CL_MEM_OBJECT_IMAGE2D
-                    || imageType == CL_MEM_OBJECT_IMAGE3D
-                    || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY
-                    || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                if ((imageType == CL_MEM_OBJECT_IMAGE2D
+                     || imageType == CL_MEM_OBJECT_IMAGE3D
+                     || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY
+                     || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                    && row_pitch_alignment != 1)
                 {
                     image_desc.buffer = buffer;
                     image_desc.image_row_pitch =
@@ -510,8 +511,9 @@ int image_from_buffer_alignment_negative(cl_device_id device,
                 }
 
                 /* Test Slice pitch images */
-                if (imageType == CL_MEM_OBJECT_IMAGE3D
-                    || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                if ((imageType == CL_MEM_OBJECT_IMAGE3D
+                     || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+                    && slice_pitch_alignment != 1)
                 {
                     image_desc.buffer = buffer;
                     image_desc.image_row_pitch = row_pitch;
@@ -524,37 +526,40 @@ int image_from_buffer_alignment_negative(cl_device_id device,
                                        "Unexpected clCreateImage return");
                 }
 
-                /* Test buffer from host ptr to test base address alignment */
-                const size_t aligned_buffer_size =
-                    aligned_size(buffer_size, base_address_alignment);
-                /* Create buffer with host ptr and additional size for the wrong
-                 * alignment */
-                void* const host_ptr =
-                    malloc(aligned_buffer_size + base_address_alignment);
-                void* non_aligned_host_ptr =
-                    (void*)((char*)(aligned_ptr(host_ptr,
-                                                base_address_alignment))
-                            + 1); /* wrong alignment */
-
-                cl_mem buffer_host = clCreateBuffer(
-                    context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
-                    buffer_size, non_aligned_host_ptr, &err);
-                test_error(err, "Unable to create buffer");
+                if (base_address_alignment != 1)
+                {
+                    /* Test buffer from host ptr to test base address alignment
+                     */
+                    const size_t aligned_buffer_size =
+                        aligned_size(buffer_size, base_address_alignment);
+                    /* Create buffer with host ptr and additional size for the
+                     * wrong alignment */
+                    void* const host_ptr =
+                        malloc(aligned_buffer_size + base_address_alignment);
+                    void* non_aligned_host_ptr =
+                        (void*)((char*)(aligned_ptr(host_ptr,
+                                                    base_address_alignment))
+                                + 1); /* wrong alignment */
+
+                    cl_mem buffer_host = clCreateBuffer(
+                        context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
+                        buffer_size, non_aligned_host_ptr, &err);
+                    test_error(err, "Unable to create buffer");
 
-                image_desc.buffer = buffer_host;
+                    image_desc.buffer = buffer_host;
 
-                clCreateImage(context, flag, &format, &image_desc, nullptr,
-                              &err);
-                test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
-                                   "Unexpected clCreateImage return");
+                    clCreateImage(context, flag, &format, &image_desc, nullptr,
+                                  &err);
+                    test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+                                       "Unexpected clCreateImage return");
 
-                free(host_ptr);
+                    err = clReleaseMemObject(buffer_host);
+                    free(host_ptr); /* only once the buffer is released */
+                    test_error(err, "Unable to release buffer");
+                }
 
                 err = clReleaseMemObject(buffer);
                 test_error(err, "Unable to release buffer");
-
-                err = clReleaseMemObject(buffer_host);
-                test_error(err, "Unable to release buffer");
             }
         }
     }
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
new file mode 100644
index 0000000000..c506528e6d
--- /dev/null
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_raw10_raw12.cpp
@@ -0,0 +1,78 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "../testBase.h"
+#include "../common.h"
+#include "test_cl_ext_image_buffer.hpp"
+
+extern int gTypesToTest;
+extern int gtestTypesToRun;
+extern bool gTestImage2DFromBuffer;
+extern cl_mem_flags gMemFlagsToUse;
+
+static int test_image_set(cl_device_id device, cl_context context,
+                          cl_command_queue queue, cl_mem_object_type imageType)
+{
+    int ret = 0;
+
+    // The RAW10/RAW12 formats under test: a fixed list, not a device query
+    std::vector<cl_image_format> formatList = {
+        { CL_R, CL_UNSIGNED_INT_RAW10_EXT }, { CL_R, CL_UNSIGNED_INT_RAW12_EXT }
+    };
+
+    // Log the formats being tested. The names and the trailing 0 printed
+    // below are literals; nothing here is read back from the device
+    log_info("---- Supported %s %s formats for this device for "
+             "cl_ext_image_raw10_raw12---- \n",
+             convert_image_type_to_string(imageType), "read");
+    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW10_EXT", 0);
+    log_info("  %-7s %-24s %d\n", "CL_R", "CL_UNSIGNED_INT_RAW12_EXT", 0);
+    log_info("------------------------------------------- \n");
+    // Run the nearest-filter read test only if gTypesToTest selects test.type
+    image_sampler_data imageSampler;
+    ImageTestTypes test{ kTestUInt, kUInt, uintFormats, "uint" };
+    if (gTypesToTest & test.type)
+    {
+        std::vector<bool> filterFlags(formatList.size(), false);
+        imageSampler.filter_mode = CL_FILTER_NEAREST;
+        ret = test_read_image_formats(device, context, queue, formatList,
+                                      filterFlags, &imageSampler,
+                                      test.explicitType, imageType);
+    }
+    return ret;
+}
+
+int ext_image_raw10_raw12(cl_device_id device, cl_context context,
+                          cl_command_queue queue)
+{
+    int ret = 0;
+    // Devices without cl_ext_image_raw10_raw12 log a notice and return 0.
+    if (0 == is_extension_available(device, "cl_ext_image_raw10_raw12"))
+    {
+        log_info("-----------------------------------------------------\n");
+        log_info("This device does not support "
+                 "cl_ext_image_raw10_raw12.\n");
+        log_info("Skipping cl_ext_image_raw10_raw12 "
+                 "image test.\n");
+        log_info("-----------------------------------------------------\n\n");
+        return 0;
+    }
+    gtestTypesToRun = kReadTests;
+    // gtestTypesToRun (a global) stays kReadTests; only 2D images are tested.
+    ret += test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE2D);
+
+    return ret;
+}
diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index de7ed0fd29..d30ac0d4e9 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -17,6 +17,7 @@
 #include <float.h>
 
 #include <algorithm>
+#include <cinttypes>
 
 #if defined( __APPLE__ )
     #include <signal.h>
@@ -1481,8 +1482,7 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke
         char *imagePtr = (char *)imageValues + nextLevelOffset;
         if( gTestMipmaps )
         {
-            if(gDebugTrace)
-                log_info("\t- Working at mip level %d\n", lod);
+            if (gDebugTrace) log_info("\t- Working at mip level %zu\n", lod);
             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
         }
 
@@ -1663,7 +1663,9 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
     {
         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
         {
-            imageInfo.rowPitch = imageInfo.width * pixelSize;
+            if (!is_width_compatible(imageInfo)) continue;
+            imageInfo.rowPitch = calculate_row_pitch(imageInfo, pixelSize);
+
             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
             {
                 if( gTestMipmaps )
@@ -1688,10 +1690,28 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
 
         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
         {
-            imageInfo.width = sizes[ idx ][ 0 ];
-            imageInfo.height = sizes[ idx ][ 1 ];
-            imageInfo.rowPitch = imageInfo.width * pixelSize;
-            log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
+            if (imageInfo.format->image_channel_data_type
+                == CL_UNSIGNED_INT_RAW10_EXT)
+            {
+                imageInfo.width = sizes[idx][0] & ~0x3ULL;
+            }
+            else if (imageInfo.format->image_channel_data_type
+                     == CL_UNSIGNED_INT_RAW12_EXT)
+            {
+                imageInfo.width = sizes[idx][0] & ~0x1ULL;
+            }
+            else
+            {
+                imageInfo.width = sizes[idx][0];
+            }
+
+            imageInfo.height = sizes[idx][1];
+            imageInfo.rowPitch = calculate_row_pitch(imageInfo, pixelSize);
+
+            if (0 == imageInfo.width) continue;
+
+            log_info("Testing %d x %d\n", (int)imageInfo.width,
+                     (int)imageInfo.height);
 
             if( gTestMipmaps )
                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
@@ -1723,7 +1743,10 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
         do
         {
             if( gDebugTrace )
-                log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
+                log_info("   at size %d,%d, starting round ramp at %" PRIu64
+                         " for range %" PRIu64 "\n",
+                         (int)imageInfo.width, (int)imageInfo.height,
+                         gRoundingStartValue, typeRange);
             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
@@ -1759,7 +1782,8 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
                 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
                 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
 
-                imageInfo.rowPitch = imageInfo.width * pixelSize;
+                imageInfo.rowPitch = calculate_row_pitch(imageInfo, pixelSize);
+
                 if( gTestMipmaps )
                 {
                     imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
@@ -1782,7 +1806,8 @@ int test_read_image_set_2D(cl_device_id device, cl_context context,
 
                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
                 }
-            } while(  size > maxAllocSize || ( size * 3 ) > memSize );
+            } while (size > maxAllocSize || (size * 3) > memSize
+                     || !is_width_compatible(imageInfo));
 
             if( gDebugTrace )
                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp
index 0cbf09891d..cab1fa8e3a 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -18,6 +18,7 @@
 #include <float.h>
 
 #include <algorithm>
+#include <cinttypes>
 
 #if defined( __APPLE__ )
     #include <signal.h>
@@ -1151,7 +1152,9 @@ int test_read_image_set_1D(cl_device_id device, cl_context context,
         do
         {
             if( gDebugTrace )
-                log_info( "   at size %d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, gRoundingStartValue, typeRange );
+                log_info("   at size %d, starting round ramp at %" PRIu64
+                         " for range %" PRIu64 "\n",
+                         (int)imageInfo.width, gRoundingStartValue, typeRange);
             int retCode = test_read_image_1D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
index a8009420e8..d55d1b09b7 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp
@@ -17,6 +17,7 @@
 #include <float.h>
 
 #include <algorithm>
+#include <cinttypes>
 
 #if defined( __APPLE__ )
 #include <signal.h>
@@ -1261,7 +1262,10 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context,
         do
         {
             if( gDebugTrace )
-                log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.arraySize, gRoundingStartValue, typeRange );
+                log_info("   at size %d,%d, starting round ramp at %" PRIu64
+                         " for range %" PRIu64 "\n",
+                         (int)imageInfo.width, (int)imageInfo.arraySize,
+                         gRoundingStartValue, typeRange);
             int retCode = test_read_image_1D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
             if( retCode )
                 return retCode;
diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
index 533a0fe837..72f1238d3d 100644
--- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp
@@ -542,8 +542,7 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker
         float lod_float = (float)lod;
         if( gTestMipmaps )
         {
-            if(gDebugTrace)
-                log_info(" - Working at mip level %d\n", lod);
+            if (gDebugTrace) log_info(" - Working at mip level %zu\n", lod);
             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
         }
     for( int q = 0; q < loopCount; q++ )
diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp
index 5f7267967e..8e5c15553b 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp
@@ -472,7 +472,7 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x3F,
                                                   (test_value[0] >> 11) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
@@ -497,7 +497,7 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x1F,
                                                   (test_value[0] >> 10) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
index f90244052b..a6bf4ec25b 100644
--- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp
@@ -493,7 +493,7 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x3F,
                                                   (test_value[0] >> 11) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
@@ -518,7 +518,7 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x1F,
                                                   (test_value[0] >> 10) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
index c1c5699458..40c90e7be9 100644
--- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp
@@ -525,7 +525,7 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
                                                 (test_value[0] >> 5) & 0x3F,
                                                 (test_value[0] >> 11) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
@@ -554,7 +554,7 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma
                                                 (test_value[0] >> 5) & 0x1F,
                                                 (test_value[0] >> 10) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp
index 9da93695e3..b50ccb6112 100644
--- a/test_conformance/images/kernel_read_write/test_write_3D.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp
@@ -532,7 +532,7 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
                                                 (test_value[0] >> 5) & 0x3F,
                                                 (test_value[0] >> 11) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
@@ -561,7 +561,7 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que
                                                 (test_value[0] >> 5) & 0x1F,
                                                 (test_value[0] >> 10) & 0x1F);
                                             log_error(
-                                                "    Error:    %f %f %f %f\n",
+                                                "    Error:    %f %f %f\n",
                                                 errors[0], errors[1],
                                                 errors[2]);
                                             break;
diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp
index 2962697164..69097e3fa5 100644
--- a/test_conformance/images/kernel_read_write/test_write_image.cpp
+++ b/test_conformance/images/kernel_read_write/test_write_image.cpp
@@ -592,7 +592,7 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x3F,
                                                   (test_value[0] >> 11) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
@@ -618,7 +618,7 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue
                                                   test_value[0] & 0x1F,
                                                   (test_value[0] >> 5) & 0x1F,
                                                   (test_value[0] >> 10) & 0x1F);
-                                        log_error("    Error:    %f %f %f %f\n",
+                                        log_error("    Error:    %f %f %f\n",
                                                   errors[0], errors[1],
                                                   errors[2]);
                                         break;
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index a221f05add..d53911e433 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -3,24 +3,32 @@ set(MODULE_NAME BRUTEFORCE)
 set(${MODULE_NAME}_SOURCES
     binary_double.cpp
     binary_float.cpp
+    binary_half.cpp
     binary_i_double.cpp
     binary_i_float.cpp
+    binary_i_half.cpp
     binary_operator_double.cpp
     binary_operator_float.cpp
+    binary_operator_half.cpp
     binary_two_results_i_double.cpp
     binary_two_results_i_float.cpp
+    binary_two_results_i_half.cpp
     common.cpp
     common.h
     function_list.cpp
     function_list.h
     i_unary_double.cpp
     i_unary_float.cpp
+    i_unary_half.cpp
     macro_binary_double.cpp
     macro_binary_float.cpp
+    macro_binary_half.cpp
     macro_unary_double.cpp
     macro_unary_float.cpp
+    macro_unary_half.cpp
     mad_double.cpp
     mad_float.cpp
+    mad_half.cpp
     main.cpp
     reference_math.cpp
     reference_math.h
@@ -28,15 +36,20 @@ set(${MODULE_NAME}_SOURCES
     sleep.h
     ternary_double.cpp
     ternary_float.cpp
+    ternary_half.cpp
     test_functions.h
     unary_double.cpp
     unary_float.cpp
+    unary_half.cpp
     unary_two_results_double.cpp
     unary_two_results_float.cpp
+    unary_two_results_half.cpp
     unary_two_results_i_double.cpp
     unary_two_results_i_float.cpp
+    unary_two_results_i_half.cpp
     unary_u_double.cpp
     unary_u_float.cpp
+    unary_u_half.cpp
     utility.cpp
     utility.h
 )
diff --git a/test_conformance/math_brute_force/README.txt b/test_conformance/math_brute_force/README.txt
index 5b289868f0..3e9e2b6cf7 100644
--- a/test_conformance/math_brute_force/README.txt
+++ b/test_conformance/math_brute_force/README.txt
@@ -38,10 +38,7 @@ values, followed by a few billion random values. If an error is found in a funct
 the test for that function terminates early, reports an error, and moves on to the 
 next test, if any.
 
-The test currently doesn't support half precision math functions covered in section 
-9 of the OpenCL 1.0 specification, but does cover the half_func functions covered in 
-section six. It also doesn't test the native_<funcname> functions, for which any result 
-is conformant.  
+The test does not cover the native_<funcname> functions, for which any result is conformant.
 
 For the OpenCL 1.0 time frame, the reference library shall be the operating system 
 math library, as modified by the test itself to conform to the OpenCL specification. 
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index cd47c76bb4..feeedc471d 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -219,6 +219,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_double *r;
     cl_double *s;
     cl_double *s2;
+    cl_int copysign_test = 0;
 
     Force64BitFPUPrecision();
 
@@ -377,12 +378,16 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     if (gSkipCorrectnessTesting) return CL_SUCCESS;
 
+    if (!strcmp(name, "copysign")) copysign_test = 1;
+
+#define ref_func(s, s2) (copysign_test ? func.f_ff_d(s, s2) : func.f_ff(s, s2))
+
     // Calculate the correctly rounded reference result
     r = (cl_double *)gOut_Ref + thread_id * buffer_elements;
     s = (cl_double *)gIn + thread_id * buffer_elements;
     s2 = (cl_double *)gIn2 + thread_id * buffer_elements;
     for (size_t j = 0; j < buffer_elements; j++)
-        r[j] = (cl_double)func.f_ff(s[j], s2[j]);
+        r[j] = (cl_double)ref_func(s[j], s2[j]);
 
     // Read the data back -- no need to wait for the first N-1 buffers but wait
     // for the last buffer. This is an in order queue.
@@ -412,7 +417,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (t[j] != q[j])
             {
                 cl_double test = ((cl_double *)q)[j];
-                long double correct = func.f_ff(s[j], s2[j]);
+                long double correct = ref_func(s[j], s2[j]);
                 float err = Bruteforce_Ulp_Error_Double(test, correct);
                 int fail = !(fabsf(err) <= ulps);
 
@@ -449,8 +454,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         // retry per section 6.5.3.3
                         if (IsDoubleSubnormal(s[j]))
                         {
-                            long double correct2 = func.f_ff(0.0, s2[j]);
-                            long double correct3 = func.f_ff(-0.0, s2[j]);
+                            long double correct2 = ref_func(0.0, s2[j]);
+                            long double correct3 = ref_func(-0.0, s2[j]);
                             float err2 =
                                 Bruteforce_Ulp_Error_Double(test, correct2);
                             float err3 =
@@ -472,10 +477,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                             // try with both args as zero
                             if (IsDoubleSubnormal(s2[j]))
                             {
-                                correct2 = func.f_ff(0.0, 0.0);
-                                correct3 = func.f_ff(-0.0, 0.0);
-                                long double correct4 = func.f_ff(0.0, -0.0);
-                                long double correct5 = func.f_ff(-0.0, -0.0);
+                                correct2 = ref_func(0.0, 0.0);
+                                correct3 = ref_func(-0.0, 0.0);
+                                long double correct4 = ref_func(0.0, -0.0);
+                                long double correct5 = ref_func(-0.0, -0.0);
                                 err2 =
                                     Bruteforce_Ulp_Error_Double(test, correct2);
                                 err3 =
@@ -507,8 +512,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         }
                         else if (IsDoubleSubnormal(s2[j]))
                         {
-                            long double correct2 = func.f_ff(s[j], 0.0);
-                            long double correct3 = func.f_ff(s[j], -0.0);
+                            long double correct2 = ref_func(s[j], 0.0);
+                            long double correct3 = ref_func(s[j], -0.0);
                             float err2 =
                                 Bruteforce_Ulp_Error_Double(test, correct2);
                             float err3 =
diff --git a/test_conformance/math_brute_force/binary_half.cpp b/test_conformance/math_brute_force/binary_half.cpp
new file mode 100644
index 0000000000..3a2395c705
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_half.cpp
@@ -0,0 +1,784 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "harness/errorHelpers.h"
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+#include "reference_math.h"
+
+#include <cstring>
+#include <algorithm>
+
+namespace {
+
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    // Source generator: every GetBinaryKernel parameter type is Half.
+    auto gen = [](const std::string &kernel_name, const char *builtin,
+                  cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
+                               ParameterType::Half, ParameterType::Half,
+                               vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, gen);
+}
+
+// Thread specific data for a worker thread
+struct ThreadInfo
+{
+    clMemWrapper inBuf; // first input buffer for the thread
+    clMemWrapper inBuf2; // second input buffer for the thread
+    clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffer per vector size
+    float maxError; // max error value. Init to 0.
+    double
+        maxErrorValue; // position of the max error value (param 1).  Init to 0.
+    double maxErrorValue2; // position of the max error value (param 2).  Init
+                           // to 0.
+    MTdataHolder d; // random state used to generate input values
+
+    clCommandQueueWrapper
+        tQueue; // per thread command queue to improve performance
+};
+
+struct TestInfoBase
+{
+    size_t subBufferSize; // Size of the sub-buffer in elements
+    const Func *f; // A pointer to the function info
+
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs
+    cl_uint step; // step between each chunk and the next.
+    cl_uint scale; // stride between individual test values
+    float ulps; // max_allowed ulps
+    int ftz; // non-zero if running in flush to zero mode
+
+    int isFDim; // presumably non-zero when f is fdim - TODO confirm
+    int skipNanInf; // presumably skips NaN/Inf checks when set - TODO confirm
+    int isNextafter; // presumably non-zero when f is nextafter - TODO confirm
+};
+
+struct TestInfo : public TestInfoBase
+{
+    TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
+
+    // Per-thread state, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
+    // Programs for various vector sizes.
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+};
+
+// A table of more difficult cases to get right
+const cl_half specialValuesHalf[] = {
+    0xffff, 0x0000, 0x0001, 0x7c00, /* NaN, +0, smallest denormal, INFINITY */
+    0xfc00, /*-INFINITY*/
+    0x8000, /*-0*/
+    0x7bff, /*HALF_MAX*/
+    0x0400, /*HALF_MIN*/
+    0x03ff, /* Largest denormal */
+    0x3c00, /* 1 */
+    0xbc00, /* -1 */
+    0x3555, /*nearest value to 1/3*/
+    0x3bff, /*largest number less than one*/
+    0xc000, /* -2 */
+    0xfbff, /* -HALF_MAX */
+    0x8400, /* -HALF_MIN */
+    0x4248, /* M_PI_H */
+    0xc248, /* -M_PI_H */
+    0xbbff, /* negation of the largest number less than one */
+};
+
+constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
+
+cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_half);
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    float ulps = job->ulps;
+    fptr func = job->f->func;
+    int ftz = job->ftz;
+    MTdata d = tinfo->d;
+    cl_int error;
+    const char *name = job->f->name;
+
+    int isFDim = job->isFDim;
+    int skipNanInf = job->skipNanInf;
+    int isNextafter = job->isNextafter;
+    cl_ushort *t;
+    cl_half *r;
+    std::vector<float> s(0), s2(0);
+    cl_uint j = 0;
+
+    RoundingMode oldRoundMode;
+    cl_int copysign_test = 0;
+
+    // start the map of the output arrays
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_ushort *out[VECTOR_SIZE_COUNT];
+
+    if (gHostFill)
+    {
+        // start the map of the output arrays
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            out[j] = (cl_ushort *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
+
+    // Init input array
+    cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
+    cl_ushort *p2 = (cl_ushort *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount =
+        specialValuesHalfCount * specialValuesHalfCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if (job_id <= (cl_uint)indx)
+    { // test edge cases
+        uint32_t x, y;
+
+        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        y = (job_id * buffer_elements) / specialValuesHalfCount;
+
+        for (; j < buffer_elements; j++)
+        {
+            p[j] = specialValuesHalf[x];
+            p2[j] = specialValuesHalf[y];
+            if (++x >= specialValuesHalfCount)
+            {
+                x = 0;
+                y++;
+                if (y >= specialValuesHalfCount) break;
+            }
+        }
+    }
+
+    // Init any remaining values.
+    for (; j < buffer_elements; j++)
+    {
+        p[j] = (cl_ushort)genrand_int32(d);
+        p2[j] = (cl_ushort)genrand_int32(d);
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_size, p, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
+                                      buffer_size, p2, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if (gHostFill)
+        {
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xacdcacdc;
+        if (gHostFill)
+        {
+            memset_pattern4(out[j], &pattern, buffer_size);
+            error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                            out[j], 0, NULL, NULL);
+            test_error(error, "clEnqueueUnmapMemObject failed!\n");
+        }
+        else
+        {
+            error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                        &pattern, sizeof(pattern), 0,
+                                        buffer_size, 0, NULL, NULL);
+            test_error(error, "clEnqueueFillBuffer failed!\n");
+        }
+
+        // run the kernel
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
+                                    &tinfo->inBuf2)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            return error;
+        }
+    }
+
+    // Get that moving
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+
+    if (gSkipCorrectnessTesting)
+    {
+        return CL_SUCCESS;
+    }
+
+    FPU_mode_type oldMode;
+    oldRoundMode = kRoundToNearestEven;
+    if (isFDim)
+    {
+        // Calculate the correctly rounded reference result
+        memset(&oldMode, 0, sizeof(oldMode));
+        if (ftz) ForceFTZ(&oldMode);
+
+        // Set the rounding mode to match the device
+        if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
+    }
+
+    if (!strcmp(name, "copysign")) copysign_test = 1;
+
+#define ref_func(s, s2) (copysign_test ? func.f_ff_f(s, s2) : func.f_ff(s, s2))
+
+    // Calculate the correctly rounded reference result
+    r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
+    t = (cl_ushort *)r;
+    s.resize(buffer_elements);
+    s2.resize(buffer_elements);
+    for (j = 0; j < buffer_elements; j++)
+    {
+        s[j] = cl_half_to_float(p[j]);
+        s2[j] = cl_half_to_float(p2[j]);
+        if (isNextafter)
+            r[j] = cl_half_from_float(reference_nextafterh(s[j], s2[j]),
+                                      CL_HALF_RTE);
+        else
+            r[j] = cl_half_from_float(ref_func(s[j], s2[j]), CL_HALF_RTE);
+    }
+
+    if (isFDim && ftz) RestoreFPState(&oldMode);
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
+    // an in order queue.
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
+        out[j] = (cl_ushort *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
+            buffer_size, 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                       error);
+            return error;
+        }
+    }
+
+    // Verify data
+
+    for (j = 0; j < buffer_elements; j++)
+    {
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        {
+            cl_ushort *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if (t[j] != q[j])
+            {
+                double correct;
+                if (isNextafter)
+                    correct = reference_nextafterh(s[j], s2[j]);
+                else
+                    correct = ref_func(s[j], s2[j]);
+
+                float test = cl_half_to_float(q[j]);
+
+                // Per section 10 paragraph 6, accept any result if an input or
+                // output is an infinity or NaN or overflow
+                if (skipNanInf)
+                {
+                    // Note: no double rounding here.  Reference functions
+                    // calculate in single precision.
+                    if (IsFloatInfinity(correct) || IsFloatNaN(correct)
+                        || IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
+                        || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
+                        continue;
+                }
+                float err = Ulp_Error_Half(q[j], correct);
+                int fail = !(fabsf(err) <= ulps);
+
+                if (fail && ftz)
+                {
+                    // retry per section 6.5.3.2
+                    if (IsHalfResultSubnormal(correct, ulps))
+                    {
+                        if (isNextafter)
+                        {
+                            correct = reference_nextafterh(s[j], s2[j], false);
+                            err = Ulp_Error_Half(q[j], correct);
+                            fail = !(fabsf(err) <= ulps);
+                        }
+
+                        fail = fail && (test != 0.0f);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    if (IsHalfSubnormal(p[j]))
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+                        if (isNextafter)
+                        {
+                            correct2 = reference_nextafterh(0.0, s2[j]);
+                            correct3 = reference_nextafterh(-0.0, s2[j]);
+                        }
+                        else
+                        {
+                            correct2 = ref_func(0.0, s2[j]);
+                            correct3 = ref_func(-0.0, s2[j]);
+                        }
+                        if (skipNanInf)
+                        {
+                            // Note: no double rounding here.  Reference
+                            // functions calculate in single precision.
+                            if (IsFloatInfinity(correct2)
+                                || IsFloatNaN(correct2)
+                                || IsFloatInfinity(correct3)
+                                || IsFloatNaN(correct3))
+                                continue;
+                        }
+
+                        auto check_error = [&]() {
+                            err2 = Ulp_Error_Half(q[j], correct2);
+                            err3 = Ulp_Error_Half(q[j], correct3);
+                            fail = fail
+                                && ((!(fabsf(err2) <= ulps))
+                                    && (!(fabsf(err3) <= ulps)));
+                        };
+                        check_error();
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct2, ulps)
+                            || IsHalfResultSubnormal(correct3, ulps))
+                        {
+                            if (fail && isNextafter)
+                            {
+                                correct2 =
+                                    reference_nextafterh(0.0, s2[j], false);
+                                correct3 =
+                                    reference_nextafterh(-0.0, s2[j], false);
+                                check_error();
+                            }
+
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // allow to omit denorm values for platforms with no
+                        // denorm support for nextafter
+                        if (fail && (isNextafter)
+                            && (correct <= cl_half_to_float(0x3FF))
+                            && (correct >= cl_half_to_float(0x83FF)))
+                        {
+                            fail = fail && (q[j] != p[j]);
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // try with both args as zero
+                        if (IsHalfSubnormal(p2[j]))
+                        {
+                            double correct4, correct5;
+                            float err4, err5;
+
+                            if (isNextafter)
+                            {
+                                correct2 = reference_nextafterh(0.0, 0.0);
+                                correct3 = reference_nextafterh(-0.0, 0.0);
+                                correct4 = reference_nextafterh(0.0, -0.0);
+                                correct5 = reference_nextafterh(-0.0, -0.0);
+                            }
+                            else
+                            {
+                                correct2 = ref_func(0.0, 0.0);
+                                correct3 = ref_func(-0.0, 0.0);
+                                correct4 = ref_func(0.0, -0.0);
+                                correct5 = ref_func(-0.0, -0.0);
+                            }
+
+                            // Per section 10 paragraph 6, accept any result if
+                            // an input or output is an infinity or NaN or
+                            // overflow
+                            if (skipNanInf)
+                            {
+                                // Note: no double rounding here.  Reference
+                                // functions calculate in single precision.
+                                if (IsFloatInfinity(correct2)
+                                    || IsFloatNaN(correct2)
+                                    || IsFloatInfinity(correct3)
+                                    || IsFloatNaN(correct3)
+                                    || IsFloatInfinity(correct4)
+                                    || IsFloatNaN(correct4)
+                                    || IsFloatInfinity(correct5)
+                                    || IsFloatNaN(correct5))
+                                    continue;
+                            }
+
+                            err2 = Ulp_Error_Half(q[j], correct2);
+                            err3 = Ulp_Error_Half(q[j], correct3);
+                            err4 = Ulp_Error_Half(q[j], correct4);
+                            err5 = Ulp_Error_Half(q[j], correct5);
+                            fail = fail
+                                && ((!(fabsf(err2) <= ulps))
+                                    && (!(fabsf(err3) <= ulps))
+                                    && (!(fabsf(err4) <= ulps))
+                                    && (!(fabsf(err5) <= ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+                            if (fabsf(err4) < fabsf(err)) err = err4;
+                            if (fabsf(err5) < fabsf(err)) err = err5;
+
+                            // retry per section 6.5.3.4
+                            if (IsHalfResultSubnormal(correct2, ulps)
+                                || IsHalfResultSubnormal(correct3, ulps)
+                                || IsHalfResultSubnormal(correct4, ulps)
+                                || IsHalfResultSubnormal(correct5, ulps))
+                            {
+                                fail = fail && (test != 0.0f);
+                                if (!fail) err = 0.0f;
+                            }
+
+                            // allow to omit denorm values for platforms with no
+                            // denorm support for nextafter
+                            if (fail && (isNextafter)
+                                && (correct <= cl_half_to_float(0x3FF))
+                                && (correct >= cl_half_to_float(0x83FF)))
+                            {
+                                fail = fail && (q[j] != p2[j]);
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                    else if (IsHalfSubnormal(p2[j]))
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+
+                        if (isNextafter)
+                        {
+                            correct2 = reference_nextafterh(s[j], 0.0);
+                            correct3 = reference_nextafterh(s[j], -0.0);
+                        }
+                        else
+                        {
+                            correct2 = ref_func(s[j], 0.0);
+                            correct3 = ref_func(s[j], -0.0);
+                        }
+
+                        if (skipNanInf)
+                        {
+                            // Note: no double rounding here.  Reference
+                            // functions calculate in single precision.
+                            if (IsFloatInfinity(correct) || IsFloatNaN(correct)
+                                || IsFloatInfinity(correct2)
+                                || IsFloatNaN(correct2))
+                                continue;
+                        }
+
+                        auto check_error = [&]() {
+                            err2 = Ulp_Error_Half(q[j], correct2);
+                            err3 = Ulp_Error_Half(q[j], correct3);
+                            fail = fail
+                                && ((!(fabsf(err2) <= ulps))
+                                    && (!(fabsf(err3) <= ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+                        };
+                        check_error();
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct2, ulps)
+                            || IsHalfResultSubnormal(correct3, ulps))
+                        {
+                            if (fail && isNextafter)
+                            {
+                                correct2 =
+                                    reference_nextafterh(s[j], 0.0, false);
+                                correct3 =
+                                    reference_nextafterh(s[j], -0.0, false);
+                                check_error();
+                            }
+
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // allow to omit denorm values for platforms with no
+                        // denorm support for nextafter
+                        if (fail && (isNextafter)
+                            && (correct <= cl_half_to_float(0x3FF))
+                            && (correct >= cl_half_to_float(0x83FF)))
+                        {
+                            fail = fail && (q[j] != p2[j]);
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+
+                if (fabsf(err) > tinfo->maxError)
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if (fail)
+                {
+                    vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
+                               "%a (0x%04x)}\nExpected: %a  (half 0x%04x) "
+                               "\nActual: %a (half 0x%04x) at index: %u\n",
+                               name, sizeNames[k], err, s[j], p[j], s2[j],
+                               p2[j], cl_half_to_float(r[j]), r[j], test, q[j],
+                               j);
+                    error = -1;
+                    return error;
+                }
+            }
+        }
+    }
+
+    if (isFDim && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements, job->ulps,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+
+    return error;
+}
+
+} // anonymous namespace
+
+int TestFunc_Half_Half_Half_common(const Func *f, MTdata d, int isNextafter,
+                                   bool relaxedMode)
+{ // Shared driver for the half = f(half, half) brute-force tests
+    TestInfoBase test_info_base;
+    cl_int error;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+    double maxErrorVal2 = 0.0;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+    // Zero the base fields, then construct the full TestInfo from them
+    memset(&test_info_base, 0, sizeof(test_info_base));
+    TestInfo test_info(test_info_base);
+
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_half));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // the 32-bit product overflowed: fall back to a single job
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->half_ulps;
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+
+    test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
+    test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
+    test_info.isNextafter = isNextafter;
+
+    test_info.tinfo.resize(test_info.threadCount);
+
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
+    { // per-thread input/output sub-buffers, command queue and RNG
+        cl_buffer_region region = { i * test_info.subBufferSize
+                                        * sizeof(cl_half),
+                                    test_info.subBufferSize * sizeof(cl_half) };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            return error;
+        }
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            return error;
+        }
+
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error(
+                    "Error: Unable to create sub-buffer of gOutBuffer[%d] "
+                    "for region {%zu, %zu}\n",
+                    (int)j, region.origin, region.size);
+                return error;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            return error;
+        }
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Build the kernels: one build job per tested vector size
+    {
+        BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
+                                       test_info.programs, f->nameInCode };
+        error = ThreadPool_Do(BuildKernel_HalfFn,
+                              gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                              &build_info);
+        test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
+    }
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
+
+        // Keep the largest per-thread error and the inputs that produced it
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
+        {
+            if (test_info.tinfo[i].maxError > maxError)
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        test_error(error, "ThreadPool_Do: TestHalf failed\n");
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+    return error;
+}
+
+int TestFunc_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
+{ // Regular half,half -> half entry point: runs the common driver as-is
+    return TestFunc_Half_Half_Half_common(f, d, /*isNextafter=*/0, relaxedMode);
+}
+
+int TestFunc_Half_Half_Half_nextafter(const Func *f, MTdata d, bool relaxedMode)
+{ // nextafter entry point: same driver with its isNextafter flag set
+    return TestFunc_Half_Half_Half_common(f, d, /*isNextafter=*/1, relaxedMode);
+}
diff --git a/test_conformance/math_brute_force/binary_i_half.cpp b/test_conformance/math_brute_force/binary_i_half.cpp
new file mode 100644
index 0000000000..c74a845a4f
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_i_half.cpp
@@ -0,0 +1,548 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <climits>
+#include <cstring>
+
+namespace {
+
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{ // Builds the kernels for job_id; p must point at a BuildKernelInfo
+    auto &build_info = *static_cast<BuildKernelInfo *>(p);
+    auto make_source = [](const std::string &kernel_name, const char *builtin,
+                          cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Half,
+                               ParameterType::Half, ParameterType::Int,
+                               vector_size_index);
+    };
+    return BuildKernels(build_info, job_id, make_source);
+}
+
+// State owned by a single worker thread
+struct ThreadInfo
+{
+    clMemWrapper inBuf; // first input (half values) for the thread
+    clMemWrapper inBuf2; // second input (int values) for the thread
+    clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+    float maxError; // max error value. Init to 0.
+    // position of the max error value (param 1).  Init to 0.
+    double maxErrorValue;
+    // position of the max error value (param 2).  Init to 0.
+    cl_int maxErrorValue2;
+    MTdataHolder d; // random number generator used to fill the inputs
+    // per thread command queue to improve performance
+    clCommandQueueWrapper tQueue;
+};
+
+struct TestInfoBase
+{ // Scalar and pointer job parameters; TestInfo adds the containers
+    size_t subBufferSize; // Size of the sub-buffer in elements
+    const Func *f; // A pointer to the function info
+
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs
+    cl_uint step; // step between each chunk and the next.
+    cl_uint scale; // stride between individual test values
+    float ulps; // max_allowed ulps
+    int ftz; // non-zero if running in flush to zero mode
+};
+
+struct TestInfo : public TestInfoBase
+{ // Full job description; TestHalf receives it through its void *data
+    TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
+
+    // Per-thread state, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
+    // Programs for various vector sizes.
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+};
+
+// Half-precision bit patterns that are the more difficult cases to get right
+const cl_half specialValuesHalf[] = {
+    0xffff, 0x0000, 0x0001, 0x7c00, /* NaN, +0, smallest denormal, INFINITY */
+    0xfc00, /*-INFINITY*/
+    0x8000, /*-0*/
+    0x7bff, /*HALF_MAX*/
+    0x0400, /*HALF_MIN*/
+    0x03ff, /* Largest denormal */
+    0x3c00, /* 1 */
+    0xbc00, /* -1 */
+    0x3555, /*nearest value to 1/3*/
+    0x3bff, /*largest number less than one*/
+    0xc000, /* -2 */
+    0xfbff, /* -HALF_MAX */
+    0x8400, /* -HALF_MIN */
+    0x4248, /* M_PI_H */
+    0xc248, /* -M_PI_H */
+    0xbbff, /* negative of the largest number less than one */
+};
+
+constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
+// Edge-case values for the int operand, paired with specialValuesHalf
+const int specialValuesInt3[] = { 0,     1,       2,       3,       1022, 1023,
+                                  1024,  INT_MIN, INT_MAX, -1,      -2,   -3,
+                                  -1022, -1023,   -1024,   -INT_MAX };
+constexpr size_t specialValuesInt3Count = ARRAY_SIZE(specialValuesInt3);
+
+cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    float ulps = job->ulps;
+    fptr func = job->f->func;
+    int ftz = job->ftz;
+    MTdata d = tinfo->d;
+    cl_uint j, k;
+    cl_int error;
+    const char *name = job->f->name;
+    cl_ushort *t;
+    cl_half *r;
+    std::vector<float> s;
+    cl_int *s2;
+
+    // start the map of the output arrays
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_ushort *out[VECTOR_SIZE_COUNT];
+
+    if (gHostFill)
+    {
+        // start the map of the output arrays
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            out[j] = (cl_ushort *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_elements * sizeof(cl_ushort), 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
+
+    // Init input array
+    cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
+    cl_int *p2 = (cl_int *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount =
+        specialValuesHalfCount * specialValuesInt3Count;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+    if (job_id <= (cl_uint)indx)
+    { // test edge cases
+        uint32_t x, y;
+
+        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        y = (job_id * buffer_elements) / specialValuesHalfCount;
+
+        for (; j < buffer_elements; j++)
+        {
+            p[j] = specialValuesHalf[x];
+            p2[j] = specialValuesInt3[y];
+            if (++x >= specialValuesHalfCount)
+            {
+                x = 0;
+                y++;
+                if (y >= specialValuesInt3Count) break;
+            }
+        }
+    }
+
+    // Init any remaining values.
+    for (; j < buffer_elements; j++)
+    {
+        p[j] = (cl_ushort)genrand_int32(d);
+        p2[j] = genrand_int32(d);
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_elements * sizeof(cl_half), p, 0,
+                                      NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
+                                      buffer_elements * sizeof(cl_int), p2, 0,
+                                      NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if (gHostFill)
+        {
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xacdcacdc;
+        if (gHostFill)
+        {
+            memset_pattern4(out[j], &pattern,
+                            buffer_elements * sizeof(cl_half));
+            error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                            out[j], 0, NULL, NULL);
+            test_error(error, "clEnqueueUnmapMemObject failed!\n");
+        }
+        else
+        {
+            error = clEnqueueFillBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], &pattern, sizeof(pattern), 0,
+                buffer_elements * sizeof(cl_half), 0, NULL, NULL);
+            test_error(error, "clEnqueueFillBuffer failed!\n");
+        }
+
+        // run the kernel
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
+                                    &tinfo->inBuf2)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            return error;
+        }
+    }
+
+    // Get that moving
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
+
+    // Calculate the correctly rounded reference result
+    r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
+    t = (cl_ushort *)r;
+    s.resize(buffer_elements);
+    s2 = (cl_int *)gIn2 + thread_id * buffer_elements;
+    for (j = 0; j < buffer_elements; j++)
+    {
+        s[j] = cl_half_to_float(p[j]);
+        r[j] = HFF(func.f_fi(s[j], s2[j]));
+    }
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
+    // an in order queue.
+    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
+    {
+        out[j] = (cl_ushort *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
+            buffer_elements * sizeof(cl_ushort), 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                       error);
+            return error;
+        }
+    }
+
+    // Wait for the last buffer
+    out[j] = (cl_ushort *)clEnqueueMapBuffer(
+        tinfo->tQueue, tinfo->outBuf[j], CL_TRUE, CL_MAP_READ, 0,
+        buffer_elements * sizeof(cl_ushort), 0, NULL, NULL, &error);
+    if (error || NULL == out[j])
+    {
+        vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
+        return error;
+    }
+
+    // Verify data
+    for (j = 0; j < buffer_elements; j++)
+    {
+        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        {
+            cl_ushort *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if (t[j] != q[j])
+            {
+                float test = cl_half_to_float(q[j]);
+                double correct = func.f_fi(s[j], s2[j]);
+                float err = Ulp_Error_Half(q[j], correct);
+                int fail = !(fabsf(err) <= ulps);
+
+                if (fail && ftz)
+                {
+                    // retry per section 6.5.3.2
+                    if (IsHalfResultSubnormal(correct, ulps))
+                    {
+                        fail = fail && (test != 0.0f);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if (IsHalfSubnormal(p[j]))
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+                        correct2 = func.f_fi(0.0, s2[j]);
+                        correct3 = func.f_fi(-0.0, s2[j]);
+                        err2 = Ulp_Error_Half(q[j], correct2);
+                        err3 = Ulp_Error_Half(q[j], correct3);
+                        fail = fail
+                            && ((!(fabsf(err2) <= ulps))
+                                && (!(fabsf(err3) <= ulps)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct2, ulps)
+                            || IsHalfResultSubnormal(correct3, ulps))
+                        {
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+
+                if (fabsf(err) > tinfo->maxError)
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if (fail)
+                {
+                    vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
+                               "%d}\nExpected: %a (half 0x%04x) \nActual: %a "
+                               "(half 0x%04x) at index: %d\n",
+                               name, sizeNames[k], err, s[j], p[j], s2[j],
+                               cl_half_to_float(r[j]), r[j], test, q[j],
+                               (cl_uint)j);
+                    error = -1;
+                    return error;
+                }
+            }
+        }
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd ulps:%5.3f "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements, job->ulps,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+    return error;
+}
+
+} // anonymous namespace
+
+// Brute-force driver for half builtins with a (half, int) signature: creates
+// per-thread sub-buffers and command queues, builds the kernels, then runs
+// TestHalf over all jobs and logs the largest ulp error that was observed.
+int TestFunc_Half_Half_Int(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfoBase test_info_base;
+    cl_int error;
+    size_t i, j;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+    cl_int maxErrorVal2 = 0;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    // Init test_info from a zero-filled base
+    memset(&test_info_base, 0, sizeof(test_info_base));
+    TestInfo test_info(test_info_base);
+
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_int) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_half));
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // the cl_uint product overflowed: run everything as a single job
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->half_ulps;
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+
+    test_info.tinfo.resize(test_info.threadCount);
+    for (i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize
+                                        * sizeof(cl_half),
+                                    test_info.subBufferSize * sizeof(cl_half) };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            return error;
+        }
+        cl_buffer_region region2 = { i * test_info.subBufferSize
+                                         * sizeof(cl_int),
+                                     test_info.subBufferSize * sizeof(cl_int) };
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region2, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zd, %zd}\n",
+                       region2.origin, region2.size);
+            return error;
+        }
+
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
+                           "for region {%zd, %zd}\n",
+                           region.origin, region.size);
+                return error;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            return error;
+        }
+
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Init the kernels: one thread-pool job per vector size index
+    {
+        BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
+                                       test_info.programs, f->nameInCode };
+        error = ThreadPool_Do(BuildKernel_HalfFn,
+                              gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                              &build_info);
+        test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
+    }
+
+    // Run the kernels
+    if (!gSkipCorrectnessTesting)
+        error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
+
+    // Pick the largest error (and its operands) recorded by any thread
+    for (i = 0; i < test_info.threadCount; i++)
+    {
+        if (test_info.tinfo[i].maxError > maxError)
+        {
+            maxError = test_info.tinfo[i].maxError;
+            maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+        }
+    }
+
+    test_error(error, "ThreadPool_Do: TestHalf failed\n");
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/binary_operator_half.cpp b/test_conformance/math_brute_force/binary_operator_half.cpp
new file mode 100644
index 0000000000..366e63e8b1
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_operator_half.cpp
@@ -0,0 +1,661 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    // Source generator: a binary kernel whose three parameter types are Half.
+    auto makeSource = [](const std::string &kernel_name, const char *builtin,
+                         cl_uint vector_size_index) {
+        constexpr ParameterType kHalf = ParameterType::Half;
+        return GetBinaryKernel(kernel_name, builtin, kHalf, kHalf, kHalf,
+                               vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, makeSource);
+}
+
+// Per-worker state: TestHalf only touches the entry at tinfo[thread_id].
+struct ThreadInfo
+{
+    // Input and output buffers for the thread
+    clMemWrapper inBuf; // first operand, bound as kernel argument 1
+    clMemWrapper inBuf2; // second operand, bound as kernel argument 2
+    Buffers outBuf; // results, indexed by vector size; kernel argument 0
+
+    // Largest |ulp error| recorded by this thread so far. Init to 0.
+    float maxError;
+    // First operand (as float) at which maxError was recorded.  Init to 0.
+    double maxErrorValue;
+    // Second operand (as float) at which maxError was recorded.  Init to 0.
+    double maxErrorValue2;
+    MTdataHolder d; // random source TestHalf uses to fill the input slices
+
+    // Per thread command queue to improve performance
+    clCommandQueueWrapper tQueue;
+};
+
+// Run-wide parameters; TestHalf receives this through its void *data argument.
+struct TestInfo
+{
+    size_t subBufferSize; // Size of the sub-buffer in elements
+    const Func *f; // A pointer to the function info
+
+    // Programs for various vector sizes.
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs
+    cl_uint step; // step between each chunk and the next.
+    cl_uint scale; // stride between individual test values
+    float ulps; // max_allowed ulps
+    int ftz; // non-zero if running in flush to zero mode
+    // no special fields
+};
+
+// Half bit patterns that are hard to get right; TestHalf pairs them up first.
+const cl_half specialValuesHalf[] = {
+    0xffff, 0x0000, 0x0001, 0x7c00, /* NaN, +0, min denormal, +INFINITY */
+    0xfc00, /*-INFINITY*/
+    0x8000, /*-0*/
+    0x7bff, /*HALF_MAX*/
+    0x0400, /*HALF_MIN*/
+    0x03ff, /* Largest denormal */
+    0x3c00, /* 1 */
+    0xbc00, /* -1 */
+    0x3555, /*nearest value to 1/3*/
+    0x3bff, /*largest number less than one*/
+    0xc000, /* -2 */
+    0xfbff, /* -HALF_MAX */
+    0x8400, /* -HALF_MIN */
+    0x4248, /* M_PI_H */
+    0xc248, /* -M_PI_H */
+    0xbbff, /* Largest negative fraction */
+};
+
+constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
+
+// Worker job for binary half operators: fills this thread's slice of the
+// inputs (special-value pairs for the first jobs, random bits otherwise), runs
+// the kernel at every vector size, and checks each result against the host
+// reference. Returns CL_SUCCESS, an OpenCL error code, or -1 on a ulp failure.
+cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_half);
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    float ulps = job->ulps;
+    fptr func = job->f->func;
+    int ftz = job->ftz;
+    MTdata d = tinfo->d;
+    cl_int error;
+
+    const char *name = job->f->name;
+    cl_half *r = 0;
+    std::vector<float> s(0), s2(0);
+    RoundingMode oldRoundMode;
+
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_half *out[VECTOR_SIZE_COUNT];
+
+    if (gHostFill)
+    {
+        // Start non-blocking maps of the output buffers; e[j] tracks each one
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            out[j] = (cl_ushort *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
+
+    // Init input array
+    cl_half *p = (cl_half *)gIn + thread_id * buffer_elements;
+    cl_half *p2 = (cl_half *)gIn2 + thread_id * buffer_elements;
+    cl_uint idx = 0;
+    int totalSpecialValueCount =
+        specialValuesHalfCount * specialValuesHalfCount;
+    int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if (job_id <= (cl_uint)lastSpecialJobIndex)
+    {
+        // Insert special values: every (x, y) pair, with x varying fastest
+        uint32_t x, y;
+
+        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        y = (job_id * buffer_elements) / specialValuesHalfCount;
+        // y is tested in the loop condition, not via break, so idx advances
+        // past the final pair and the random fill below cannot overwrite it.
+        for (; idx < buffer_elements && y < specialValuesHalfCount; idx++)
+        {
+            p[idx] = specialValuesHalf[x];
+            p2[idx] = specialValuesHalf[y];
+            if (++x >= specialValuesHalfCount)
+            {
+                x = 0;
+                y++;
+            }
+        }
+    }
+
+    // Init any remaining values
+    for (; idx < buffer_elements; idx++)
+    {
+        p[idx] = (cl_half)genrand_int32(d);
+        p2[idx] = (cl_half)genrand_int32(d);
+    }
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_size, p, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
+                                      buffer_size, p2, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if (gHostFill)
+        {
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xacdcacdc;
+        if (gHostFill)
+        {
+            memset_pattern4(out[j], &pattern, buffer_size);
+            error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                            out[j], 0, NULL, NULL);
+            test_error(error, "clEnqueueUnmapMemObject failed!\n");
+        }
+        else
+        {
+            error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                        &pattern, sizeof(pattern), 0,
+                                        buffer_size, 0, NULL, NULL);
+            test_error(error, "clEnqueueFillBuffer failed!\n");
+        }
+
+        // Run the kernel
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
+                                    &tinfo->inBuf2)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            return error;
+        }
+    }
+
+    // Get that moving
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
+
+    // In FTZ mode, force the host FPU to flush to zero for the reference too
+    FPU_mode_type oldMode;
+    memset(&oldMode, 0, sizeof(oldMode));
+    if (ftz) ForceFTZ(&oldMode);
+
+    // Set the rounding mode to match the device
+    oldRoundMode = kRoundToNearestEven;
+    if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
+
+    // Calculate the correctly rounded reference result
+    r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
+    s.resize(buffer_elements);
+    s2.resize(buffer_elements);
+
+    for (size_t j = 0; j < buffer_elements; j++)
+    {
+        s[j] = HTF(p[j]);
+        s2[j] = HTF(p2[j]);
+        r[j] = HFF(func.f_ff(s[j], s2[j]));
+    }
+
+    if (ftz) RestoreFPState(&oldMode);
+
+    // Read the data back -- no need to wait for the first N-1 buffers but wait
+    // for the last buffer. This is an in order queue.
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        cl_bool blocking = (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
+        out[j] = (cl_ushort *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], blocking, CL_MAP_READ, 0,
+            buffer_size, 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                       error);
+            // Restore the rounding mode, as on the success path.
+            if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
+            return error;
+        }
+    }
+
+    // Verify data
+    for (size_t j = 0; j < buffer_elements; j++)
+    {
+        for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        {
+            cl_half *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if (r[j] != q[j])
+            {
+                float test = HTF(q[j]);
+                float correct = func.f_ff(s[j], s2[j]);
+
+                // Per section 10 paragraph 6, accept any result if an input or
+                // output is a infinity or NaN or overflow
+                if (!gInfNanSupport)
+                {
+                    // Note: no double rounding here.  Reference functions
+                    // calculate in single precision.
+                    if (IsFloatInfinity(correct) || IsFloatNaN(correct)
+                        || IsFloatInfinity(s2[j]) || IsFloatNaN(s2[j])
+                        || IsFloatInfinity(s[j]) || IsFloatNaN(s[j]))
+                        continue;
+                }
+
+                float err = Ulp_Error_Half(q[j], correct);
+                int fail = !(fabsf(err) <= ulps);
+
+                if (fail && ftz)
+                {
+                    // retry per section 6.5.3.2
+                    if (IsHalfResultSubnormal(correct, ulps))
+                    {
+                        fail = fail && (test != 0.0f);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if (IsHalfSubnormal(p[j]))
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+
+                        correct2 = func.f_ff(0.0, s2[j]);
+                        correct3 = func.f_ff(-0.0, s2[j]);
+
+                        // Per section 10 paragraph 6, accept any result if an
+                        // input or output is a infinity or NaN or overflow
+                        if (!gInfNanSupport)
+                        {
+                            // Note: no double rounding here.  Reference
+                            // functions calculate in single precision.
+                            if (IsFloatInfinity(correct2)
+                                || IsFloatNaN(correct2)
+                                || IsFloatInfinity(correct3)
+                                || IsFloatNaN(correct3))
+                                continue;
+                        }
+
+                        err2 = Ulp_Error_Half(q[j], correct2);
+                        err3 = Ulp_Error_Half(q[j], correct3);
+                        fail = fail
+                            && ((!(fabsf(err2) <= ulps))
+                                && (!(fabsf(err3) <= ulps)));
+
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct2, ulps)
+                            || IsHalfResultSubnormal(correct3, ulps))
+                        {
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // try with both args as zero
+                        if (IsHalfSubnormal(p2[j]))
+                        {
+                            double correct4, correct5;
+                            float err4, err5;
+
+                            correct2 = func.f_ff(0.0, 0.0);
+                            correct3 = func.f_ff(-0.0, 0.0);
+                            correct4 = func.f_ff(0.0, -0.0);
+                            correct5 = func.f_ff(-0.0, -0.0);
+
+                            // Per section 10 paragraph 6, accept any result if
+                            // an input or output is a infinity or NaN or
+                            // overflow
+                            if (!gInfNanSupport)
+                            {
+                                // Note: no double rounding here.  Reference
+                                // functions calculate in single precision.
+                                if (IsFloatInfinity(correct2)
+                                    || IsFloatNaN(correct2)
+                                    || IsFloatInfinity(correct3)
+                                    || IsFloatNaN(correct3)
+                                    || IsFloatInfinity(correct4)
+                                    || IsFloatNaN(correct4)
+                                    || IsFloatInfinity(correct5)
+                                    || IsFloatNaN(correct5))
+                                    continue;
+                            }
+
+                            err2 = Ulp_Error_Half(q[j], correct2);
+                            err3 = Ulp_Error_Half(q[j], correct3);
+                            err4 = Ulp_Error_Half(q[j], correct4);
+                            err5 = Ulp_Error_Half(q[j], correct5);
+                            fail = fail
+                                && ((!(fabsf(err2) <= ulps))
+                                    && (!(fabsf(err3) <= ulps))
+                                    && (!(fabsf(err4) <= ulps))
+                                    && (!(fabsf(err5) <= ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+                            if (fabsf(err4) < fabsf(err)) err = err4;
+                            if (fabsf(err5) < fabsf(err)) err = err5;
+
+                            // retry per section 6.5.3.4
+                            if (IsHalfResultSubnormal(correct2, ulps)
+                                || IsHalfResultSubnormal(correct3, ulps)
+                                || IsHalfResultSubnormal(correct4, ulps)
+                                || IsHalfResultSubnormal(correct5, ulps))
+                            {
+                                fail = fail && (test != 0.0f);
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                    else if (IsHalfSubnormal(p2[j]))
+                    {
+                        double correct2, correct3;
+                        float err2, err3;
+                        correct2 = func.f_ff(s[j], 0.0);
+                        correct3 = func.f_ff(s[j], -0.0);
+
+                        // Per section 10 paragraph 6, accept any result if an
+                        // input or output is a infinity or NaN or overflow
+                        if (!gInfNanSupport)
+                        {
+                            // Note: no double rounding here.  Reference
+                            // functions calculate in single precision.
+                            if (IsFloatInfinity(correct2)
+                                || IsFloatNaN(correct2)
+                                || IsFloatInfinity(correct3)
+                                || IsFloatNaN(correct3))
+                                continue;
+                        }
+
+                        err2 = Ulp_Error_Half(q[j], correct2);
+                        err3 = Ulp_Error_Half(q[j], correct3);
+                        fail = fail
+                            && ((!(fabsf(err2) <= ulps))
+                                && (!(fabsf(err3) <= ulps)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct2, ulps)
+                            || IsHalfResultSubnormal(correct3, ulps))
+                        {
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+
+                if (fabsf(err) > tinfo->maxError)
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                    tinfo->maxErrorValue2 = s2[j];
+                }
+                if (fail)
+                {
+                    vlog_error("\nERROR: %s%s: %f ulp error at {%a (0x%04x), "
+                               "%a (0x%04x)}\nExpected: %a  (half 0x%04x) "
+                               "\nActual: %a (half 0x%04x) at index: %zu\n",
+                               name, sizeNames[k], err, s[j], p[j], s2[j],
+                               p2[j], HTF(r[j]), r[j], test, q[j], j);
+                    // Restore the rounding mode, as on the success path.
+                    if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
+                    return -1;
+                }
+            }
+        }
+    }
+
+    if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
+
+    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements, job->ulps,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+
+    return CL_SUCCESS;
+}
+
+} // anonymous namespace
+
+int TestFunc_Half_Half_Half_Operator(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfo test_info{};
+    cl_int error;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+    double maxErrorVal2 = 0.0;
+    // Multi-threaded brute-force test driver for a two-input half function.
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    // Init test_info: every thread gets an equally sized slice of the buffers
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_half));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // the 32-bit product overflowed: run everything as a single job
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->half_ulps;
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+
+    test_info.tinfo.resize(test_info.threadCount);
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize
+                                        * sizeof(cl_half),
+                                    test_info.subBufferSize * sizeof(cl_half) };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            return error;
+        }
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            return error;
+        }
+
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of "
+                           "gOutBuffer[%d] for region {%zd, %zd}\n",
+                           (int)j, region.origin, region.size);
+                return error;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            return error;
+        }
+
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Build the kernels: one thread-pool job per tested vector size
+    {
+        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                    test_info.programs, f->nameInCode };
+        error = ThreadPool_Do(BuildKernel_HalfFn,
+                              gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                              &build_info);
+
+        test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
+    }
+    // Run the kernels (jobCount TestHalf jobs submitted to the thread pool)
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
+
+        // Pick the worst per-thread error and the inputs that produced it
+        for (cl_uint i = 0; i < test_info.threadCount; i++)
+        {
+            if (test_info.tinfo[i].maxError > maxError)
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+                maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+            }
+        }
+
+        test_error(error, "ThreadPool_Do: TestHalf failed\n");
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/binary_two_results_i_half.cpp b/test_conformance/math_brute_force/binary_two_results_i_half.cpp
new file mode 100644
index 0000000000..adeada7ca2
--- /dev/null
+++ b/test_conformance/math_brute_force/binary_two_results_i_half.cpp
@@ -0,0 +1,477 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cinttypes>
+#include <climits>
+#include <cstring>
+
+namespace {
+
+cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    // Thread-pool job: builds the kernels for job_id via BuildKernels().
+    auto emit = [](const std::string &name, const char *fn,
+                   cl_uint size_index) {
+        return GetBinaryKernel(name, fn, ParameterType::Half,
+                               ParameterType::Int, ParameterType::Half,
+                               ParameterType::Half, size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, emit);
+}
+
+struct ComputeReferenceInfoF
+{
+    const cl_half *x; // first input array
+    const cl_half *y; // second input array
+    cl_half *r; // receives the reference half results
+    int32_t *i; // receives the reference integer results
+    double (*f_ffpI)(double, double, int *); // host reference function
+    cl_uint lim; // total number of elements to process
+    cl_uint count; // number of elements handled by each job
+};
+
+cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
+{
+    // Thread-pool job: computes the reference results for slice `jid`.
+    ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
+    cl_uint lim = cri->lim;
+    cl_uint count = cri->count;
+    cl_uint off = jid * count;
+    // count is rounded up, so trailing jobs can start at or past lim; return
+    // early rather than letting the unsigned `lim - off` below wrap around.
+    if (off >= lim) return CL_SUCCESS;
+    if (off + count > lim) count = lim - off;
+    const cl_half *x = cri->x + off;
+    const cl_half *y = cri->y + off;
+    cl_half *r = cri->r + off;
+    int32_t *i = cri->i + off;
+    for (cl_uint j = 0; j < count; ++j)
+        r[j] = HFF((float)cri->f_ffpI(HTF(x[j]), HTF(y[j]), i + j));
+    return CL_SUCCESS;
+}
+
+} // anonymous namespace
+
+int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    // Brute-force test for half builtins returning {half, int} (e.g. remquo).
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    Programs programs;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+    int64_t maxError2 = 0;
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
+
+    // use larger type of output data to prevent overflowing buffer size
+    constexpr size_t buffer_size = BUFFER_SIZE / sizeof(int32_t);
+
+    cl_uint threadCount = GetThreadCount();
+
+    float half_ulps = f->half_ulps;
+
+    int testingRemquo = !strcmp(f->name, "remquo");
+
+    // Init the kernels
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
+    if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array
+        cl_half *p = (cl_half *)gIn;
+        cl_half *p2 = (cl_half *)gIn2;
+        for (size_t j = 0; j < buffer_size; j++)
+        {
+            p[j] = (cl_half)genrand_int32(d);
+            p2[j] = (cl_half)genrand_int32(d);
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          buffer_size * sizeof(cl_half), gIn, 0,
+                                          NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
+                                          buffer_size * sizeof(cl_half), gIn2,
+                                          0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
+            return error;
+        }
+
+        // Write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, BUFFER_SIZE, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer 1 failed!\n");
+
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
+                                            sizeof(pattern), 0, BUFFER_SIZE, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer 2 failed!\n");
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            // align working group size with the bigger output type
+            size_t vectorSize = sizeValues[j] * sizeof(int32_t);
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        if (threadCount > 1)
+        {
+            ComputeReferenceInfoF cri;
+            cri.x = p;
+            cri.y = p2;
+            cri.r = (cl_half *)gOut_Ref;
+            cri.i = (int32_t *)gOut_Ref2;
+            cri.f_ffpI = f->func.f_ffpI;
+            cri.lim = buffer_size;
+            cri.count = (cri.lim + threadCount - 1) / threadCount;
+            ThreadPool_Do(ReferenceF, threadCount, &cri);
+        }
+        else
+        {
+            cl_half *r = (cl_half *)gOut_Ref;
+            int32_t *r2 = (int32_t *)gOut_Ref2;
+            for (size_t j = 0; j < buffer_size; j++)
+                r[j] =
+                    HFF((float)f->func.f_ffpI(HTF(p[j]), HTF(p2[j]), r2 + j));
+        }
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            cl_bool blocking =
+                (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
+                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking, 0,
+                                         BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray2 failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        cl_half *t = (cl_half *)gOut_Ref;
+        int32_t *t2 = (int32_t *)gOut_Ref2;
+        for (size_t j = 0; j < buffer_size; j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                cl_half *q = (cl_half *)(gOut[k]);
+                int32_t *q2 = (int32_t *)gOut2[k];
+
+                // Check for exact match to correctly rounded result
+                if (t[j] == q[j] && t2[j] == q2[j]) continue;
+
+                // Check for paired NaNs
+                if (IsHalfNaN(t[j]) && IsHalfNaN(q[j]) && t2[j] == q2[j])
+                    continue;
+
+                cl_half test = ((cl_half *)q)[j];
+                int correct2 = INT_MIN;
+                float correct =
+                    (float)f->func.f_ffpI(HTF(p[j]), HTF(p2[j]), &correct2);
+                float err = Ulp_Error_Half(test, correct);
+                int64_t iErr;
+
+                // in case of remquo, we only care about the sign and last
+                // seven bits of integer as per the spec.
+                if (testingRemquo)
+                    iErr = (long long)(q2[j] & 0x0000007f)
+                        - (long long)(correct2 & 0x0000007f);
+                else
+                    iErr = (long long)q2[j] - (long long)correct2;
+
+                // For remquo, if y = 0, x is infinite, or either is NaN
+                // then the standard either neglects to say what is returned
+                // in iptr or leaves it undefined or implementation defined.
+                int iptrUndefined = IsHalfInfinity(p[j]) || (HTF(p2[j]) == 0.0f)
+                    || IsHalfNaN(p2[j]) || IsHalfNaN(p[j]);
+                if (iptrUndefined) iErr = 0;
+
+                int fail = !(fabsf(err) <= half_ulps && iErr == 0);
+                if (ftz && fail)
+                {
+                    // retry per section 6.5.3.2 (flush to +0 or -0 is ok)
+                    if (IsHalfResultSubnormal(correct, half_ulps))
+                    {
+                        fail = fail && !(HTF(test) == 0.0f && iErr == 0);
+                        if (!fail) err = 0.0f;
+                    }
+
+                    // retry per section 6.5.3.3
+                    if (IsHalfSubnormal(p[j]))
+                    {
+                        int correct3i, correct4i;
+                        float correct3 =
+                            (float)f->func.f_ffpI(0.0, HTF(p2[j]), &correct3i);
+                        float correct4 =
+                            (float)f->func.f_ffpI(-0.0, HTF(p2[j]), &correct4i);
+                        float err2 = Ulp_Error_Half(test, correct3);
+                        float err3 = Ulp_Error_Half(test, correct4);
+                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
+                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
+                        fail = fail
+                            && ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
+                                && (!(fabsf(err3) <= half_ulps && iErr4 == 0)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct3, half_ulps)
+                            || IsHalfResultSubnormal(correct4, half_ulps))
+                        {
+                            fail = fail
+                                && !(HTF(test) == 0.0f
+                                     && (iErr3 == 0 || iErr4 == 0));
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // try with both args as zero
+                        if (IsHalfSubnormal(p2[j]))
+                        {
+                            int correct7i, correct8i;
+                            correct3 = f->func.f_ffpI(0.0, 0.0, &correct3i);
+                            correct4 = f->func.f_ffpI(-0.0, 0.0, &correct4i);
+                            double correct7 =
+                                f->func.f_ffpI(0.0, -0.0, &correct7i);
+                            double correct8 =
+                                f->func.f_ffpI(-0.0, -0.0, &correct8i);
+                            err2 = Ulp_Error_Half(test, correct3);
+                            err3 = Ulp_Error_Half(test, correct4);
+                            float err4 = Ulp_Error_Half(test, correct7);
+                            float err5 = Ulp_Error_Half(test, correct8);
+                            iErr3 = (long long)q2[j] - (long long)correct3i;
+                            iErr4 = (long long)q2[j] - (long long)correct4i;
+                            int64_t iErr7 =
+                                (long long)q2[j] - (long long)correct7i;
+                            int64_t iErr8 =
+                                (long long)q2[j] - (long long)correct8i;
+                            fail = fail
+                                && ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
+                                    && (!(fabsf(err3) <= half_ulps
+                                          && iErr4 == 0))
+                                    && (!(fabsf(err4) <= half_ulps
+                                          && iErr7 == 0))
+                                    && (!(fabsf(err5) <= half_ulps
+                                          && iErr8 == 0)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+                            if (fabsf(err4) < fabsf(err)) err = err4;
+                            if (fabsf(err5) < fabsf(err)) err = err5;
+                            if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                            if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+                            if (llabs(iErr7) < llabs(iErr)) iErr = iErr7;
+                            if (llabs(iErr8) < llabs(iErr)) iErr = iErr8;
+
+                            // retry per section 6.5.3.4
+                            if (IsHalfResultSubnormal(correct3, half_ulps)
+                                || IsHalfResultSubnormal(correct4, half_ulps)
+                                || IsHalfResultSubnormal(correct7, half_ulps)
+                                || IsHalfResultSubnormal(correct8, half_ulps))
+                            {
+                                fail = fail
+                                    && !(HTF(test) == 0.0f
+                                         && (iErr3 == 0 || iErr4 == 0
+                                             || iErr7 == 0 || iErr8 == 0));
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                    else if (IsHalfSubnormal(p2[j]))
+                    {
+                        int correct3i, correct4i;
+                        double correct3 =
+                            f->func.f_ffpI(HTF(p[j]), 0.0, &correct3i);
+                        double correct4 =
+                            f->func.f_ffpI(HTF(p[j]), -0.0, &correct4i);
+                        float err2 = Ulp_Error_Half(test, correct3);
+                        float err3 = Ulp_Error_Half(test, correct4);
+                        int64_t iErr3 = (long long)q2[j] - (long long)correct3i;
+                        int64_t iErr4 = (long long)q2[j] - (long long)correct4i;
+                        fail = fail
+                            && ((!(fabsf(err2) <= half_ulps && iErr3 == 0))
+                                && (!(fabsf(err3) <= half_ulps && iErr4 == 0)));
+                        if (fabsf(err2) < fabsf(err)) err = err2;
+                        if (fabsf(err3) < fabsf(err)) err = err3;
+                        if (llabs(iErr3) < llabs(iErr)) iErr = iErr3;
+                        if (llabs(iErr4) < llabs(iErr)) iErr = iErr4;
+
+                        // retry per section 6.5.3.4
+                        if (IsHalfResultSubnormal(correct3, half_ulps)
+                            || IsHalfResultSubnormal(correct4, half_ulps))
+                        {
+                            fail = fail
+                                && !(HTF(test) == 0.0f
+                                     && (iErr3 == 0 || iErr4 == 0));
+                            if (!fail) err = 0.0f;
+                        }
+                    }
+                }
+                if (fabsf(err) > maxError)
+                {
+                    maxError = fabsf(err);
+                    maxErrorVal = HTF(p[j]);
+                }
+                if (llabs(iErr) > maxError2)
+                {
+                    maxError2 = llabs(iErr);
+                    maxErrorVal2 = HTF(p[j]);
+                }
+
+                if (fail)
+                {
+                    vlog_error("\nERROR: %s%s: {%f, %" PRId64
+                               "} ulp error at {%a, %a} "
+                               "({0x%04x, 0x%04x}): *{%a, %d} ({0x%04x, "
+                               "0x%8.8x}) vs. {%a, %d} ({0x%04x, 0x%8.8x})\n",
+                               f->name, sizeNames[k], err, iErr, HTF(p[j]),
+                               HTF(p2[j]), p[j], p2[j], HTF(t[j]), t2[j], t[j],
+                               t2[j], HTF(test), q2[j], test, q2[j]);
+                    return -1;
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+             maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
index 47f493e7a6..df45a70073 100644
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2022 The Khronos Group Inc.
+// Copyright (c) 2022-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -27,8 +27,11 @@ const char *GetTypeName(ParameterType type)
 {
     switch (type)
     {
+        case ParameterType::Half: return "half";
         case ParameterType::Float: return "float";
         case ParameterType::Double: return "double";
+        case ParameterType::Short: return "short";
+        case ParameterType::UShort: return "ushort";
         case ParameterType::Int: return "int";
         case ParameterType::UInt: return "uint";
         case ParameterType::Long: return "long";
@@ -41,9 +44,13 @@ const char *GetUndefValue(ParameterType type)
 {
     switch (type)
     {
+        case ParameterType::Half:
         case ParameterType::Float:
         case ParameterType::Double: return "NAN";
 
+        case ParameterType::Short:
+        case ParameterType::UShort: return "0x5678";
+
         case ParameterType::Int:
         case ParameterType::UInt: return "0x12345678";
 
@@ -71,14 +78,17 @@ void EmitEnableExtension(std::ostringstream &kernel,
                          const std::initializer_list<ParameterType> &types)
 {
     bool needsFp64 = false;
+    bool needsFp16 = false;
 
     for (const auto &type : types)
     {
         switch (type)
         {
             case ParameterType::Double: needsFp64 = true; break;
-
+            case ParameterType::Half: needsFp16 = true; break;
             case ParameterType::Float:
+            case ParameterType::Short:
+            case ParameterType::UShort:
             case ParameterType::Int:
             case ParameterType::UInt:
             case ParameterType::Long:
@@ -89,6 +99,7 @@ void EmitEnableExtension(std::ostringstream &kernel,
     }
 
     if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+    if (needsFp16) kernel << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
 }
 
 std::string GetBuildOptions(bool relaxed_mode)
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
index 481b3b2a29..eb82c5f8f3 100644
--- a/test_conformance/math_brute_force/common.h
+++ b/test_conformance/math_brute_force/common.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2021 The Khronos Group Inc.
+// Copyright (c) 2021-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -36,8 +36,11 @@ using Buffers = std::array<clMemWrapper, VECTOR_SIZE_COUNT>;
 // Types supported for kernel code generation.
 enum class ParameterType
 {
+    Half,
     Float,
     Double,
+    Short,
+    UShort,
     Int,
     UInt,
     Long,
diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp
index 917362852c..0b4ec0e544 100644
--- a/test_conformance/math_brute_force/function_list.cpp
+++ b/test_conformance/math_brute_force/function_list.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -29,36 +29,41 @@
 // Only use ulps information in spir test
 #ifdef FUNCTION_LIST_ULPS_ONLY
 
-#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                       \
+#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
+            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
             RELAXED_OFF, _type                                                 \
     }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type,     \
-                  _relaxed_embedded_ulp)                                       \
+#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
+                  _type, _relaxed_embedded_ulp)                                \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { NULL }, { NULL }, { NULL },      \
-            _ulp, _ulp, _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp,    \
-            _rmode, RELAXED_ON, _type                                          \
+            _ulp, _ulp, _half_ulp, _embedded_ulp, _relaxed_ulp,                \
+            _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type                   \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name), { NULL },          \
-            { NULL }, { NULL }, _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, \
-            _rmode, RELAXED_OFF, _type                                         \
+            { NULL }, { NULL }, _ulp, _ulp, _ulp, _embedded_ulp, INFINITY,     \
+            INFINITY, _rmode, RELAXED_OFF, _type                               \
     }
-#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)   \
+#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
+                       _rmode, _type)                                          \
     {                                                                          \
         STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
+            _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, \
+            _type                                                              \
     }
 
 #define unaryF NULL
+#define unaryOF NULL
 #define i_unaryF NULL
 #define unaryF_u NULL
 #define macro_unaryF NULL
 #define binaryF NULL
+#define binaryOF NULL
+#define binaryF_nextafter NULL
 #define binaryOperatorF NULL
 #define binaryF_i NULL
 #define macro_binaryF NULL
@@ -68,6 +73,8 @@
 #define binaryF_two_results_i NULL
 #define mad_function NULL
 
+#define reference_copysignf NULL
+#define reference_copysign NULL
 #define reference_sqrt NULL
 #define reference_sqrtl NULL
 #define reference_divide NULL
@@ -76,31 +83,34 @@
 
 #else // FUNCTION_LIST_ULPS_ONLY
 
-#define ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                       \
+#define ENTRY(_name, _ulp, _embedded_ulp, _half_ulp, _rmode, _type)            \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l }, { (void*)reference_##_name },     \
-            _ulp, _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,             \
+            _ulp, _ulp, _half_ulp, _embedded_ulp, INFINITY, INFINITY, _rmode,  \
             RELAXED_OFF, _type                                                 \
     }
-#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _relaxed_ulp, _rmode, _type,     \
-                  _relaxed_embedded_ulp)                                       \
+#define ENTRY_EXT(_name, _ulp, _embedded_ulp, _half_ulp, _relaxed_ulp, _rmode, \
+                  _type, _relaxed_embedded_ulp)                                \
     {                                                                          \
         STRINGIFY(_name), STRINGIFY(_name), { (void*)reference_##_name },      \
             { (void*)reference_##_name##l },                                   \
-            { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _embedded_ulp, \
-            _relaxed_ulp, _relaxed_embedded_ulp, _rmode, RELAXED_ON, _type     \
+            { (void*)reference_##relaxed_##_name }, _ulp, _ulp, _half_ulp,     \
+            _embedded_ulp, _relaxed_ulp, _relaxed_embedded_ulp, _rmode,        \
+            RELAXED_ON, _type                                                  \
     }
 #define HALF_ENTRY(_name, _ulp, _embedded_ulp, _rmode, _type)                  \
     {                                                                          \
         "half_" STRINGIFY(_name), "half_" STRINGIFY(_name),                    \
             { (void*)reference_##_name }, { NULL }, { NULL }, _ulp, _ulp,      \
-            _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
+            _ulp, _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF,      \
+            _type                                                              \
     }
-#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _rmode, _type)   \
+#define OPERATOR_ENTRY(_name, _operator, _ulp, _embedded_ulp, _half_ulp,       \
+                       _rmode, _type)                                          \
     {                                                                          \
         STRINGIFY(_name), _operator, { (void*)reference_##_name },             \
-            { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp,             \
+            { (void*)reference_##_name##l }, { NULL }, _ulp, _ulp, _half_ulp,  \
             _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type      \
     }
 
@@ -108,85 +118,114 @@ static constexpr vtbl _unary = {
     "unary",
     TestFunc_Float_Float,
     TestFunc_Double_Double,
+    TestFunc_Half_Half,
 };
 
+static constexpr vtbl _unaryof = { "unaryof", TestFunc_Float_Float, NULL,
+                                   NULL };
+
 static constexpr vtbl _i_unary = {
     "i_unary",
     TestFunc_Int_Float,
     TestFunc_Int_Double,
+    TestFunc_Int_Half,
 };
 
 static constexpr vtbl _unary_u = {
     "unary_u",
     TestFunc_Float_UInt,
     TestFunc_Double_ULong,
+    TestFunc_Half_UShort,
 };
 
 static constexpr vtbl _macro_unary = {
     "macro_unary",
     TestMacro_Int_Float,
     TestMacro_Int_Double,
+    TestMacro_Int_Half,
 };
 
 static constexpr vtbl _binary = {
     "binary",
     TestFunc_Float_Float_Float,
     TestFunc_Double_Double_Double,
+    TestFunc_Half_Half_Half,
+};
+
+static constexpr vtbl _binary_nextafter = {
+    "binary",
+    TestFunc_Float_Float_Float,
+    TestFunc_Double_Double_Double,
+    TestFunc_Half_Half_Half_nextafter,
 };
 
+static constexpr vtbl _binaryof = { "binaryof", TestFunc_Float_Float_Float,
+                                    NULL, NULL };
+
 static constexpr vtbl _binary_operator = {
     "binaryOperator",
     TestFunc_Float_Float_Float_Operator,
     TestFunc_Double_Double_Double_Operator,
+    TestFunc_Half_Half_Half_Operator,
 };
 
 static constexpr vtbl _binary_i = {
     "binary_i",
     TestFunc_Float_Float_Int,
     TestFunc_Double_Double_Int,
+    TestFunc_Half_Half_Int,
 };
 
 static constexpr vtbl _macro_binary = {
     "macro_binary",
     TestMacro_Int_Float_Float,
     TestMacro_Int_Double_Double,
+    TestMacro_Int_Half_Half,
 };
 
 static constexpr vtbl _ternary = {
     "ternary",
     TestFunc_Float_Float_Float_Float,
     TestFunc_Double_Double_Double_Double,
+    TestFunc_Half_Half_Half_Half,
 };
 
 static constexpr vtbl _unary_two_results = {
     "unary_two_results",
     TestFunc_Float2_Float,
     TestFunc_Double2_Double,
+    TestFunc_Half2_Half,
 };
 
 static constexpr vtbl _unary_two_results_i = {
     "unary_two_results_i",
     TestFunc_FloatI_Float,
     TestFunc_DoubleI_Double,
+    TestFunc_HalfI_Half,
 };
 
 static constexpr vtbl _binary_two_results_i = {
     "binary_two_results_i",
     TestFunc_FloatI_Float_Float,
     TestFunc_DoubleI_Double_Double,
+    TestFunc_HalfI_Half_Half,
 };
 
 static constexpr vtbl _mad_tbl = {
     "ternary",
     TestFunc_mad_Float,
     TestFunc_mad_Double,
+    TestFunc_mad_Half,
 };
 
 #define unaryF &_unary
+#define unaryOF &_unaryof
 #define i_unaryF &_i_unary
 #define unaryF_u &_unary_u
 #define macro_unaryF &_macro_unary
 #define binaryF &_binary
+#define binaryF_nextafter &_binary_nextafter
+#define binaryOF &_binaryof
 #define binaryOperatorF &_binary_operator
 #define binaryF_i &_binary_i
 #define macro_binaryF &_macro_binary
@@ -198,108 +237,121 @@ static constexpr vtbl _mad_tbl = {
 
 #endif // FUNCTION_LIST_ULPS_ONLY
 
+// clang-format off
 const Func functionList[] = {
-    ENTRY_EXT(acos, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(acosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(acospi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(asin, 4.0f, 4.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(asinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(asinpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(atan, 5.0f, 5.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
-    ENTRY(atanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(atanpi, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(atan2, 6.0f, 6.0f, FTZ_OFF, binaryF),
-    ENTRY(atan2pi, 6.0f, 6.0f, FTZ_OFF, binaryF),
-    ENTRY(cbrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(ceil, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(copysign, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY_EXT(cos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY_EXT(acos, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(acosh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(acospi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(asin, 4.0f, 4.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(asinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(asinpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(atan, 5.0f, 5.0f, 2.0f, 4096.0f, FTZ_OFF, unaryF, 4096.0f),
+    ENTRY(atanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(atanpi, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(atan2, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
+    ENTRY(atan2pi, 6.0f, 6.0f, 2.0f, FTZ_OFF, binaryF),
+    ENTRY(cbrt, 2.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
+    ENTRY(ceil, 0.0f, 0.0f, 0.f, FTZ_OFF, unaryF),
+    { "copysign",
+      "copysign",
+      { (void*)reference_copysignf },
+      { (void*)reference_copysign },
+      { (void*)reference_copysignf },
+      0.0f,
+      0.0f,
+      0.0f,
+      0.0f,
+      INFINITY,
+      INFINITY,
+      FTZ_OFF,
+      RELAXED_OFF,
+      binaryF },
+    ENTRY_EXT(cos, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(cosh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(cospi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(cosh, 4.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
+    ENTRY_EXT(cospi, 4.0f, 4.0f, 2.f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    //                                  ENTRY( erfc,                  16.0f,
-    //                                  16.0f,         FTZ_OFF,     unaryF),
-    //                                  //disabled for 1.0 due to lack of
-    //                                  reference implementation ENTRY( erf,
-    //                                  16.0f,         16.0f,         FTZ_OFF,
-    //                                  unaryF), //disabled for 1.0 due to lack
-    //                                  of reference implementation
-    ENTRY_EXT(exp, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
-              4.0f), // relaxed error is actually overwritten in unary.c as it
-                     // is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp2, 3.0f, 4.0f, 3.0f, FTZ_OFF, unaryF,
-              4.0f), // relaxed error is actually overwritten in unary.c as it
-                     // is 3+floor(fabs(2*x))
-    ENTRY_EXT(exp10, 3.0f, 4.0f, 8192.0f, FTZ_OFF, unaryF,
-              8192.0f), // relaxed error is actually overwritten in unary.c as
-                        // it is 3+floor(fabs(2*x)) in derived mode,
+    //ENTRY(erfc, 16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation
+    //ENTRY(erf,  16.0f, 16.0f, FTZ_OFF, unaryF), //disabled for 1.0 due to lack of reference implementation
+
+    // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x))
+    ENTRY_EXT(exp, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF, 4.0f),
+
+    // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x))
+    ENTRY_EXT(exp2, 3.0f, 4.0f, 2.f, 3.0f, FTZ_OFF, unaryF, 4.0f),
+
+    // relaxed error is overwritten in unary.c as it is 3+floor(fabs(2*x)) in derived mode;
     // in non-derived mode it uses the ulp error for half_exp10.
-    ENTRY(expm1, 3.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(fabs, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fdim, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(floor, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(fma, 0.0f, 0.0f, FTZ_OFF, ternaryF),
-    ENTRY(fmax, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmin, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fmod, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(fract, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(frexp, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
-    ENTRY(hypot, 4.0f, 4.0f, FTZ_OFF, binaryF),
-    ENTRY(ilogb, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
-    ENTRY(isequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isfinite, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isgreaterequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isinf, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isless, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(islessgreater, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isnan, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnormal, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY(isnotequal, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(isunordered, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
-    ENTRY(ldexp, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
-    ENTRY(lgamma, INFINITY, INFINITY, FTZ_OFF, unaryF),
-    ENTRY(lgamma_r, INFINITY, INFINITY, FTZ_OFF, unaryF_two_results_i),
-    ENTRY_EXT(log, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(exp10, 3.0f, 4.0f, 2.f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
+
+    ENTRY(expm1, 3.0f, 4.0f, 2.f, FTZ_OFF, unaryF),
+    ENTRY(fabs, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(fdim, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(floor, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(fma, 0.0f, 0.0f, 0.0f, FTZ_OFF, ternaryF),
+    ENTRY(fmax, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fmin, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fmod, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(fract, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
+    ENTRY(frexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results_i),
+    ENTRY(hypot, 4.0f, 4.0f, 2.0f, FTZ_OFF, binaryF),
+    ENTRY(ilogb, 0.0f, 0.0f, 0.0f, FTZ_OFF, i_unaryF),
+    ENTRY(isequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isfinite, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isgreaterequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isinf, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isless, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(islessequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(islessgreater, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isnan, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isnormal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY(isnotequal, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(isunordered, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_binaryF),
+    ENTRY(ldexp, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_i),
+    ENTRY(lgamma, INFINITY, INFINITY, INFINITY, FTZ_OFF, unaryF),
+    ENTRY(lgamma_r, INFINITY, INFINITY, INFINITY, FTZ_OFF,
+          unaryF_two_results_i),
+    ENTRY_EXT(log, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log2, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log2, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY_EXT(log10, 3.0f, 4.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
+    ENTRY_EXT(log10, 3.0f, 4.0f, 2.0f, 4.76837158203125e-7f, FTZ_OFF, unaryF,
               4.76837158203125e-7f), // relaxed ulp 2^-21
-    ENTRY(log1p, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(logb, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function,
-              INFINITY), // in fast-relaxed-math mode it has to be either
-                         // exactly rounded fma or exactly rounded a*b+c
-    ENTRY(maxmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(minmag, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(modf, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
-    ENTRY(nan, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
-    ENTRY(nextafter, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY_EXT(pow, 16.0f, 16.0f, 8192.0f, FTZ_OFF, binaryF,
-              8192.0f), // in derived mode the ulp error is calculated as
-                        // exp2(y*log2(x)) and in non-derived it is the same as
-                        // half_pow
-    ENTRY(pown, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
-    ENTRY(powr, 16.0f, 16.0f, FTZ_OFF, binaryF),
-    //                                  ENTRY( reciprocal,            1.0f,
-    //                                  1.0f,         FTZ_OFF,     unaryF),
-    ENTRY(remainder, 0.0f, 0.0f, FTZ_OFF, binaryF),
-    ENTRY(remquo, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
-    ENTRY(rint, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rootn, 16.0f, 16.0f, FTZ_OFF, binaryF_i),
-    ENTRY(round, 0.0f, 0.0f, FTZ_OFF, unaryF),
-    ENTRY(rsqrt, 2.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY(signbit, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
-    ENTRY_EXT(sin, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(log1p, 2.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(logb, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+
+    // In fast-relaxed-math mode it has to be either exactly rounded fma or exactly rounded a*b+c
+    ENTRY_EXT(mad, INFINITY, INFINITY, INFINITY, INFINITY, FTZ_OFF, mad_function, INFINITY),
+
+    ENTRY(maxmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(minmag, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(modf, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_two_results),
+    ENTRY(nan, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF_u),
+    ENTRY(nextafter, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_nextafter),
+
+    // In derived mode the ulp error is calculated as exp2(y*log2(x)).
+    // In non-derived it is the same as half_pow.
+    ENTRY_EXT(pow, 16.0f, 16.0f, 4.0f, 8192.0f, FTZ_OFF, binaryF, 8192.0f),
+
+    ENTRY(pown, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
+    ENTRY(powr, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF),
+    //ENTRY(reciprocal, 1.0f, 1.0f, FTZ_OFF, unaryF),
+    ENTRY(remainder, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF),
+    ENTRY(remquo, 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryF_two_results_i),
+    ENTRY(rint, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(rootn, 16.0f, 16.0f, 4.0f, FTZ_OFF, binaryF_i),
+    ENTRY(round, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+    ENTRY(rsqrt, 2.0f, 4.0f, 1.0f, FTZ_OFF, unaryF),
+    ENTRY(signbit, 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    ENTRY_EXT(sin, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY_EXT(sincos, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF_two_results,
+    ENTRY_EXT(sincos, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF,
+              unaryF_two_results,
               0.00048828125f), // relaxed ulp 2^-11
-    ENTRY(sinh, 4.0f, 4.0f, FTZ_OFF, unaryF),
-    ENTRY_EXT(sinpi, 4.0f, 4.0f, 0.00048828125f, FTZ_OFF, unaryF,
+    ENTRY(sinh, 4.0f, 4.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY_EXT(sinpi, 4.0f, 4.0f, 2.0f, 0.00048828125f, FTZ_OFF, unaryF,
               0.00048828125f), // relaxed ulp 2^-11
     { "sqrt",
       "sqrt",
@@ -308,6 +360,7 @@ const Func functionList[] = {
       { NULL },
       3.0f,
       0.0f,
+      0.0f,
       4.0f,
       INFINITY,
       INFINITY,
@@ -322,41 +375,40 @@ const Func functionList[] = {
       0.0f,
       0.0f,
       0.0f,
+      0.0f,
       INFINITY,
       INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       unaryF },
-    ENTRY_EXT(
-        tan, 5.0f, 5.0f, 8192.0f, FTZ_OFF, unaryF,
-        8192.0f), // in derived mode it the ulp error is calculated as sin/cos
-                  // and in non-derived mode it is the same as half_tan.
-    ENTRY(tanh, 5.0f, 5.0f, FTZ_OFF, unaryF),
-    ENTRY(tanpi, 6.0f, 6.0f, FTZ_OFF, unaryF),
-    //                                    ENTRY( tgamma,                 16.0f,
-    //                                    16.0f,         FTZ_OFF,     unaryF),
-    //                                    // Commented this out until we can be
-    //                                    sure this requirement is realistic
-    ENTRY(trunc, 0.0f, 0.0f, FTZ_OFF, unaryF),
-
-    HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryF),
-    HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryF),
-    HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryF),
-    HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryF),
+
+    // In derived mode the ulp error is calculated as sin/cos.
+    // In non-derived mode it is the same as half_tan.
+    ENTRY_EXT(tan, 5.0f, 5.0f, 2.0f, 8192.0f, FTZ_OFF, unaryF, 8192.0f),
+
+    ENTRY(tanh, 5.0f, 5.0f, 2.0f, FTZ_OFF, unaryF),
+    ENTRY(tanpi, 6.0f, 6.0f, 2.0f, FTZ_OFF, unaryF),
+    //ENTRY(tgamma, 16.0f, 16.0f, FTZ_OFF, unaryF), Commented this out until we can be sure this requirement is realistic
+    ENTRY(trunc, 0.0f, 0.0f, 0.0f, FTZ_OFF, unaryF),
+
+    HALF_ENTRY(cos, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(divide, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
+    HALF_ENTRY(exp, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(exp2, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(exp10, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(log, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(log2, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(log10, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(powr, 8192.0f, 8192.0f, FTZ_ON, binaryOF),
+    HALF_ENTRY(recip, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(rsqrt, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(sin, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(sqrt, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
+    HALF_ENTRY(tan, 8192.0f, 8192.0f, FTZ_ON, unaryOF),
 
     // basic operations
-    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(add, "+", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(subtract, "-", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
     { "divide",
       "/",
       { (void*)reference_divide },
@@ -364,6 +416,7 @@ const Func functionList[] = {
       { (void*)reference_relaxed_divide },
       2.5f,
       0.0f,
+      0.0f,
       3.0f,
       2.5f,
       INFINITY,
@@ -378,15 +431,17 @@ const Func functionList[] = {
       0.0f,
       0.0f,
       0.0f,
+      0.0f,
       0.f,
       INFINITY,
       FTZ_OFF,
       RELAXED_OFF,
       binaryOperatorF },
-    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
-    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, FTZ_OFF,
+    OPERATOR_ENTRY(multiply, "*", 0.0f, 0.0f, 0.0f, FTZ_OFF, binaryOperatorF),
+    OPERATOR_ENTRY(assignment, "", 0.0f, 0.0f, 0.0f, FTZ_OFF,
                    unaryF), // A simple copy operation
-    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
+    OPERATOR_ENTRY(not, "!", 0.0f, 0.0f, 0.0f, FTZ_OFF, macro_unaryF),
 };
+// clang-format on
 
 const size_t functionListCount = sizeof(functionList) / sizeof(functionList[0]);
diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h
index 95a2945932..56190e334d 100644
--- a/test_conformance/math_brute_force/function_list.h
+++ b/test_conformance/math_brute_force/function_list.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -52,6 +52,7 @@ union dptr {
     long double (*f_f)(long double);
     long double (*f_u)(cl_ulong);
     int (*i_f)(long double);
+    double (*f_ff_d)(double, double);
     long double (*f_ff)(long double, long double);
     int (*i_ff)(long double, long double);
     long double (*f_fi)(long double, int);
@@ -70,6 +71,9 @@ struct vtbl
     int (*DoubleTestFunc)(
         const struct Func *, MTdata,
         bool); // may be NULL if function is single precision only
+    int (*HalfTestFunc)(
+        const struct Func *, MTdata,
+        bool); // may be NULL if function is single precision only
 };
 
 struct Func
@@ -82,6 +86,7 @@ struct Func
     fptr rfunc;
     float float_ulps;
     float double_ulps;
+    float half_ulps;
     float float_embedded_ulps;
     float relaxed_error;
     float relaxed_embedded_error;
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index 953c33bbb4..2ed8087441 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -98,7 +98,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                     vlog_error(
                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                         error, j);
-                    goto exit;
+                    return error;
                 }
             }
             else
@@ -124,13 +124,13 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
                                         sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
@@ -138,7 +138,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                                                 NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -159,7 +159,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -188,8 +188,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                         "\nERROR: %sD%s: %d ulp error at %.13la: *%d vs. %d\n",
                         f->name, sizeNames[k], err, ((double *)gIn)[j], t[j],
                         q[j]);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -221,6 +220,5 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
     return error;
 }
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 0ce37cc8cc..0df35c4add 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -97,7 +97,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                     vlog_error(
                         "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
                         error, j);
-                    goto exit;
+                    return error;
                 }
             }
             else
@@ -123,13 +123,13 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                         sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
             if ((error = clSetKernelArg(kernels[j][thread_id], 1,
                                         sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
             if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
@@ -137,7 +137,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                                 NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -158,7 +158,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -187,8 +187,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                                "*%d vs. %d\n",
                                f->name, sizeNames[k], err, ((float *)gIn)[j],
                                ((cl_uint *)gIn)[j], t[j], q[j]);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -219,6 +218,5 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
     return error;
 }
diff --git a/test_conformance/math_brute_force/i_unary_half.cpp b/test_conformance/math_brute_force/i_unary_half.cpp
new file mode 100644
index 0000000000..3a8991a384
--- /dev/null
+++ b/test_conformance/math_brute_force/i_unary_half.cpp
@@ -0,0 +1,220 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <cinttypes>
+
+namespace {
+
+static cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED,
+                                 void *p)
+{
+    // Runs as a ThreadPool_Do job; p is the caller's BuildKernelInfo.
+    auto make_src = [](const std::string &kernel_name, const char *builtin,
+                       cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
+                              ParameterType::Half, vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, make_src);
+}
+
+} // anonymous namespace
+
+int TestFunc_Int_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    Programs programs;
+    KernelMatrix kernels;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    int ftz = f->ftz || 0 == (gHalfCapabilities & CL_FP_DENORM) || gForceFTZ;
+    uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
+    size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
+                                     size_t(1ULL << (sizeof(cl_half) * 8)));
+    size_t bufferSizeIn = bufferElements * sizeof(cl_half);
+    size_t bufferSizeOut = bufferElements * sizeof(cl_int);
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+    // This thread computes the reference results itself (no ThreadPool), so
+    // FTZ is disabled here; at_scope_exit restores the FP state on return.
+    FPU_mode_type oldMode;
+    DisableFTZ(&oldMode);
+    std::shared_ptr<int> at_scope_exit(
+        nullptr, [&oldMode](int *) { RestoreFPState(&oldMode); });
+
+    // Build the kernels: one ThreadPool job per tested vector size
+    {
+        BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
+        if ((error = ThreadPool_Do(BuildKernel_HalfFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            return error;
+    }
+    std::vector<float> s(bufferElements);
+
+    for (uint64_t i = 0; i < (1ULL << 16); i += step)
+    {
+        // Fill the input with consecutive 16-bit patterns starting at i
+        cl_ushort *p = (cl_ushort *)gIn;
+
+        for (size_t j = 0; j < bufferElements; j++) p[j] = (cl_ushort)i + j;
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          bufferSizeIn, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+
+        // Pre-fill every output buffer with a recognizable garbage pattern
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, bufferSizeOut);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, bufferSizeOut,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, bufferSizeOut,
+                                            0, NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer failed!\n");
+            }
+        }
+
+        // Set the arguments and enqueue the kernel for each vector size
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_int);
+            size_t localCount = (bufferSizeOut + vectorSize - 1) / vectorSize;
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Submit the queued work; a flush failure is only logged
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Compute the host reference result for every input value
+        int *r = (int *)gOut_Ref;
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            s[j] = HTF(p[j]);
+            r[j] = f->func.i_f(s[j]);
+        }
+        // Read the results back with blocking reads
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                             bufferSizeOut, gOut[j], 0, NULL,
+                                             NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Compare every device result with the reference, bit for bit
+        uint32_t *t = (uint32_t *)gOut_Ref;
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                uint32_t *q = (uint32_t *)(gOut[k]);
+                // Mismatch: tolerated only for a flushed subnormal input
+                if (t[j] != q[j])
+                {
+                    if (ftz && IsHalfSubnormal(p[j]))
+                    {
+                        unsigned int correct0 = f->func.i_f(0.0);
+                        unsigned int correct1 = f->func.i_f(-0.0);
+                        if (q[j] == correct0 || q[j] == correct1) continue;
+                    }
+
+                    uint32_t err = t[j] - q[j];
+                    if (q[j] > t[j]) err = q[j] - t[j];
+                    vlog_error("\nERROR: %s%s: %d ulp error at %a (0x%04x): "
+                               "*%d vs. %d\n",
+                               f->name, sizeNames[k], err, s[j], p[j], t[j],
+                               q[j]);
+                    return -1;
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10zd \n",
+                     i, step, bufferSizeOut);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+    }
+
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/macro_binary_half.cpp b/test_conformance/math_brute_force/macro_binary_half.cpp
new file mode 100644
index 0000000000..ea4ef8128f
--- /dev/null
+++ b/test_conformance/math_brute_force/macro_binary_half.cpp
@@ -0,0 +1,540 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    // Runs as a ThreadPool_Do job; p is the caller's BuildKernelInfo.
+    auto make_src = [](const std::string &kernel_name, const char *builtin,
+                       cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Short,
+                               ParameterType::Half, ParameterType::Half,
+                               vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, make_src);
+}
+
+struct ThreadInfo
+{
+    clMemWrapper inBuf; // first-operand input sub-buffer for the thread
+    clMemWrapper inBuf2; // second-operand input sub-buffer for the thread
+    clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // one output per vector size
+    MTdataHolder d; // per-thread random number generator state
+    clCommandQueueWrapper
+        tQueue; // per thread command queue to improve performance
+};
+
+struct TestInfoBase
+{
+    size_t subBufferSize; // Elements in each thread's sub-buffer
+    const Func *f; // Description of the function under test
+
+    cl_uint threadCount; // Number of worker threads (and ThreadInfo entries)
+    cl_uint jobCount; // Number of jobs handed to ThreadPool_Do
+    cl_uint step; // subBufferSize * scale; a job's base is job_id * step
+    cl_uint scale; // stride between individual test values
+    int ftz; // non-zero if running in flush to zero mode
+};
+
+struct TestInfo : public TestInfoBase
+{
+    TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
+
+    // Per-thread resources, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
+    // One program per vector size index.
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size_index][thread_id]
+    KernelMatrix k;
+};
+
+// Half-precision bit patterns for inputs that are difficult to get right
+const cl_half specialValuesHalf[] = {
+    0xffff, 0x0000, 0x0001, 0x7c00, /* NaN, +0, min denormal, +INFINITY */
+    0xfc00, /*-INFINITY*/
+    0x8000, /*-0*/
+    0x7bff, /*HALF_MAX*/
+    0x0400, /*HALF_MIN*/
+    0x03ff, /* Largest denormal */
+    0x3c00, /* 1 */
+    0xbc00, /* -1 */
+    0x3555, /*nearest value to 1/3*/
+    0x3bff, /*largest number less than one*/
+    0xc000, /* -2 */
+    0xfbff, /* -HALF_MAX */
+    0x8400, /* -HALF_MIN */
+    0x4248, /* M_PI_H */
+    0xc248, /* -M_PI_H */
+    0xbbff, /* -(largest number less than one) */
+};
+
+constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
+
+cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data; // shared state passed to ThreadPool_Do
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_half);
+    cl_uint base = job_id * (cl_uint)job->step; // only used for progress
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    fptr func = job->f->func;
+    int ftz = job->ftz;
+    MTdata d = tinfo->d; // this thread's random number generator
+    cl_uint j, k;
+    cl_int error;
+    const char *name = job->f->name;
+    cl_short *t, *r; // both point at this thread's slice of gOut_Ref
+    std::vector<float> s(0), s2(0); // inputs converted to float
+
+    // Map events and mapped host pointers, indexed by vector size index
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_short *out[VECTOR_SIZE_COUNT];
+
+    if (gHostFill)
+    {
+        // Start a non-blocking write map of each output buffer
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            out[j] = (cl_short *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
+        }
+
+        // Submit the maps; a flush failure is only logged
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
+
+    // Host-side input slices belonging to this thread
+    cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
+    cl_ushort *p2 = (cl_ushort *)gIn2 + thread_id * buffer_elements;
+    j = 0;
+    int totalSpecialValueCount =
+        specialValuesHalfCount * specialValuesHalfCount;
+    int indx = (totalSpecialValueCount - 1) / buffer_elements;
+
+    if (job_id <= (cl_uint)indx)
+    { // first jobs: walk every ordered pair of special values
+        uint32_t x, y;
+
+        x = (job_id * buffer_elements) % specialValuesHalfCount;
+        y = (job_id * buffer_elements) / specialValuesHalfCount;
+
+        // Bounding on y lets j advance past the last pair before exiting.
+        for (; j < buffer_elements && y < specialValuesHalfCount; j++)
+        {
+            p[j] = specialValuesHalf[x];
+            p2[j] = specialValuesHalf[y];
+            if (++x >= specialValuesHalfCount)
+            {
+                x = 0;
+                y++;
+            }
+        }
+    }
+
+    // Fill whatever is left with random bit patterns.
+    for (; j < buffer_elements; j++)
+    {
+        p[j] = (cl_ushort)genrand_int32(d);
+        p2[j] = (cl_ushort)genrand_int32(d);
+    }
+
+    // Upload both input slices with non-blocking writes
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_size, p, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
+                                      buffer_size, p2, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if (gHostFill)
+        {
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xacdcacdc;
+        if (gHostFill)
+        {
+            memset_pattern4(out[j], &pattern, buffer_size);
+            error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                            out[j], 0, NULL, NULL);
+            test_error(error, "clEnqueueUnmapMemObject failed!\n");
+        }
+        else
+        {
+            error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                        &pattern, sizeof(pattern), 0,
+                                        buffer_size, 0, NULL, NULL);
+            test_error(error, "clEnqueueFillBuffer failed!\n");
+        }
+
+        // Global size: number of vectors needed to cover the buffer
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 2, sizeof(tinfo->inBuf2),
+                                    &tinfo->inBuf2)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            return error;
+        }
+    }
+
+    // Submit the kernels; a flush failure is only logged
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
+
+    // Compute the host reference result for every input pair
+    r = (cl_short *)gOut_Ref + thread_id * buffer_elements;
+    t = (cl_short *)r;
+    s.resize(buffer_elements);
+    s2.resize(buffer_elements);
+    for (j = 0; j < buffer_elements; j++)
+    {
+        s[j] = cl_half_to_float(p[j]);
+        s2[j] = cl_half_to_float(p2[j]);
+        r[j] = (short)func.i_ff(s[j], s2[j]);
+    }
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
+    // an in order queue.
+    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
+    {
+        out[j] = (cl_short *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
+            buffer_size, 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                       error);
+            return error;
+        }
+    }
+
+    // Blocking map of the last buffer (j was left there by the loop above)
+    out[j] = (cl_short *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                            CL_TRUE, CL_MAP_READ, 0,
+                                            buffer_size, 0, NULL, NULL, &error);
+    if (error || NULL == out[j])
+    {
+        vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j, error);
+        return error;
+    }
+
+    // Verify data
+    for (j = 0; j < buffer_elements; j++)
+    {
+        cl_short *q = out[gMinVectorSizeIndex]; // always mapped, unlike out[0]
+
+        // Vector size index 0 is compared against t[j] directly
+        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
+        {
+            if (ftz)
+            {
+                if (IsHalfSubnormal(p[j]))
+                {
+                    if (IsHalfSubnormal(p2[j]))
+                    {
+                        short correct = (short)func.i_ff(0.0f, 0.0f);
+                        short correct2 = (short)func.i_ff(0.0f, -0.0f);
+                        short correct3 = (short)func.i_ff(-0.0f, 0.0f);
+                        short correct4 = (short)func.i_ff(-0.0f, -0.0f);
+
+                        if (correct == q[j] || correct2 == q[j]
+                            || correct3 == q[j] || correct4 == q[j])
+                            continue;
+                    }
+                    else
+                    {
+                        short correct = (short)func.i_ff(0.0f, s2[j]);
+                        short correct2 = (short)func.i_ff(-0.0f, s2[j]);
+                        if (correct == q[j] || correct2 == q[j]) continue;
+                    }
+                }
+                else if (IsHalfSubnormal(p2[j]))
+                {
+                    short correct = (short)func.i_ff(s[j], 0.0f);
+                    short correct2 = (short)func.i_ff(s[j], -0.0f);
+                    if (correct == q[j] || correct2 == q[j]) continue;
+                }
+            }
+
+            short err = t[j] - q[j];
+            if (q[j] > t[j]) err = q[j] - t[j];
+            vlog_error(
+                "\nERROR: %s: %d ulp error at {%a (0x%04x), %a "
+                "(0x%04x)}\nExpected: 0x%04x \nActual: 0x%04x (index: %d)\n",
+                name, err, s[j], p[j], s2[j], p2[j], t[j], q[j], j);
+            error = -1;
+            return error;
+        }
+
+
+        for (k = std::max(1U, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
+             k++)
+        {
+            q = out[k];
+            // Vector size indices >= 1 are compared against -t[j]
+            if (-t[j] != q[j])
+            {
+                if (ftz)
+                {
+                    if (IsHalfSubnormal(p[j]))
+                    {
+                        if (IsHalfSubnormal(p2[j]))
+                        {
+                            short correct = (short)-func.i_ff(0.0f, 0.0f);
+                            short correct2 = (short)-func.i_ff(0.0f, -0.0f);
+                            short correct3 = (short)-func.i_ff(-0.0f, 0.0f);
+                            short correct4 = (short)-func.i_ff(-0.0f, -0.0f);
+
+                            if (correct == q[j] || correct2 == q[j]
+                                || correct3 == q[j] || correct4 == q[j])
+                                continue;
+                        }
+                        else
+                        {
+                            short correct = (short)-func.i_ff(0.0f, s2[j]);
+                            short correct2 = (short)-func.i_ff(-0.0f, s2[j]);
+                            if (correct == q[j] || correct2 == q[j]) continue;
+                        }
+                    }
+                    else if (IsHalfSubnormal(p2[j]))
+                    {
+                        short correct = (short)-func.i_ff(s[j], 0.0f);
+                        short correct2 = (short)-func.i_ff(s[j], -0.0f);
+                        if (correct == q[j] || correct2 == q[j]) continue;
+                    }
+                }
+
+                cl_ushort err = -t[j] - q[j];
+                if (q[j] > -t[j]) err = q[j] + t[j];
+                vlog_error("\nERROR: %s: %d ulp error at {%a (0x%04x), %a "
+                           "(0x%04x)}\nExpected: 0x%04x \nActual: 0x%04x "
+                           "(index: %d)\n",
+                           name, err, s[j], p[j], s2[j], p2[j], -t[j], q[j], j);
+                error = -1;
+                return error;
+            }
+        }
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %d failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+
+    // Progress output: only for jobs whose base has its low 28 bits clear
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zd "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+
+    return error;
+}
+
+} // anonymous namespace
+
+int TestMacro_Int_Half_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfoBase test_info_base;
+    cl_int error;
+    size_t i, j;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    // Start from an all-zero base, then fill in the test parameters
+    memset(&test_info_base, 0, sizeof(test_info_base));
+    TestInfo test_info(test_info_base);
+
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_half));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // step overflowed cl_uint: fall back to a single job
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+
+    test_info.tinfo.resize(test_info.threadCount);
+    // Give each thread its own sub-buffers, command queue and RNG
+    for (i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize
+                                        * sizeof(cl_half),
+                                    test_info.subBufferSize * sizeof(cl_half) };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            return error;
+        }
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            return error;
+        }
+
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
+                           "for region {%zd, %zd}\n",
+                           region.origin, region.size);
+                return error;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            return error;
+        }
+
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Build the kernels: one ThreadPool job per tested vector size
+    {
+        BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
+                                       test_info.programs, f->nameInCode };
+        error = ThreadPool_Do(BuildKernel_HalfFn,
+                              gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                              &build_info);
+        test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
+    }
+    // Run the test jobs on the thread pool unless correctness is skipped
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
+
+        test_error(error, "ThreadPool_Do: TestHalf failed\n");
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+    }
+
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index 2d75bc5c33..b747b9802f 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -241,7 +241,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
             if (q[j] > t[j]) err = q[j] - t[j];
             vlog_error("\nERROR: %sD: %" PRId64
                        " ulp error at %.13la: *%" PRId64 " vs. %" PRId64 "\n",
-                       name, err, ((double *)gIn)[j], t[j], q[j]);
+                       name, err, s[j], t[j], q[j]);
             return -1;
         }
 
@@ -265,10 +265,10 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                 cl_ulong err = -t[j] - q[j];
                 if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error(
-                    "\nERROR: %sD%s: %" PRId64 " ulp error at %.13la: *%" PRId64
-                    " vs. %" PRId64 "\n",
-                    name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]);
+                vlog_error("\nERROR: %sD%s: %" PRId64
+                           " ulp error at %.13la: *%" PRId64 " vs. %" PRId64
+                           "\n",
+                           name, sizeNames[k], err, s[j], -t[j], q[j]);
                 return -1;
             }
         }
diff --git a/test_conformance/math_brute_force/macro_unary_half.cpp b/test_conformance/math_brute_force/macro_unary_half.cpp
new file mode 100644
index 0000000000..cb20205514
--- /dev/null
+++ b/test_conformance/math_brute_force/macro_unary_half.cpp
@@ -0,0 +1,427 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+// Thread-pool job: builds the short-result, half-input unary kernels.
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    auto *build_info = static_cast<BuildKernelInfo *>(p);
+    auto emit = [](const std::string &name, const char *fn, cl_uint vec) {
+        return GetUnaryKernel(name, fn, ParameterType::Short,
+                              ParameterType::Half, vec);
+    };
+    return BuildKernels(*build_info, job_id, emit);
+}
+
+// Per-worker-thread OpenCL objects; TestInfo::tinfo holds one per thread.
+struct ThreadInfo
+{
+    clMemWrapper inBuf; // sub-buffer of gInBuffer holding this thread's input
+    clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // one sub-buffer per vector size
+    clCommandQueueWrapper
+        tQueue; // per thread command queue to improve performance
+};
+
+struct TestInfoBase // plain data, so the caller can zero it with memset
+{
+    size_t subBufferSize; // Size of each thread's sub-buffer in elements
+    const Func *f; // The function under test (name, reference func, ftz)
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs handed to ThreadPool_Do
+    cl_uint step; // Input-value distance between consecutive jobs
+    cl_uint scale; // stride between individual test values
+    int ftz; // non-zero if running in flush to zero mode
+};
+
+struct TestInfo : public TestInfoBase // state shared by all TestHalf jobs
+{
+    TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
+
+    // One ThreadInfo per worker thread, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
+    // One program per vector size index: programs[vector_size]
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+};
+
+// ThreadPool_Do job: fills one chunk of half inputs (chosen by job_id), runs
+// every enabled vector size on thread_id's queue and checks the results
+// against the host reference. Returns CL_SUCCESS, or non-zero on failure.
+cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_half);
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    fptr func = job->f->func;
+    int ftz = job->ftz;
+    cl_uint j, k;
+    cl_int error = CL_SUCCESS;
+    const char *name = job->f->name;
+    std::vector<float> s(0);
+
+    int signbit_test = 0;
+    if (!strcmp(name, "signbit")) signbit_test = 1;
+    // Reference call: signbit goes through func.i_f_f, the rest use func.i_f
+#define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
+
+    // Map events and mapped host pointers, one per vector size index
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_short *out[VECTOR_SIZE_COUNT];
+
+    if (gHostFill)
+    {
+        // start the map of the output arrays
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            out[j] = (cl_short *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %u failed! err: %d\n", j,
+                           error);
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
+
+    // Fill this thread's slice of gIn with the half bit patterns to test
+    cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
+    for (j = 0; j < buffer_elements; j++) p[j] = base + j * scale;
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_size, p, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if (gHostFill)
+        {
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xacdcacdc;
+        if (gHostFill)
+        {
+            memset_pattern4(out[j], &pattern, buffer_size);
+            error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                            out[j], 0, NULL, NULL);
+            test_error(error, "clEnqueueUnmapMemObject failed!\n");
+        }
+        else
+        {
+            error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                        &pattern, sizeof(pattern), 0,
+                                        buffer_size, 0, NULL, NULL);
+            test_error(error, "clEnqueueFillBuffer failed!\n");
+        }
+
+        // run the kernel
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            return error;
+        }
+    }
+
+    // Get that moving
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
+
+    // Calculate the correctly rounded reference result
+    cl_short *r = (cl_short *)gOut_Ref + thread_id * buffer_elements;
+    cl_short *t = (cl_short *)r;
+    s.resize(buffer_elements);
+    for (j = 0; j < buffer_elements; j++)
+    {
+        s[j] = cl_half_to_float(p[j]);
+        // isnormal is decided from the half bits: not subnormal, Inf/NaN or 0
+        if (!strcmp(name, "isnormal"))
+        {
+            if ((IsHalfSubnormal(p[j]) == 0) && !((p[j] & 0x7fffU) >= 0x7c00U)
+                && ((p[j] & 0x7fffU) != 0x0000U))
+                r[j] = 1;
+            else
+                r[j] = 0;
+        }
+        else
+            r[j] = (short)ref_func(s[j]);
+    }
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
+    // an in order queue.
+    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
+    {
+        out[j] = (cl_short *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
+            buffer_size, 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %u failed! err: %d\n", j,
+                       error);
+            return error;
+        }
+    }
+    // Wait for the last buffer
+    out[j] = (cl_short *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                            CL_TRUE, CL_MAP_READ, 0,
+                                            buffer_size, 0, NULL, NULL, &error);
+    if (error || NULL == out[j])
+    {
+        vlog_error("Error: clEnqueueMapBuffer %u failed! err: %d\n", j, error);
+        return error;
+    }
+
+    // Verify data
+    for (j = 0; j < buffer_elements; j++)
+    {
+        cl_short *q = out[0];
+        // Vector size index 0 must match the reference value exactly
+        if (gMinVectorSizeIndex == 0 && t[j] != q[j])
+        {
+            // In FTZ mode a subnormal input may also be treated as +0 or -0
+            if (ftz)
+            {
+                if (IsHalfSubnormal(p[j]))
+                {
+                    short correct = (short)ref_func(+0.0f);
+                    short correct2 = (short)ref_func(-0.0f);
+                    if (correct == q[j] || correct2 == q[j]) continue;
+                }
+            }
+
+            short err = t[j] - q[j];
+            if (q[j] > t[j]) err = q[j] - t[j];
+            vlog_error("\nERROR: %s: %d ulp error at %a (0x%04x)\nExpected: "
+                       "%d vs. %d\n",
+                       name, err, s[j], p[j], t[j], q[j]);
+            error = -1;
+            return error;
+        }
+
+        for (k = std::max(1U, gMinVectorSizeIndex); k < gMaxVectorSizeIndex;
+             k++)
+        {
+            q = out[k];
+            // Other vector size indices must return the negated reference
+            if (-t[j] != q[j])
+            {
+                if (ftz)
+                {
+                    if (IsHalfSubnormal(p[j]))
+                    {
+                        short correct = (short)-ref_func(+0.0f);
+                        short correct2 = (short)-ref_func(-0.0f);
+                        if (correct == q[j] || correct2 == q[j]) continue;
+                    }
+                }
+
+                short err = -t[j] - q[j];
+                if (q[j] > -t[j]) err = q[j] + t[j];
+                vlog_error("\nERROR: %s%s: %d ulp error at %a "
+                           "(0x%04x)\nExpected: %d \nActual: %d\n",
+                           name, sizeNames[k], err, s[j], p[j], -t[j], q[j]);
+                error = -1;
+                return error;
+            }
+        }
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %u failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+    // Progress output for jobs whose base is a multiple of 0x10000000
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+    return error;
+}
+
+} // anonymous namespace
+
+// Tests an integer-result unary builtin on cl_half inputs: creates the
+// per-thread sub-buffers and queues, builds the kernels, then runs TestHalf
+// for every job through ThreadPool_Do. Returns 0 on success.
+int TestMacro_Int_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfoBase test_info_base;
+    cl_int error;
+    size_t i, j;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+    // Init test_info
+    memset(&test_info_base, 0, sizeof(test_info_base));
+    TestInfo test_info(test_info_base);
+
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_half));
+    // step is the span of input values covered by one job
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount =
+            std::max((cl_uint)1,
+                     (cl_uint)((1ULL << sizeof(cl_half) * 8) / test_info.step));
+    }
+
+    test_info.f = f;
+    // FTZ when set for the function, forced globally, or half lacks denorms
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+    test_info.tinfo.resize(test_info.threadCount);
+
+    for (i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize
+                                        * sizeof(cl_half),
+                                    test_info.subBufferSize * sizeof(cl_half) };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            return error;
+        }
+
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
+                           "for region {%zu, %zu}\n",
+                           region.origin, region.size);
+                return error;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            return error;
+        }
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
+                                       test_info.programs, f->nameInCode };
+        error = ThreadPool_Do(BuildKernel_HalfFn,
+                              gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                              &build_info);
+        test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
+        test_error(error, "ThreadPool_Do: TestHalf failed\n");
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+    }
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/mad_half.cpp b/test_conformance/math_brute_force/mad_half.cpp
new file mode 100644
index 0000000000..b6cdda640a
--- /dev/null
+++ b/test_conformance/math_brute_force/mad_half.cpp
@@ -0,0 +1,201 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+// Thread-pool job: builds the ternary kernels whose operands are all half.
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    auto *build_info = static_cast<BuildKernelInfo *>(p);
+    auto emit = [](const std::string &name, const char *fn, cl_uint vec) {
+        return GetTernaryKernel(name, fn, ParameterType::Half,
+                                ParameterType::Half, ParameterType::Half,
+                                ParameterType::Half, vec);
+    };
+    return BuildKernels(*build_info, job_id, emit);
+}
+
+} // anonymous namespace
+
+// Runs the half-precision mad kernels on random inputs for every enabled
+// vector size. Results are read back but not checked (see the note in the
+// loop), so only build, enqueue and read-back failures are reported.
+int TestFunc_mad_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    Programs programs;
+    KernelMatrix kernels;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    float maxError = 0.0f; // never updated below; logged as 0 at the end
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    float maxErrorVal3 = 0.0f;
+    size_t bufferSize = BUFFER_SIZE;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+    uint64_t step = getTestStep(sizeof(cl_half), bufferSize);
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
+        if ((error = ThreadPool_Do(BuildKernel_HalfFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            return error;
+    }
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input arrays with random 16-bit patterns
+        cl_ushort *p = (cl_ushort *)gIn;
+        cl_ushort *p2 = (cl_ushort *)gIn2;
+        cl_ushort *p3 = (cl_ushort *)gIn3;
+        for (size_t j = 0; j < bufferSize / sizeof(cl_ushort); j++)
+        {
+            p[j] = (cl_ushort)genrand_int32(d);
+            p2[j] = (cl_ushort)genrand_int32(d);
+            p3[j] = (cl_ushort)genrand_int32(d);
+        }
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          bufferSize, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
+                                          bufferSize, gIn2, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
+            return error;
+        }
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
+                                          bufferSize, gIn3, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
+            return error;
+        }
+
+        // write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, BUFFER_SIZE, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer failed!\n");
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeof(cl_half) * sizeValues[j];
+            size_t localCount = (bufferSize + vectorSize - 1)
+                / vectorSize; // bufferSize / vectorSize  rounded up
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer3), &gInBuffer3)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+        // Verify data - no verification possible. MAD is a random number
+        // generator.
+
+        if (0 == (i & 0x0fffffff))
+        {
+            vlog(".");
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("pass");
+        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
+             maxErrorVal3);
+    }
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index 947b945098..d939984e55 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -49,6 +49,8 @@
 #include "harness/testHarness.h"
 
 #define kPageSize 4096
+#define HALF_REQUIRED_FEATURES_1 (CL_FP_ROUND_TO_ZERO)
+#define HALF_REQUIRED_FEATURES_2 (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)
 #define DOUBLE_REQUIRED_FEATURES                                               \
     (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO                  \
      | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
@@ -81,6 +83,8 @@ static int gTestFastRelaxed = 1;
 */
 int gFastRelaxedDerived = 1;
 static int gToggleCorrectlyRoundedDivideSqrt = 0;
+int gHasHalf = 0;
+cl_device_fp_config gHalfCapabilities = 0;
 int gDeviceILogb0 = 1;
 int gDeviceILogbNaN = 1;
 int gCheckTininessBeforeRounding = 1;
@@ -104,6 +108,8 @@ cl_device_fp_config gFloatCapabilities = 0;
 int gWimpyReductionFactor = 32;
 int gVerboseBruteForce = 0;
 
+cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE;
+
 static int ParseArgs(int argc, const char **argv);
 static void PrintUsage(void);
 static void PrintFunctions(void);
@@ -167,7 +173,6 @@ static int doTest(const char *name)
             return 0;
         }
     }
-
     {
         if (0 == strcmp("ilogb", func_data->name))
         {
@@ -236,6 +241,23 @@ static int doTest(const char *name)
                 }
             }
         }
+
+        if (gHasHalf && NULL != func_data->vtbl_ptr->HalfTestFunc)
+        {
+            gTestCount++;
+            vlog("%3d: ", gTestCount);
+            if (func_data->vtbl_ptr->HalfTestFunc(func_data, gMTdata,
+                                                  false /* relaxed mode*/))
+            {
+                gFailCount++;
+                error++;
+                if (gStopOnError)
+                {
+                    gSkipRestOfTests = true;
+                    return error;
+                }
+            }
+        }
     }
 
     return error;
@@ -408,6 +430,8 @@ static int ParseArgs(int argc, const char **argv)
 
                     case 'm': singleThreaded ^= 1; break;
 
+                    case 'g': gHasHalf ^= 1; break;
+
                     case 'r': gTestFastRelaxed ^= 1; break;
 
                     case 's': gStopOnError ^= 1; break;
@@ -540,6 +564,8 @@ static void PrintUsage(void)
     vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
          "on)\n");
     vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
+    vlog("\t\t-g\tToggle half precision testing. (Default: on if khr_fp_16 "
+         "on)\n");
     vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
     vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
          "precision. (Default: on)\n");
@@ -640,6 +666,54 @@ test_status InitCL(cl_device_id device)
 #endif
     }
 
+    gFloatToHalfRoundingMode = kRoundToNearestEven;
+    if (is_extension_available(gDevice, "cl_khr_fp16"))
+    {
+        gHasHalf ^= 1;
+#if defined(CL_DEVICE_HALF_FP_CONFIG)
+        if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_HALF_FP_CONFIG,
+                                     sizeof(gHalfCapabilities),
+                                     &gHalfCapabilities, NULL)))
+        {
+            vlog_error(
+                "ERROR: Unable to get device CL_DEVICE_HALF_FP_CONFIG. (%d)\n",
+                error);
+            return TEST_FAIL;
+        }
+        if (HALF_REQUIRED_FEATURES_1
+                != (gHalfCapabilities & HALF_REQUIRED_FEATURES_1)
+            && HALF_REQUIRED_FEATURES_2
+                != (gHalfCapabilities & HALF_REQUIRED_FEATURES_2))
+        {
+            char list[300] = "";
+            if (0 == (gHalfCapabilities & CL_FP_ROUND_TO_NEAREST))
+                strncat(list, "CL_FP_ROUND_TO_NEAREST, ", sizeof(list) - 1);
+            if (0 == (gHalfCapabilities & CL_FP_ROUND_TO_ZERO))
+                strncat(list, "CL_FP_ROUND_TO_ZERO, ", sizeof(list) - 1);
+            if (0 == (gHalfCapabilities & CL_FP_INF_NAN))
+                strncat(list, "CL_FP_INF_NAN, ", sizeof(list) - 1);
+            vlog_error("ERROR: required half features are missing: %s\n", list);
+
+            return TEST_FAIL;
+        }
+
+        if ((gHalfCapabilities & CL_FP_ROUND_TO_NEAREST) != 0)
+        {
+            gHalfRoundingMode = CL_HALF_RTE;
+        }
+        else // due to above condition it must be RTZ
+        {
+            gHalfRoundingMode = CL_HALF_RTZ;
+        }
+
+#else
+        vlog_error("FAIL: device says it supports cl_khr_fp16 but "
+                   "CL_DEVICE_HALF_FP_CONFIG is not in the headers!\n");
+        return TEST_FAIL;
+#endif
+    }
+
+
     uint32_t deviceFrequency = 0;
     size_t configSize = sizeof(deviceFrequency);
     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
@@ -829,6 +903,7 @@ test_status InitCL(cl_device_id device)
              "Bruteforce_Ulp_Error_Double() for more details.\n\n");
     }
 
+    vlog("\tTesting half precision? %s\n", no_yes[0 != gHasHalf]);
     vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
     if (gIsEmbedded)
         vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp
index afa072f8e0..3912fd7973 100644
--- a/test_conformance/math_brute_force/reference_math.cpp
+++ b/test_conformance/math_brute_force/reference_math.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -691,7 +691,7 @@ double reference_rootn(double x, int i)
     double sign = x;
     x = reference_fabs(x);
     x = reference_exp2(reference_log2(x) / (double)i);
-    return reference_copysignd(x, sign);
+    return reference_copysign(x, sign);
 }
 
 double reference_rsqrt(double x) { return 1.0 / reference_sqrt(x); }
@@ -707,7 +707,7 @@ double reference_sinpi(double x)
         r = 1 - r;
 
     // sinPi zeros have the same sign as x
-    if (r == 0.0) return reference_copysignd(0.0, x);
+    if (r == 0.0) return reference_copysign(0.0, x);
 
     return reference_sin(r * M_PI);
 }
@@ -717,7 +717,7 @@ double reference_relaxed_sinpi(double x) { return reference_sinpi(x); }
 double reference_tanpi(double x)
 {
     // set aside the sign  (allows us to preserve sign of -0)
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysign(1.0, x);
     double z = reference_fabs(x);
 
     // if big and even  -- caution: only works if x only has single precision
@@ -725,7 +725,7 @@ double reference_tanpi(double x)
     {
         if (z == INFINITY) return x - x; // nan
 
-        return reference_copysignd(
+        return reference_copysign(
             0.0, x); // tanpi ( n ) is copysign( 0.0, n)  for even integers n.
     }
 
@@ -739,7 +739,7 @@ double reference_tanpi(double x)
     if ((i & 1) && z == 0.0) sign = -sign;
 
     // track changes to the sign
-    sign *= reference_copysignd(1.0, z); // really should just be an xor
+    sign *= reference_copysign(1.0, z); // really should just be an xor
     z = reference_fabs(z); // remove the sign again
 
     // reduce once more
@@ -1070,7 +1070,7 @@ int reference_signbit(float x) { return 0 != signbit(x); }
 // Missing functions for win32
 
 
-float reference_copysign(float x, float y)
+float reference_copysignf(float x, float y)
 {
     union {
         float f;
@@ -1084,7 +1084,7 @@ float reference_copysign(float x, float y)
 }
 
 
-double reference_copysignd(double x, double y)
+double reference_copysign(double x, double y)
 {
     union {
         double f;
@@ -1101,10 +1101,10 @@ double reference_copysignd(double x, double y)
 double reference_round(double x)
 {
     double absx = reference_fabs(x);
-    if (absx < 0.5) return reference_copysignd(0.0, x);
+    if (absx < 0.5) return reference_copysign(0.0, x);
 
     if (absx < HEX_DBL(+, 1, 0, +, 53))
-        x = reference_trunc(x + reference_copysignd(0.5, x));
+        x = reference_trunc(x + reference_copysign(0.5, x));
 
     return x;
 }
@@ -1115,7 +1115,7 @@ double reference_trunc(double x)
     {
         cl_long l = (cl_long)x;
 
-        return reference_copysignd((double)l, x);
+        return reference_copysign((double)l, x);
     }
 
     return x;
@@ -1132,16 +1132,16 @@ double reference_trunc(double x)
 
 double reference_cbrt(double x)
 {
-    return reference_copysignd(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
+    return reference_copysign(reference_pow(reference_fabs(x), 1.0 / 3.0), x);
 }
 
 double reference_rint(double x)
 {
     if (reference_fabs(x) < HEX_DBL(+, 1, 0, +, 52))
     {
-        double magic = reference_copysignd(HEX_DBL(+, 1, 0, +, 52), x);
+        double magic = reference_copysign(HEX_DBL(+, 1, 0, +, 52), x);
         double rounded = (x + magic) - magic;
-        x = reference_copysignd(rounded, x);
+        x = reference_copysign(rounded, x);
     }
 
     return x;
@@ -1174,7 +1174,7 @@ double reference_asinh(double x)
     double absx = reference_fabs(x);
     if (absx < HEX_DBL(+, 1, 0, -, 28)) return x;
 
-    double sign = reference_copysignd(1.0, x);
+    double sign = reference_copysign(1.0, x);
 
     if (absx > HEX_DBL(+, 1, 0, +, 28))
         return sign
@@ -1206,7 +1206,7 @@ double reference_atanh(double x)
      */
     if (isnan(x)) return x + x;
 
-    double signed_half = reference_copysignd(0.5, x);
+    double signed_half = reference_copysign(0.5, x);
     x = reference_fabs(x);
     if (x > 1.0) return cl_make_nan();
 
@@ -4699,6 +4699,49 @@ double reference_nextafter(double xx, double yy)
     return a.f;
 }
 
+// Builds a half NaN bit pattern: the bits of x with 0x7e00 forced on.
+cl_half reference_nanh(cl_ushort x)
+{
+    const cl_ushort nan_bits = x | 0x7e00U;
+    cl_half result;
+    memcpy(&result, &nan_bits, sizeof(result));
+    return result;
+}
+
+float reference_nextafterh(float xx, float yy, bool allow_denorms)
+{
+    // Round both inputs to half (RTE) and back so x and y are half values.
+    cl_half tmp_a = cl_half_from_float(xx, CL_HALF_RTE);
+    cl_half tmp_b = cl_half_from_float(yy, CL_HALF_RTE);
+    float x = cl_half_to_float(tmp_a);
+    float y = cl_half_to_float(tmp_b);
+
+    // A NaN operand is returned; equal operands need no step.
+    if (x != x) return x;
+    if (y != y) return y;
+    if (x == y) return y;
+
+    short a_h = cl_half_from_float(x, CL_HALF_RTE);
+    short b_h = cl_half_from_float(y, CL_HALF_RTE);
+    short oa_h = a_h;
+    // Sign-bit patterns become 0x8000 - bits so a_h/b_h order as integers.
+    if (a_h & 0x8000) a_h = 0x8000 - a_h;
+    if (b_h & 0x8000) b_h = 0x8000 - b_h;
+    // Step once toward b_h; map a negative result back to sign-bit form.
+    a_h += (a_h < b_h) ? 1 : -1;
+    a_h = (a_h < 0) ? (cl_short)0x8000 - a_h : a_h;
+
+    // Without denormals, a subnormal result snaps to +/-0x0400 or to zero.
+    if (!allow_denorms && IsHalfSubnormal(a_h))
+    {
+        if (cl_half_to_float(0x7fff & oa_h) < cl_half_to_float(0x7fff & a_h))
+            a_h = (a_h & 0x8000) ? 0x8400 : 0x0400;
+        else
+            a_h = 0;
+    }
+
+    return cl_half_to_float(a_h);
+}
 
 long double reference_nextafterl(long double xx, long double yy)
 {
@@ -5290,7 +5333,7 @@ double reference_pow(double x, double y)
     __log2_ep(&hi, &lo, fabsx);
     double prod = y * hi;
     double result = reference_exp2(prod);
-    return isOddInt ? reference_copysignd(result, x) : result;
+    return isOddInt ? reference_copysign(result, x) : result;
 }
 
 double reference_sqrt(double x) { return sqrt(x); }
diff --git a/test_conformance/math_brute_force/reference_math.h b/test_conformance/math_brute_force/reference_math.h
index 78b245105e..aef8d2eaf8 100644
--- a/test_conformance/math_brute_force/reference_math.h
+++ b/test_conformance/math_brute_force/reference_math.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@
 #include <OpenCL/opencl.h>
 #else
 #include <CL/cl.h>
+#include "CL/cl_half.h"
 #endif
 
 // --  for testing float --
@@ -87,8 +88,8 @@ double reference_acosh(double x);
 double reference_asinh(double x);
 double reference_atanh(double x);
 double reference_cbrt(double x);
-float reference_copysign(float x, float y);
-double reference_copysignd(double x, double y);
+float reference_copysignf(float x, float y);
+double reference_copysign(double x, double y);
 double reference_exp10(double);
 double reference_exp2(double x);
 double reference_expm1(double x);
@@ -160,6 +161,8 @@ long double reference_fractl(long double, long double*);
 long double reference_fmal(long double, long double, long double);
 long double reference_madl(long double, long double, long double);
 long double reference_nextafterl(long double, long double);
+float reference_nextafterh(float, float, bool allow_denormals = true);
+cl_half reference_nanh(cl_ushort);
 long double reference_recipl(long double);
 long double reference_rootnl(long double, int);
 long double reference_rsqrtl(long double);
diff --git a/test_conformance/math_brute_force/ternary_half.cpp b/test_conformance/math_brute_force/ternary_half.cpp
new file mode 100644
index 0000000000..18075379da
--- /dev/null
+++ b/test_conformance/math_brute_force/ternary_half.cpp
@@ -0,0 +1,777 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cinttypes>
+#include <cstring>
+
+#define CORRECTLY_ROUNDED 0
+#define FLUSHED 1
+
+namespace {
+
+// Job callback handed to ThreadPool_Do: builds the all-half ternary kernels.
+cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    auto gen = [](const std::string &kernel_name, const char *builtin,
+                  cl_uint vector_size_index) {
+        return GetTernaryKernel(kernel_name, builtin, ParameterType::Half,
+                                ParameterType::Half, ParameterType::Half,
+                                ParameterType::Half, vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, gen);
+}
+
+// Harder inputs as raw half bit patterns: NaN, infinities, zeros, limits.
+const cl_half specialValuesHalf[] = {
+    0xffff, 0x0000, 0x0001, /* NaN, +0, smallest denormal */
+    0x7c00, /* INFINITY */
+    0xfc00, /* -INFINITY */
+    0x8000, /* -0 */
+    0x7bff, /* HALF_MAX */
+    0x0400, /* HALF_MIN */
+    0x03ff, /* largest denormal */
+    0x3c00, /* 1 */
+    0xbc00, /* -1 */
+    0x3555, /* nearest value to 1/3 */
+    0x3bff, /* largest number less than one */
+    0xc000, /* -2 */
+    0xfbff, /* -HALF_MAX */
+    0x8400, /* -HALF_MIN */
+    0x4248, /* M_PI_H */
+    0xc248, /* -M_PI_H */
+    0xbbff, /* largest-magnitude negative fraction */
+};
+constexpr size_t specialValuesHalfCount = ARRAY_SIZE(specialValuesHalf);
+
+} // anonymous namespace
+
+int TestFunc_Half_Half_Half_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+
+    Programs programs;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    float maxErrorVal3 = 0.0f;
+    uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
+
+    constexpr size_t bufferElements = BUFFER_SIZE / sizeof(cl_half);
+
+    cl_uchar overflow[bufferElements];
+    float half_ulps = f->half_ulps;
+    int skipNanInf = (0 == strcmp("fma", f->nameInCode));
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    // Init the kernels
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
+    if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array
+        cl_half *hp0 = (cl_half *)gIn;
+        cl_half *hp1 = (cl_half *)gIn2;
+        cl_half *hp2 = (cl_half *)gIn3;
+        size_t idx = 0;
+
+        if (i == 0)
+        { // test edge cases
+            uint32_t x, y, z;
+            x = y = z = 0;
+            for (; idx < bufferElements; idx++)
+            {
+                hp0[idx] = specialValuesHalf[x];
+                hp1[idx] = specialValuesHalf[y];
+                hp2[idx] = specialValuesHalf[z];
+
+                if (++x >= specialValuesHalfCount)
+                {
+                    x = 0;
+                    if (++y >= specialValuesHalfCount)
+                    {
+                        y = 0;
+                        if (++z >= specialValuesHalfCount) break;
+                    }
+                }
+            }
+            if (idx == bufferElements)
+                vlog_error("Test Error: not all special cases tested!\n");
+        }
+
+        auto any_value = [&d]() {
+            float t = (float)((double)genrand_int32(d) / (double)0xFFFFFFFF);
+            return HFF((1.0f - t) * CL_HALF_MIN + t * CL_HALF_MAX);
+        };
+
+        for (; idx < bufferElements; idx++)
+        {
+            hp0[idx] = any_value();
+            hp1[idx] = any_value();
+            hp2[idx] = any_value();
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn2, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
+            return error;
+        }
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
+                                          BUFFER_SIZE, gIn3, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
+            return error;
+        }
+
+        // Write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, BUFFER_SIZE, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer failed!\n");
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeof(cl_half) * sizeValues[j];
+            size_t localCount = (BUFFER_SIZE + vectorSize - 1)
+                / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer3), &gInBuffer3)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Get that moving
+        if ((error = clFlush(gQueue)))
+        {
+            vlog("clFlush failed\n");
+            return error;
+        }
+
+        // Calculate the correctly rounded reference result
+        cl_half *res = (cl_half *)gOut_Ref;
+        if (skipNanInf)
+        {
+            for (size_t j = 0; j < bufferElements; j++)
+            {
+                feclearexcept(FE_OVERFLOW);
+                res[j] = HFD((double)f->dfunc.f_fff(HTF(hp0[j]), HTF(hp1[j]),
+                                                    HTF(hp2[j])));
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        }
+        else
+        {
+            for (size_t j = 0; j < bufferElements; j++)
+                res[j] = HFD((double)f->dfunc.f_fff(HTF(hp0[j]), HTF(hp1[j]),
+                                                    HTF(hp2[j])));
+        }
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        uint16_t *t = (uint16_t *)gOut_Ref;
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                uint16_t *q = (uint16_t *)(gOut[k]);
+
+                // If we aren't getting the correctly rounded result
+                if (t[j] != q[j])
+                {
+                    int fail;
+                    cl_half test = ((cl_half *)q)[j];
+                    double ref1 = (double)f->dfunc.f_fff(
+                        HTF(hp0[j]), HTF(hp1[j]), HTF(hp2[j]));
+                    cl_half correct = HFD(ref1);
+
+                    // Per section 10 paragraph 6, accept any result if an input
+                    // or output is a infinity or NaN or overflow
+                    if (skipNanInf)
+                    {
+                        if (overflow[j] || IsHalfInfinity(correct)
+                            || IsHalfNaN(correct) || IsHalfInfinity(hp0[j])
+                            || IsHalfNaN(hp0[j]) || IsHalfInfinity(hp1[j])
+                            || IsHalfNaN(hp1[j]) || IsHalfInfinity(hp2[j])
+                            || IsHalfNaN(hp2[j]))
+                            continue;
+                    }
+
+                    float err =
+                        test != correct ? Ulp_Error_Half(test, ref1) : 0.f;
+                    fail = !(fabsf(err) <= half_ulps);
+
+                    if (fail && ftz)
+                    {
+                        // retry per section 6.5.3.2  with flushing on
+                        if (0.0f == test
+                            && 0.0f
+                                == f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
+                                                 HTF(hp2[j]), FLUSHED))
+                        {
+                            fail = 0;
+                            err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if (fail && IsHalfSubnormal(hp0[j]))
+                        { // look at me,
+                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
+
+                            float ref2 =
+                                f->func.f_fma(0.0f, HTF(hp1[j]), HTF(hp2[j]),
+                                              CORRECTLY_ROUNDED);
+                            cl_half correct2 = HFF(ref2);
+                            float ref3 =
+                                f->func.f_fma(-0.0f, HTF(hp1[j]), HTF(hp2[j]),
+                                              CORRECTLY_ROUNDED);
+                            cl_half correct3 = HFF(ref3);
+
+                            if (skipNanInf)
+                            {
+                                if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                // Note: no double rounding here.  Reference
+                                // functions calculate in single precision.
+                                if (IsHalfInfinity(correct2)
+                                    || IsHalfNaN(correct2)
+                                    || IsHalfInfinity(correct3)
+                                    || IsHalfNaN(correct3))
+                                    continue;
+                            }
+
+                            float err2 = test != correct2
+                                ? Ulp_Error_Half(test, ref2)
+                                : 0.f;
+                            float err3 = test != correct3
+                                ? Ulp_Error_Half(test, ref3)
+                                : 0.f;
+                            fail = fail
+                                && ((!(fabsf(err2) <= half_ulps))
+                                    && (!(fabsf(err3) <= half_ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+
+                            // retry per section 6.5.3.4
+                            if (0.0f == test
+                                && (0.0f
+                                        == f->func.f_fma(0.0f, HTF(hp1[j]),
+                                                         HTF(hp2[j]), FLUSHED)
+                                    || 0.0f
+                                        == f->func.f_fma(-0.0f, HTF(hp1[j]),
+                                                         HTF(hp2[j]), FLUSHED)))
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+
+                            // try with first two args as zero
+                            if (IsHalfSubnormal(hp1[j]))
+                            { // its fun to have fun,
+                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
+
+                                ref2 = f->func.f_fma(0.0f, 0.0f, HTF(hp2[j]),
+                                                     CORRECTLY_ROUNDED);
+                                correct2 = HFF(ref2);
+                                ref3 = f->func.f_fma(-0.0f, 0.0f, HTF(hp2[j]),
+                                                     CORRECTLY_ROUNDED);
+                                correct3 = HFF(ref3);
+                                float ref4 =
+                                    f->func.f_fma(0.0f, -0.0f, HTF(hp2[j]),
+                                                  CORRECTLY_ROUNDED);
+                                cl_half correct4 = HFF(ref4);
+                                float ref5 =
+                                    f->func.f_fma(-0.0f, -0.0f, HTF(hp2[j]),
+                                                  CORRECTLY_ROUNDED);
+                                cl_half correct5 = HFF(ref5);
+
+                                // Per section 10 paragraph 6, accept any result
+                                // if an input or output is a infinity or NaN or
+                                // overflow
+                                if (!gInfNanSupport)
+                                {
+                                    if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                    // Note: no double rounding here.  Reference
+                                    // functions calculate in single precision.
+                                    if (IsHalfInfinity(correct2)
+                                        || IsHalfNaN(correct2)
+                                        || IsHalfInfinity(correct3)
+                                        || IsHalfNaN(correct3)
+                                        || IsHalfInfinity(correct4)
+                                        || IsHalfNaN(correct4)
+                                        || IsHalfInfinity(correct5)
+                                        || IsHalfNaN(correct5))
+                                        continue;
+                                }
+
+                                err2 = test != correct2
+                                    ? Ulp_Error_Half(test, ref2)
+                                    : 0.f;
+                                err3 = test != correct3
+                                    ? Ulp_Error_Half(test, ref3)
+                                    : 0.f;
+                                float err4 = test != correct4
+                                    ? Ulp_Error_Half(test, ref4)
+                                    : 0.f;
+                                float err5 = test != correct5
+                                    ? Ulp_Error_Half(test, ref5)
+                                    : 0.f;
+                                fail = fail
+                                    && ((!(fabsf(err2) <= half_ulps))
+                                        && (!(fabsf(err3) <= half_ulps))
+                                        && (!(fabsf(err4) <= half_ulps))
+                                        && (!(fabsf(err5) <= half_ulps)));
+                                if (fabsf(err2) < fabsf(err)) err = err2;
+                                if (fabsf(err3) < fabsf(err)) err = err3;
+                                if (fabsf(err4) < fabsf(err)) err = err4;
+                                if (fabsf(err5) < fabsf(err)) err = err5;
+
+                                // retry per section 6.5.3.4
+                                if (0.0f == test
+                                    && (0.0f
+                                            == f->func.f_fma(0.0f, 0.0f,
+                                                             HTF(hp2[j]),
+                                                             FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(-0.0f, 0.0f,
+                                                             HTF(hp2[j]),
+                                                             FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(0.0f, -0.0f,
+                                                             HTF(hp2[j]),
+                                                             FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(-0.0f, -0.0f,
+                                                             HTF(hp2[j]),
+                                                             FLUSHED)))
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+
+                                if (IsHalfSubnormal(hp2[j]))
+                                {
+                                    if (test == 0.0f) // 0*0+0 is 0
+                                    {
+                                        fail = 0;
+                                        err = 0.0f;
+                                    }
+                                }
+                            }
+                            else if (IsHalfSubnormal(hp2[j]))
+                            {
+                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
+
+                                ref2 = f->func.f_fma(0.0f, HTF(hp1[j]), 0.0f,
+                                                     CORRECTLY_ROUNDED);
+                                correct2 = HFF(ref2);
+                                ref3 = f->func.f_fma(-0.0f, HTF(hp1[j]), 0.0f,
+                                                     CORRECTLY_ROUNDED);
+                                correct3 = HFF(ref3);
+                                float ref4 =
+                                    f->func.f_fma(0.0f, HTF(hp1[j]), -0.0f,
+                                                  CORRECTLY_ROUNDED);
+                                cl_half correct4 = HFF(ref4);
+                                float ref5 =
+                                    f->func.f_fma(-0.0f, HTF(hp1[j]), -0.0f,
+                                                  CORRECTLY_ROUNDED);
+                                cl_half correct5 = HFF(ref5);
+
+                                // Per section 10 paragraph 6, accept any result
+                                // if an input or output is a infinity or NaN or
+                                // overflow
+                                if (!gInfNanSupport)
+                                {
+                                    if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                    // Note: no double rounding here.  Reference
+                                    // functions calculate in single precision.
+                                    if (IsHalfInfinity(correct2)
+                                        || IsHalfNaN(correct2)
+                                        || IsHalfInfinity(correct3)
+                                        || IsHalfNaN(correct3)
+                                        || IsHalfInfinity(correct4)
+                                        || IsHalfNaN(correct4)
+                                        || IsHalfInfinity(correct5)
+                                        || IsHalfNaN(correct5))
+                                        continue;
+                                }
+
+                                err2 = test != correct2
+                                    ? Ulp_Error_Half(test, ref2)
+                                    : 0.f;
+                                err3 = test != correct3
+                                    ? Ulp_Error_Half(test, ref3)
+                                    : 0.f;
+                                float err4 = test != correct4
+                                    ? Ulp_Error_Half(test, ref4)
+                                    : 0.f;
+                                float err5 = test != correct5
+                                    ? Ulp_Error_Half(test, ref5)
+                                    : 0.f;
+                                fail = fail
+                                    && ((!(fabsf(err2) <= half_ulps))
+                                        && (!(fabsf(err3) <= half_ulps))
+                                        && (!(fabsf(err4) <= half_ulps))
+                                        && (!(fabsf(err5) <= half_ulps)));
+                                if (fabsf(err2) < fabsf(err)) err = err2;
+                                if (fabsf(err3) < fabsf(err)) err = err3;
+                                if (fabsf(err4) < fabsf(err)) err = err4;
+                                if (fabsf(err5) < fabsf(err)) err = err5;
+
+                                // retry per section 6.5.3.4
+                                if (0.0f == test
+                                    && (0.0f
+                                            == f->func.f_fma(0.0f, HTF(hp1[j]),
+                                                             0.0f, FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(-0.0f, HTF(hp1[j]),
+                                                             0.0f, FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(0.0f, HTF(hp1[j]),
+                                                             -0.0f, FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(-0.0f, HTF(hp1[j]),
+                                                             -0.0f, FLUSHED)))
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                            }
+                        }
+                        else if (fail && IsHalfSubnormal(hp1[j]))
+                        {
+                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
+
+                            float ref2 =
+                                f->func.f_fma(HTF(hp0[j]), 0.0f, HTF(hp2[j]),
+                                              CORRECTLY_ROUNDED);
+                            cl_half correct2 = HFF(ref2);
+                            float ref3 =
+                                f->func.f_fma(HTF(hp0[j]), -0.0f, HTF(hp2[j]),
+                                              CORRECTLY_ROUNDED);
+                            cl_half correct3 = HFF(ref3);
+
+                            if (skipNanInf)
+                            {
+                                if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                // Note: no double rounding here.  Reference
+                                // functions calculate in single precision.
+                                if (IsHalfInfinity(correct2)
+                                    || IsHalfNaN(correct2)
+                                    || IsHalfInfinity(correct3)
+                                    || IsHalfNaN(correct3))
+                                    continue;
+                            }
+
+                            float err2 = test != correct2
+                                ? Ulp_Error_Half(test, ref2)
+                                : 0.f;
+                            float err3 = test != correct3
+                                ? Ulp_Error_Half(test, ref3)
+                                : 0.f;
+                            fail = fail
+                                && ((!(fabsf(err2) <= half_ulps))
+                                    && (!(fabsf(err3) <= half_ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+
+                            // retry per section 6.5.3.4
+                            if (0.0f == test
+                                && (0.0f
+                                        == f->func.f_fma(HTF(hp0[j]), 0.0f,
+                                                         HTF(hp2[j]), FLUSHED)
+                                    || 0.0f
+                                        == f->func.f_fma(HTF(hp0[j]), -0.0f,
+                                                         HTF(hp2[j]), FLUSHED)))
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+
+                            // try with second two args as zero
+                            if (IsHalfSubnormal(hp2[j]))
+                            {
+                                if (skipNanInf) feclearexcept(FE_OVERFLOW);
+
+                                ref2 = f->func.f_fma(HTF(hp0[j]), 0.0f, 0.0f,
+                                                     CORRECTLY_ROUNDED);
+                                correct2 = HFF(ref2);
+                                ref3 = f->func.f_fma(HTF(hp0[j]), -0.0f, 0.0f,
+                                                     CORRECTLY_ROUNDED);
+                                correct3 = HFF(ref3);
+                                float ref4 =
+                                    f->func.f_fma(HTF(hp0[j]), 0.0f, -0.0f,
+                                                  CORRECTLY_ROUNDED);
+                                cl_half correct4 = HFF(ref4);
+                                float ref5 =
+                                    f->func.f_fma(HTF(hp0[j]), -0.0f, -0.0f,
+                                                  CORRECTLY_ROUNDED);
+                                cl_half correct5 = HFF(ref5);
+
+                                // Per section 10 paragraph 6, accept any result
+                                // if an input or output is an infinity, NaN or
+                                // overflow
+                                if (!gInfNanSupport)
+                                {
+                                    if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                    // Note: no double rounding here.  Reference
+                                    // functions calculate in single precision.
+                                    if (IsHalfInfinity(correct2)
+                                        || IsHalfNaN(correct2)
+                                        || IsHalfInfinity(correct3)
+                                        || IsHalfNaN(correct3)
+                                        || IsHalfInfinity(correct4)
+                                        || IsHalfNaN(correct4)
+                                        || IsHalfInfinity(correct5)
+                                        || IsHalfNaN(correct5))
+                                        continue;
+                                }
+
+                                err2 = test != correct2
+                                    ? Ulp_Error_Half(test, ref2)
+                                    : 0.f;
+                                err3 = test != correct3
+                                    ? Ulp_Error_Half(test, ref3)
+                                    : 0.f;
+                                float err4 = test != correct4
+                                    ? Ulp_Error_Half(test, ref4)
+                                    : 0.f;
+                                float err5 = test != correct5
+                                    ? Ulp_Error_Half(test, ref5)
+                                    : 0.f;
+                                fail = fail
+                                    && ((!(fabsf(err2) <= half_ulps))
+                                        && (!(fabsf(err3) <= half_ulps))
+                                        && (!(fabsf(err4) <= half_ulps))
+                                        && (!(fabsf(err5) <= half_ulps)));
+                                if (fabsf(err2) < fabsf(err)) err = err2;
+                                if (fabsf(err3) < fabsf(err)) err = err3;
+                                if (fabsf(err4) < fabsf(err)) err = err4;
+                                if (fabsf(err5) < fabsf(err)) err = err5;
+
+                                // retry per section 6.5.3.4
+                                if (0.0f == test
+                                    && (0.0f
+                                            == f->func.f_fma(HTF(hp0[j]), 0.0f,
+                                                             0.0f, FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(HTF(hp0[j]), -0.0f,
+                                                             0.0f, FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(HTF(hp0[j]), 0.0f,
+                                                             -0.0f, FLUSHED)
+                                        || 0.0f
+                                            == f->func.f_fma(HTF(hp0[j]), -0.0f,
+                                                             -0.0f, FLUSHED)))
+                                {
+                                    fail = 0;
+                                    err = 0.0f;
+                                }
+                            }
+                        }
+                        else if (fail && IsHalfSubnormal(hp2[j]))
+                        {
+                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
+
+                            float ref2 = f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]),
+                                                       0.0f, CORRECTLY_ROUNDED);
+                            cl_half correct2 = HFF(ref2);
+                            float ref3 =
+                                f->func.f_fma(HTF(hp0[j]), HTF(hp1[j]), -0.0f,
+                                              CORRECTLY_ROUNDED);
+                            cl_half correct3 = HFF(ref3);
+
+                            if (skipNanInf)
+                            {
+                                if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                // Note: no double rounding here.  Reference
+                                // functions calculate in single precision.
+                                if (IsHalfInfinity(correct2)
+                                    || IsHalfNaN(correct2)
+                                    || IsHalfInfinity(correct3)
+                                    || IsHalfNaN(correct3))
+                                    continue;
+                            }
+
+                            float err2 = test != correct2
+                                ? Ulp_Error_Half(test, correct2)
+                                : 0.f;
+                            float err3 = test != correct3
+                                ? Ulp_Error_Half(test, correct3)
+                                : 0.f;
+                            fail = fail
+                                && ((!(fabsf(err2) <= half_ulps))
+                                    && (!(fabsf(err3) <= half_ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+
+                            // retry per section 6.5.3.4
+                            if (0.0f == test
+                                && (0.0f
+                                        == f->func.f_fma(HTF(hp0[j]),
+                                                         HTF(hp1[j]), 0.0f,
+                                                         FLUSHED)
+                                    || 0.0f
+                                        == f->func.f_fma(HTF(hp0[j]),
+                                                         HTF(hp1[j]), -0.0f,
+                                                         FLUSHED)))
+                            {
+                                fail = 0;
+                                err = 0.0f;
+                            }
+                        }
+                    }
+
+                    if (fabsf(err) > maxError)
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = HTF(hp0[j]);
+                        maxErrorVal2 = HTF(hp1[j]);
+                        maxErrorVal3 = HTF(hp2[j]);
+                    }
+
+                    if (fail)
+                    {
+                        vlog_error(
+                            "\nERROR: %s%s: %f ulp error at {%a, %a, %a} "
+                            "({0x%4.4x, 0x%4.4x, 0x%4.4x}): *%a vs. %a\n",
+                            f->name, sizeNames[k], err, HTF(hp0[j]),
+                            HTF(hp1[j]), HTF(hp2[j]), hp0[j], hp1[j], hp2[j],
+                            HTF(res[j]), HTF(test));
+                        return -1;
+                    }
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n",
+                     i, step, BUFFER_SIZE);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
+             maxErrorVal3);
+    }
+
+    vlog("\n");
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/math_brute_force/test_functions.h b/test_conformance/math_brute_force/test_functions.h
index 78aef9c9a6..9abb41f157 100644
--- a/test_conformance/math_brute_force/test_functions.h
+++ b/test_conformance/math_brute_force/test_functions.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2021 The Khronos Group Inc.
+// Copyright (c) 2021-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -24,6 +24,9 @@ int TestFunc_Float_Float(const Func *f, MTdata, bool relaxedMode);
 // double foo(double)
 int TestFunc_Double_Double(const Func *f, MTdata, bool relaxedMode);
 
+// half foo(half)
+int TestFunc_Half_Half(const Func *f, MTdata, bool relaxedMode);
+
 // int foo(float)
 int TestFunc_Int_Float(const Func *f, MTdata, bool relaxedMode);
 
@@ -36,6 +39,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata, bool relaxedMode);
 // double foo(ulong)
 int TestFunc_Double_ULong(const Func *f, MTdata, bool relaxedMode);
 
+// half foo(ushort)
+int TestFunc_Half_UShort(const Func *f, MTdata, bool relaxedMode);
+
 // Returns {0, 1} for scalar and {0, -1} for vector.
 // int foo(float)
 int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
@@ -44,21 +50,34 @@ int TestMacro_Int_Float(const Func *f, MTdata, bool relaxedMode);
 // int foo(double)
 int TestMacro_Int_Double(const Func *f, MTdata, bool relaxedMode);
 
+// int foo(half,half)
+int TestMacro_Int_Half_Half(const Func *f, MTdata, bool relaxedMode);
+
+// int foo(half)
+int TestMacro_Int_Half(const Func *f, MTdata, bool relaxedMode);
+
+// int foo(half)
+int TestFunc_Int_Half(const Func *f, MTdata, bool relaxedMode);
+
 // float foo(float, float)
 int TestFunc_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
 
 // double foo(double, double)
 int TestFunc_Double_Double_Double(const Func *f, MTdata, bool relaxedMode);
 
+// half foo(half, half)
+int TestFunc_Half_Half_Half(const Func *f, MTdata, bool relaxedMode);
 // Special handling for nextafter.
-// float foo(float, float)
-int TestFunc_Float_Float_Float_nextafter(const Func *f, MTdata,
-                                         bool relaxedMode);
+// half foo(half, half)
+int TestFunc_Half_Half_Half_nextafter(const Func *f, MTdata, bool relaxedMode);
+
+// half foo(half, half)
+int TestFunc_Half_Half_Half_common(const Func *f, MTdata, int isNextafter,
+                                   bool relaxedMode);
+
+// half foo(half, int)
+int TestFunc_Half_Half_Int(const Func *f, MTdata, bool relaxedMode);
 
-// Special handling for nextafter.
-// double foo(double, double)
-int TestFunc_Double_Double_Double_nextafter(const Func *f, MTdata,
-                                            bool relaxedMode);
 
 // float op float
 int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
@@ -68,6 +87,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata,
 int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata,
                                            bool relaxedMode);
 
+// half op half
+int TestFunc_Half_Half_Half_Operator(const Func *f, MTdata, bool relaxedMode);
+
 // float foo(float, int)
 int TestFunc_Float_Float_Int(const Func *f, MTdata, bool relaxedMode);
 
@@ -89,24 +111,36 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata, bool relaxedMode);
 int TestFunc_Double_Double_Double_Double(const Func *f, MTdata,
                                          bool relaxedMode);
 
+// half foo(half, half, half)
+int TestFunc_Half_Half_Half_Half(const Func *f, MTdata, bool relaxedMode);
+
 // float foo(float, float*)
 int TestFunc_Float2_Float(const Func *f, MTdata, bool relaxedMode);
 
 // double foo(double, double*)
 int TestFunc_Double2_Double(const Func *f, MTdata, bool relaxedMode);
 
+// half foo(half, half*)
+int TestFunc_Half2_Half(const Func *f, MTdata, bool relaxedMode);
+
 // float foo(float, int*)
 int TestFunc_FloatI_Float(const Func *f, MTdata, bool relaxedMode);
 
 // double foo(double, int*)
 int TestFunc_DoubleI_Double(const Func *f, MTdata, bool relaxedMode);
 
+// half foo(half, int*)
+int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode);
+
 // float foo(float, float, int*)
 int TestFunc_FloatI_Float_Float(const Func *f, MTdata, bool relaxedMode);
 
 // double foo(double, double, int*)
 int TestFunc_DoubleI_Double_Double(const Func *f, MTdata, bool relaxedMode);
 
+// half foo(half, half, int*)
+int TestFunc_HalfI_Half_Half(const Func *f, MTdata d, bool relaxedMode);
+
 // Special handling for mad.
 // float mad(float, float, float)
 int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
@@ -115,4 +149,7 @@ int TestFunc_mad_Float(const Func *f, MTdata, bool relaxedMode);
 // double mad(double, double, double)
 int TestFunc_mad_Double(const Func *f, MTdata, bool relaxedMode);
 
+// half mad(half, half, half)
+int TestFunc_mad_Half(const Func *f, MTdata, bool relaxedMode);
+
 #endif
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index 5da18f84b0..f3157fdf60 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -288,9 +288,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                 {
                     vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
                                "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n",
-                               job->f->name, sizeNames[k], err,
-                               ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
-                               ((cl_double *)gOut_Ref)[j], test);
+                               job->f->name, sizeNames[k], err, s[j],
+                               ((cl_ulong *)s)[j], ((cl_double *)t)[j], test);
                     return -1;
                 }
             }
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 9666d5ea49..cd93d3c972 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -489,7 +489,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     cl_int error;
     float maxError = 0.0f;
     double maxErrorVal = 0.0;
-    int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0);
 
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
@@ -583,7 +582,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         return error;
 
     // Run the kernels
-    if (!gSkipCorrectnessTesting || skipTestingRelaxed)
+    if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
         if (error) return error;
@@ -603,12 +602,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         else
             vlog("passed");
 
-        if (skipTestingRelaxed)
-        {
-            vlog(" (rlx skip correctness testing)\n");
-            return error;
-        }
-
         vlog("\t%8.2f @ %a", maxError, maxErrorVal);
     }
 
diff --git a/test_conformance/math_brute_force/unary_half.cpp b/test_conformance/math_brute_force/unary_half.cpp
new file mode 100644
index 0000000000..0980fb16ca
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_half.cpp
@@ -0,0 +1,483 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cstring>
+
+namespace {
+
+cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    // p points at the BuildKernelInfo handed to ThreadPool_Do.
+    auto make_src = [](const std::string &name, const char *builtin_name,
+                       cl_uint size_index) {
+        return GetUnaryKernel(name, builtin_name, ParameterType::Half,
+                              ParameterType::Half, size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, make_src);
+}
+
+// State owned by a single worker thread
+struct ThreadInfo
+{
+    clMemWrapper inBuf; // this thread's sub-buffer of gInBuffer
+    clMemWrapper outBuf[VECTOR_SIZE_COUNT]; // one output sub-buffer per size
+    float maxError; // largest |ulp error| recorded by this thread
+    double maxErrorValue; // input value at which maxError was recorded
+    clCommandQueueWrapper
+        tQueue; // command queue used only by this thread
+};
+
+struct TestInfoBase
+{
+    size_t subBufferSize; // Size of each thread's sub-buffer, in elements
+    const Func *f; // A pointer to the function info
+    cl_uint threadCount; // Number of worker threads
+    cl_uint jobCount; // Number of jobs handed to ThreadPool_Do
+    cl_uint step; // distance between the base values of consecutive jobs
+    cl_uint scale; // stride between individual test values
+    float ulps; // max allowed ulp error
+    int ftz; // non-zero if running in flush to zero mode
+
+    int isRangeLimited; // 1 if the function is only to be evaluated over a
+                        // range
+    float half_sin_cos_tan_limit; // max |result| accepted outside that range
+};
+
+struct TestInfo : public TestInfoBase
+{
+    TestInfo(const TestInfoBase &base): TestInfoBase(base) {}
+
+    // Per-thread state, indexed by thread_id
+    std::vector<ThreadInfo> tinfo;
+
+    // Programs, one per vector size index.
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size_index][thread_id]
+    KernelMatrix k;
+};
+
+// ThreadPool job: runs the kernels for one chunk of half inputs and, unless
+// gSkipCorrectnessTesting is set, checks every result against func.f_f.
+cl_int TestHalf(cl_uint job_id, cl_uint thread_id, void *data)
+{
+    TestInfo *job = (TestInfo *)data;
+    size_t buffer_elements = job->subBufferSize;
+    size_t buffer_size = buffer_elements * sizeof(cl_half);
+    cl_uint scale = job->scale;
+    cl_uint base = job_id * (cl_uint)job->step;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
+    float ulps = job->ulps;
+    fptr func = job->f->func;
+    cl_uint j, k;
+    cl_int error = CL_SUCCESS;
+
+    int isRangeLimited = job->isRangeLimited;
+    float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
+    int ftz = job->ftz;
+
+    std::vector<float> s(0);
+
+    cl_event e[VECTOR_SIZE_COUNT];
+    cl_ushort *out[VECTOR_SIZE_COUNT];
+
+    if (gHostFill)
+    {
+        // start the map of the output arrays
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            out[j] = (uint16_t *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %u failed! err: %d\n", j,
+                           error);
+                return error;
+            }
+        }
+
+        // Submit the maps without waiting; a flush failure is only logged
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
+
+    // Input bit patterns base, base + scale, ... stored as cl_ushort
+    cl_ushort *p = (cl_ushort *)gIn + thread_id * buffer_elements;
+    for (j = 0; j < buffer_elements; j++)
+    {
+        p[j] = base + j * scale;
+    }
+
+    if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf, CL_FALSE, 0,
+                                      buffer_size, p, 0, NULL, NULL)))
+    {
+        vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
+        return error;
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if (gHostFill)
+        {
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
+        }
+
+        // Fill the result buffer with garbage, so that old results don't carry
+        // over
+        uint32_t pattern = 0xacdcacdc;
+        if (gHostFill)
+        {
+            memset_pattern4(out[j], &pattern, buffer_size);
+            error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                            out[j], 0, NULL, NULL);
+            test_error(error, "clEnqueueUnmapMemObject failed!\n");
+        }
+        else
+        {
+            error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                        &pattern, sizeof(pattern), 0,
+                                        buffer_size, 0, NULL, NULL);
+            test_error(error, "clEnqueueFillBuffer failed!\n");
+        }
+
+        // run the kernel: one work-item per vector, element count rounded up
+        size_t vectorCount =
+            (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
+        cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
+                                                 // own copy of the cl_kernel
+        cl_program program = job->programs[j];
+
+        if ((error = clSetKernelArg(kernel, 0, sizeof(tinfo->outBuf[j]),
+                                    &tinfo->outBuf[j])))
+        {
+            LogBuildError(program);
+            return error;
+        }
+        if ((error = clSetKernelArg(kernel, 1, sizeof(tinfo->inBuf),
+                                    &tinfo->inBuf)))
+        {
+            LogBuildError(program);
+            return error;
+        }
+
+        if ((error = clEnqueueNDRangeKernel(tinfo->tQueue, kernel, 1, NULL,
+                                            &vectorCount, NULL, 0, NULL, NULL)))
+        {
+            vlog_error("FAILED -- could not execute kernel\n");
+            return error;
+        }
+    }
+
+    // Submit the kernels; a flush failure is only logged
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 2 failed\n");
+
+    if (gSkipCorrectnessTesting) return CL_SUCCESS;
+
+    // Calculate the correctly rounded reference result
+    cl_half *r = (cl_half *)gOut_Ref + thread_id * buffer_elements;
+    s.resize(buffer_elements);
+    for (j = 0; j < buffer_elements; j++)
+    {
+        s[j] = (float)cl_half_to_float(p[j]);
+        r[j] = HFF(func.f_f(s[j]));
+    }
+
+    // Read the data back -- no need to wait for the first N-1 buffers. This is
+    // an in order queue.
+    for (j = gMinVectorSizeIndex; j + 1 < gMaxVectorSizeIndex; j++)
+    {
+        out[j] = (uint16_t *)clEnqueueMapBuffer(
+            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_READ, 0,
+            buffer_size, 0, NULL, NULL, &error);
+        if (error || NULL == out[j])
+        {
+            vlog_error("Error: clEnqueueMapBuffer %u failed! err: %d\n", j,
+                       error);
+            return error;
+        }
+    }
+    // Wait for the last buffer (blocking map)
+    out[j] = (uint16_t *)clEnqueueMapBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                            CL_TRUE, CL_MAP_READ, 0,
+                                            buffer_size, 0, NULL, NULL, &error);
+    if (error || NULL == out[j])
+    {
+        vlog_error("Error: clEnqueueMapBuffer %u failed! err: %d\n", j, error);
+        return error;
+    }
+
+    // Verify data
+    for (j = 0; j < buffer_elements; j++)
+    {
+        for (k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+        {
+            cl_ushort *q = out[k];
+
+            // If we aren't getting the correctly rounded result
+            if (r[j] != q[j])
+            {
+                float test = cl_half_to_float(q[j]);
+                double correct = func.f_f(s[j]);
+                float err = Ulp_Error_Half(q[j], correct);
+                int fail = !(fabsf(err) <= ulps);
+
+                // half_sin/cos/tan are only valid between +-2**16, Inf, NaN
+                if (isRangeLimited
+                    && fabsf(s[j]) > MAKE_HEX_FLOAT(0x1.0p16f, 0x1L, 16)
+                    && fabsf(s[j]) < INFINITY)
+                {
+                    if (fabsf(test) <= half_sin_cos_tan_limit)
+                    {
+                        err = 0;
+                        fail = 0;
+                    }
+                }
+
+                if (fail)
+                {
+                    if (ftz)
+                    {
+                        // retry per section 6.5.3.2
+                        if (IsHalfResultSubnormal(correct, ulps))
+                        {
+                            fail = fail && (test != 0.0f);
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if (IsHalfSubnormal(p[j]))
+                        {
+                            double correct2 = func.f_f(0.0);
+                            double correct3 = func.f_f(-0.0);
+                            float err2 = Ulp_Error_Half(q[j], correct2);
+                            float err3 = Ulp_Error_Half(q[j], correct3);
+                            fail = fail
+                                && ((!(fabsf(err2) <= ulps))
+                                    && (!(fabsf(err3) <= ulps)));
+                            if (fabsf(err2) < fabsf(err)) err = err2;
+                            if (fabsf(err3) < fabsf(err)) err = err3;
+
+                            // retry per section 6.5.3.4
+                            if (IsHalfResultSubnormal(correct2, ulps)
+                                || IsHalfResultSubnormal(correct3, ulps))
+                            {
+                                fail = fail && (test != 0.0f);
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                }
+                if (fabsf(err) > tinfo->maxError)
+                {
+                    tinfo->maxError = fabsf(err);
+                    tinfo->maxErrorValue = s[j];
+                }
+                if (fail)
+                {
+                    vlog_error("\nERROR: %s%s: %f ulp error at %a "
+                               "(half 0x%04x)\nExpected: %a (half 0x%04x) "
+                               "\nActual: %a (half 0x%04x)\n",
+                               job->f->name, sizeNames[k], err, s[j], p[j],
+                               cl_half_to_float(r[j]), r[j], test, q[j]);
+                    error = -1;
+                    return error;
+                }
+            }
+        }
+    }
+
+    for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    {
+        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
+                                             out[j], 0, NULL, NULL)))
+        {
+            vlog_error("Error: clEnqueueUnmapMemObject %u failed 2! err: %d\n",
+                       j, error);
+            return error;
+        }
+    }
+
+    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
+
+    if (0 == (base & 0x0fffffff))
+    {
+        if (gVerboseBruteForce)
+        {
+            vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
+                 "ThreadCount:%2u\n",
+                 base, job->step, job->scale, buffer_elements, job->ulps,
+                 job->threadCount);
+        }
+        else
+        {
+            vlog(".");
+        }
+        fflush(stdout);
+    }
+
+    return error;
+}
+
+} // anonymous namespace
+
+// Brute-force driver for unary half builtins of the form half foo(half).
+int TestFunc_Half_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfoBase test_info_base;
+    cl_int error;
+    size_t i, j;
+    float maxError = 0.0f;
+    double maxErrorVal = 0.0;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    // Init test_info: zero the plain-data base, then copy it into TestInfo
+    memset(&test_info_base, 0, sizeof(test_info_base));
+    TestInfo test_info(test_info_base);
+
+    test_info.threadCount = GetThreadCount();
+
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_half) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_half));
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount =
+            std::max((cl_uint)1,
+                     (cl_uint)((1ULL << sizeof(cl_half) * 8) / test_info.step));
+    }
+
+    test_info.f = f;
+    test_info.ulps = f->half_ulps;
+    test_info.ftz =
+        f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+
+    test_info.tinfo.resize(test_info.threadCount);
+
+    for (i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = { i * test_info.subBufferSize
+                                        * sizeof(cl_half),
+                                    test_info.subBufferSize * sizeof(cl_half) };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zu, %zu}\n",
+                       region.origin, region.size);
+            return error;
+        }
+
+        for (j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of gOutBuffer "
+                           "for region {%zu, %zu}\n",
+                           region.origin, region.size);
+                return error;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            return error;
+        }
+    }
+
+    // Check for special cases for unary half
+    test_info.isRangeLimited = 0;
+    test_info.half_sin_cos_tan_limit = 0;
+    if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos"))
+    {
+        test_info.isRangeLimited = 1;
+        test_info.half_sin_cos_tan_limit = 1.0f
+            + test_info.ulps
+                * (FLT_EPSILON / 2.0f); // out of range results from finite
+                                        // inputs must be in [-1,1]
+    }
+    else if (0 == strcmp(f->name, "half_tan"))
+    {
+        test_info.isRangeLimited = 1;
+        test_info.half_sin_cos_tan_limit =
+            INFINITY; // out of range result from finite inputs must be numeric
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info = { test_info.threadCount, test_info.k,
+                                       test_info.programs, f->nameInCode };
+        error = ThreadPool_Do(BuildKernel_HalfFn,
+                              gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                              &build_info);
+        test_error(error, "ThreadPool_Do: BuildKernel_HalfFn failed\n");
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(TestHalf, test_info.jobCount, &test_info);
+
+        // Accumulate the arithmetic errors
+        for (i = 0; i < test_info.threadCount; i++)
+        {
+            if (test_info.tinfo[i].maxError > maxError)
+            {
+                maxError = test_info.tinfo[i].maxError;
+                maxErrorVal = test_info.tinfo[i].maxErrorValue;
+            }
+        }
+
+        test_error(error, "ThreadPool_Do: TestHalf failed\n");
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
+    }
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/unary_two_results_half.cpp b/test_conformance/math_brute_force/unary_two_results_half.cpp
new file mode 100644
index 0000000000..7d75c67aef
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_two_results_half.cpp
@@ -0,0 +1,452 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cinttypes>
+#include <cstring>
+
+namespace {
+
+// Kernel-build job callback (used with ThreadPool_Do); p is a BuildKernelInfo.
+cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    const auto kernel_gen = [](const std::string &kernel_name,
+                               const char *builtin, cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
+                              ParameterType::Half, ParameterType::Half,
+                              vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, kernel_gen);
+}
+
+} // anonymous namespace
+
+int TestFunc_Half2_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    Programs programs;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
+    float maxError0 = 0.0f;
+    float maxError1 = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+    float maxErrorVal0 = 0.0f;
+    float maxErrorVal1 = 0.0f;
+    uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
+
+    size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
+                                     size_t(1ULL << (sizeof(cl_half) * 8)));
+    size_t bufferSize = bufferElements * sizeof(cl_half);
+
+    std::vector<cl_uchar> overflow(bufferElements);
+    int isFract = 0 == strcmp("fract", f->nameInCode);
+    int skipNanInf = isFract;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    float half_ulps = f->half_ulps;
+
+    // Build the kernels (one ThreadPool job per tested vector size index)
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
+    if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
+
+    for (uint64_t i = 0; i < (1ULL << 16); i += step)
+    {
+        // Fill input with consecutive half bit patterns starting at i
+        cl_half *pIn = (cl_half *)gIn;
+        for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          bufferSize, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+
+        // Write garbage into BOTH output arrays (gOutBuffer and gOutBuffer2)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, bufferSize);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, bufferSize,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+
+                memset_pattern4(gOut2[j], &pattern, bufferSize);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, bufferSize,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, bufferSize, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer 1 failed!\n");
+
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
+                                            sizeof(pattern), 0, bufferSize, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer 2 failed!\n");
+            }
+        }
+
+        // Run the kernel for each vector size (args: out, out2, in)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_half);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Flush the queue before computing the host reference below
+        if ((error = clFlush(gQueue)))
+        {
+            vlog_error("clFlush failed\n");
+            return error;
+        }
+
+        FPU_mode_type oldMode;
+        RoundingMode oldRoundMode = kRoundToNearestEven;
+        if (isFract)
+        {
+            // Put the host FPU in flush-to-zero mode when the test uses ftz
+            memset(&oldMode, 0, sizeof(oldMode));
+            if (ftz) ForceFTZ(&oldMode);
+
+            // Set the rounding mode to match the device
+            if (gIsInRTZMode)
+                oldRoundMode = set_round(kRoundTowardZero, kfloat);
+        }
+
+        // Calculate the correctly rounded reference result
+        cl_half *ref1 = (cl_half *)gOut_Ref;
+        cl_half *ref2 = (cl_half *)gOut_Ref2;
+
+        if (skipNanInf)
+        {
+            for (size_t j = 0; j < bufferElements; j++)
+            {
+                double dd;
+                feclearexcept(FE_OVERFLOW);
+
+                ref1[j] = HFF((float)f->func.f_fpf(HTF(pIn[j]), &dd));
+                ref2[j] = HFF((float)dd);
+
+                // ensure correct rounding of fract result is not reaching 1
+                if (isFract && HTF(ref1[j]) >= 1.f) ref1[j] = 0x3bff;
+
+                overflow[j] =
+                    FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW));
+            }
+        }
+        else
+        {
+            for (size_t j = 0; j < bufferElements; j++)
+            {
+                double dd;
+                ref1[j] = HFF((float)f->func.f_fpf(HTF(pIn[j]), &dd));
+                ref2[j] = HFF((float)dd);
+            }
+        }
+
+        if (isFract && ftz) RestoreFPState(&oldMode);
+
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
+                                         bufferSize, gOut2[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray2 failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting)
+        {
+            if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
+            break;
+        }
+
+        // Verify data
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                cl_half *test1 = (cl_half *)gOut[k];
+                cl_half *test2 = (cl_half *)gOut2[k];
+
+                // If we aren't getting the correctly rounded result
+                if (ref1[j] != test1[j] || ref2[j] != test2[j])
+                {
+                    double fp_correct1 = 0, fp_correct2 = 0;
+                    float err = 0, err2 = 0;
+
+                    fp_correct1 = f->func.f_fpf(HTF(pIn[j]), &fp_correct2);
+
+                    cl_half correct1 = HFF(fp_correct1);
+                    cl_half correct2 = HFF(fp_correct2);
+
+                    // Per section 10 paragraph 6, accept any result if an input
+                    // or output is an infinity or NaN or overflow
+                    if (skipNanInf)
+                    {
+                        if (skipNanInf && overflow[j]) continue;
+                        // Note: no double rounding here.  Reference functions
+                        // calculate in single precision.
+                        if (IsHalfInfinity(correct1) || IsHalfNaN(correct1)
+                            || IsHalfInfinity(correct2) || IsHalfNaN(correct2)
+                            || IsHalfInfinity(pIn[j]) || IsHalfNaN(pIn[j]))
+                            continue;
+                    }
+
+                    err = Ulp_Error_Half(test1[j], fp_correct1);
+                    err2 = Ulp_Error_Half(test2[j], fp_correct2);
+
+                    int fail =
+                        !(fabsf(err) <= half_ulps && fabsf(err2) <= half_ulps);
+
+                    if (ftz)
+                    {
+                        // retry per section 6.5.3.2
+                        if (IsHalfResultSubnormal(fp_correct1, half_ulps))
+                        {
+                            if (IsHalfResultSubnormal(fp_correct2, half_ulps))
+                            {
+                                fail = fail
+                                    && !(HTF(test1[j]) == 0.0f
+                                         && HTF(test2[j]) == 0.0f);
+                                if (!fail)
+                                {
+                                    err = 0.0f;
+                                    err2 = 0.0f;
+                                }
+                            }
+                            else
+                            {
+                                fail = fail
+                                    && !(HTF(test1[j]) == 0.0f
+                                         && fabsf(err2) <= half_ulps);
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                        else if (IsHalfResultSubnormal(fp_correct2, half_ulps))
+                        {
+                            fail = fail
+                                && !(HTF(test2[j]) == 0.0f
+                                     && fabsf(err) <= half_ulps);
+                            if (!fail) err2 = 0.0f;
+                        }
+
+                        // NOTE(review): err2p/err2n use test1[j], not test2[j]?
+                        // retry per section 6.5.3.3
+                        if (IsHalfSubnormal(pIn[j]))
+                        {
+                            double fp_correctp, fp_correctn;
+                            double fp_correct2p, fp_correct2n;
+                            float errp, err2p, errn, err2n;
+
+                            if (skipNanInf) feclearexcept(FE_OVERFLOW);
+                            fp_correctp = f->func.f_fpf(0.0, &fp_correct2p);
+                            fp_correctn = f->func.f_fpf(-0.0, &fp_correct2n);
+
+                            cl_half correctp = HFF(fp_correctp);
+                            cl_half correctn = HFF(fp_correctn);
+                            cl_half correct2p = HFF(fp_correct2p);
+                            cl_half correct2n = HFF(fp_correct2n);
+
+                            // Per section 10 paragraph 6, accept any result if
+                            // an input or output is a infinity or NaN or
+                            // overflow
+                            if (skipNanInf)
+                            {
+                                if (fetestexcept(FE_OVERFLOW)) continue;
+
+                                // Note: no double rounding here.  Reference
+                                // functions calculate in single precision.
+                                if (IsHalfInfinity(correctp)
+                                    || IsHalfNaN(correctp)
+                                    || IsHalfInfinity(correctn)
+                                    || IsHalfNaN(correctn)
+                                    || IsHalfInfinity(correct2p)
+                                    || IsHalfNaN(correct2p)
+                                    || IsHalfInfinity(correct2n)
+                                    || IsHalfNaN(correct2n))
+                                    continue;
+                            }
+
+                            errp = Ulp_Error_Half(test1[j], fp_correctp);
+                            err2p = Ulp_Error_Half(test1[j], fp_correct2p);
+                            errn = Ulp_Error_Half(test1[j], fp_correctn);
+                            err2n = Ulp_Error_Half(test1[j], fp_correct2n);
+
+                            fail = fail
+                                && ((!(fabsf(errp) <= half_ulps))
+                                    && (!(fabsf(err2p) <= half_ulps))
+                                    && ((!(fabsf(errn) <= half_ulps))
+                                        && (!(fabsf(err2n) <= half_ulps))));
+                            if (fabsf(errp) < fabsf(err)) err = errp;
+                            if (fabsf(errn) < fabsf(err)) err = errn;
+                            if (fabsf(err2p) < fabsf(err2)) err2 = err2p;
+                            if (fabsf(err2n) < fabsf(err2)) err2 = err2n;
+
+                            // retry per section 6.5.3.4
+                            if (IsHalfResultSubnormal(fp_correctp, half_ulps)
+                                || IsHalfResultSubnormal(fp_correctn,
+                                                         half_ulps))
+                            {
+                                if (IsHalfResultSubnormal(fp_correct2p,
+                                                          half_ulps)
+                                    || IsHalfResultSubnormal(fp_correct2n,
+                                                             half_ulps))
+                                {
+                                    fail = fail
+                                        && !(HTF(test1[j]) == 0.0f
+                                             && HTF(test2[j]) == 0.0f);
+                                    if (!fail) err = err2 = 0.0f;
+                                }
+                                else
+                                {
+                                    fail = fail
+                                        && !(HTF(test1[j]) == 0.0f
+                                             && fabsf(err2) <= half_ulps);
+                                    if (!fail) err = 0.0f;
+                                }
+                            }
+                            else if (IsHalfResultSubnormal(fp_correct2p,
+                                                           half_ulps)
+                                     || IsHalfResultSubnormal(fp_correct2n,
+                                                              half_ulps))
+                            {
+                                fail = fail
+                                    && !(HTF(test2[j]) == 0.0f
+                                         && (fabsf(err) <= half_ulps));
+                                if (!fail) err2 = 0.0f;
+                            }
+                        }
+                    }
+                    if (fabsf(err) > maxError0)
+                    {
+                        maxError0 = fabsf(err);
+                        maxErrorVal0 = HTF(pIn[j]);
+                    }
+                    if (fabsf(err2) > maxError1)
+                    {
+                        maxError1 = fabsf(err2);
+                        maxErrorVal1 = HTF(pIn[j]);
+                    }
+                    if (fail)
+                    {
+                        vlog_error("\nERROR: %s%s: {%f, %f} ulp error at %a: "
+                                   "*{%a, %a} vs. {%a, %a}\n",
+                                   f->name, sizeNames[k], err, err2,
+                                   HTF(pIn[j]), HTF(ref1[j]), HTF(ref2[j]),
+                                   HTF(test1[j]), HTF(test2[j]));
+                        return -1;
+                    }
+                }
+            }
+        }
+
+        if (isFract && gIsInRTZMode) (void)set_round(oldRoundMode, kfloat);
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10zu \n",
+                     i, step, bufferSize);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t{%8.2f, %8.2f} @ {%a, %a}", maxError0, maxError1, maxErrorVal0,
+             maxErrorVal1);
+    }
+
+    vlog("\n");
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/math_brute_force/unary_two_results_i_half.cpp b/test_conformance/math_brute_force/unary_two_results_i_half.cpp
new file mode 100644
index 0000000000..d81ff12cb1
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_two_results_i_half.cpp
@@ -0,0 +1,347 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+
+#include <cinttypes>
+#include <climits>
+#include <cstring>
+
+namespace {
+
+// Kernel-build job callback (used with ThreadPool_Do); p is a BuildKernelInfo.
+cl_int BuildKernelFn_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+{
+    const auto kernel_gen = [](const std::string &kernel_name,
+                               const char *builtin, cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
+                              ParameterType::Int, ParameterType::Half,
+                              vector_size_index);
+    };
+    return BuildKernels(*static_cast<BuildKernelInfo *>(p), job_id, kernel_gen);
+}
+
+cl_ulong abs_cl_long(cl_long i)
+{
+    const cl_long sign = i >> 63; // -1 if i < 0 (arithmetic shift), else 0
+    return (i ^ sign) - sign; // i < 0: ~i + 1 == -i; otherwise i unchanged
+}
+
+} // anonymous namespace
+
+int TestFunc_HalfI_Half(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    Programs programs;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
+    float maxError = 0.0f;
+    int64_t maxError2 = 0;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+    float maxErrorVal = 0.0f;
+    float maxErrorVal2 = 0.0f;
+    uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
+
+    // sizeof(cl_half) < sizeof (int32_t)
+    // to prevent overflowing gOut_Ref2 it is necessary to use
+    // bigger type as denominator for buffer size calculation
+    size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_int),
+                                     size_t(1ULL << (sizeof(cl_half) * 8)));
+
+    size_t bufferSizeLo = bufferElements * sizeof(cl_half);
+    size_t bufferSizeHi = bufferElements * sizeof(cl_int);
+
+    cl_ulong maxiError = 0;
+
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+
+    float half_ulps = f->half_ulps;
+
+    maxiError = half_ulps == INFINITY ? CL_ULONG_MAX : 0;
+
+    // Build the kernels (one ThreadPool job per tested vector size index)
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode };
+    if ((error = ThreadPool_Do(BuildKernelFn_HalfFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
+
+    for (uint64_t i = 0; i < (1ULL << 16); i += step)
+    {
+        // Fill input with consecutive half bit patterns starting at i
+        cl_half *pIn = (cl_half *)gIn;
+        for (size_t j = 0; j < bufferElements; j++) pIn[j] = (cl_ushort)i + j;
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          bufferSizeLo, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+
+        // Write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, bufferSizeLo);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, bufferSizeLo,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+
+                memset_pattern4(gOut2[j], &pattern, bufferSizeHi);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, bufferSizeHi,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, bufferSizeLo, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer 1 failed!\n");
+
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j], &pattern,
+                                            sizeof(pattern), 0, bufferSizeHi, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer 2 failed!\n");
+            }
+        }
+
+        // Run the kernel for each vector size (args: out, out2, in)
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            // align working group size with the bigger output type
+            size_t vectorSize = sizeValues[j] * sizeof(cl_int);
+            size_t localCount = (bufferSizeHi + vectorSize - 1) / vectorSize;
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Flush the queue before computing the host reference below
+        if ((error = clFlush(gQueue)))
+        {
+            vlog_error("clFlush failed\n");
+            return error;
+        }
+
+        // Calculate the correctly rounded reference result
+        cl_half *ref1 = (cl_half *)gOut_Ref;
+        int32_t *ref2 = (int32_t *)gOut_Ref2;
+        for (size_t j = 0; j < bufferElements; j++)
+            ref1[j] = HFF((float)f->func.f_fpI(HTF(pIn[j]), ref2 + j));
+
+        // Read the data back; only the reads for the last vector size block
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            cl_bool blocking =
+                (j + 1 < gMaxVectorSizeIndex) ? CL_FALSE : CL_TRUE;
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], blocking, 0,
+                                         bufferSizeLo, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+            if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer2[j], blocking,
+                                             0, bufferSizeHi, gOut2[j], 0, NULL,
+                                             NULL)))
+            {
+                vlog_error("ReadArray2 failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                cl_half *test1 = (cl_half *)(gOut[k]);
+                int32_t *test2 = (int32_t *)(gOut2[k]);
+
+                // If we aren't getting the correctly rounded result
+                if (ref1[j] != test1[j] || ref2[j] != test2[j])
+                {
+                    cl_half test = ((cl_half *)test1)[j];
+                    int correct2 = INT_MIN;
+                    float fp_correct =
+                        (float)f->func.f_fpI(HTF(pIn[j]), &correct2);
+                    cl_half correct = HFF(fp_correct);
+                    float err = correct != test
+                        ? Ulp_Error_Half(test, fp_correct)
+                        : 0.f;
+                    cl_long iErr = (int64_t)test2[j] - (int64_t)correct2;
+                    int fail = !(fabsf(err) <= half_ulps
+                                 && abs_cl_long(iErr) <= maxiError);
+                    if (ftz)
+                    {
+                        // retry per section 6.5.3.2 (accept +0 or -0 result)
+                        if (IsHalfResultSubnormal(fp_correct, half_ulps))
+                        {
+                            fail = fail && !(HTF(test) == 0.0f && iErr == 0);
+                            if (!fail) err = 0.0f;
+                        }
+
+                        // retry per section 6.5.3.3
+                        if (IsHalfSubnormal(pIn[j]))
+                        {
+                            int correct5, correct6;
+                            double fp_correct3 = f->func.f_fpI(0.0, &correct5);
+                            double fp_correct4 = f->func.f_fpI(-0.0, &correct6);
+
+                            float err2 = Ulp_Error_Half(test, fp_correct3);
+                            float err3 = Ulp_Error_Half(test, fp_correct4);
+
+                            cl_long iErr2 =
+                                (long long)test2[j] - (long long)correct5;
+                            cl_long iErr3 =
+                                (long long)test2[j] - (long long)correct6;
+
+                            // Did +0 work?
+                            if (fabsf(err2) <= half_ulps
+                                && abs_cl_long(iErr2) <= maxiError)
+                            {
+                                err = err2;
+                                iErr = iErr2;
+                                fail = 0;
+                            }
+                            // Did -0 work?
+                            else if (fabsf(err3) <= half_ulps
+                                     && abs_cl_long(iErr3) <= maxiError)
+                            {
+                                err = err3;
+                                iErr = iErr3;
+                                fail = 0;
+                            }
+
+                            // retry per section 6.5.3.4
+                            if (fail
+                                && (IsHalfResultSubnormal(correct2, half_ulps)
+                                    || IsHalfResultSubnormal(fp_correct3,
+                                                             half_ulps)))
+                            {
+                                fail = fail
+                                    && !(HTF(test) == 0.0f
+                                         && (abs_cl_long(iErr2) <= maxiError
+                                             || abs_cl_long(iErr3)
+                                                 <= maxiError));
+                                if (!fail)
+                                {
+                                    err = 0.0f;
+                                    iErr = 0;
+                                }
+                            }
+                        }
+                    }
+                    if (fabsf(err) > maxError)
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = HTF(pIn[j]);
+                    }
+                    if (llabs(iErr) > maxError2)
+                    {
+                        maxError2 = llabs(iErr);
+                        maxErrorVal2 = HTF(pIn[j]);
+                    }
+
+                    if (fail)
+                    {
+                        vlog_error("\nERROR: %s%s: {%f, %d} ulp error at %a: "
+                                   "*{%a, %d} vs. {%a, %d}\n",
+                                   f->name, sizeNames[k], err, (int)iErr,
+                                   HTF(pIn[j]), HTF(ref1[j]),
+                                   ((int *)gOut_Ref2)[j], HTF(test), test2[j]);
+                        return -1;
+                    }
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10zu \n",
+                     i, step, bufferSizeHi);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+
+        vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2,
+             maxErrorVal, maxErrorVal2);
+    }
+
+    vlog("\n");
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/math_brute_force/unary_u_half.cpp b/test_conformance/math_brute_force/unary_u_half.cpp
new file mode 100644
index 0000000000..5c83d04e43
--- /dev/null
+++ b/test_conformance/math_brute_force/unary_u_half.cpp
@@ -0,0 +1,239 @@
+//
+// Copyright (c) 2017-2024 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "common.h"
+#include "function_list.h"
+#include "test_functions.h"
+#include "utility.h"
+#include "reference_math.h"
+
+#include <cstring>
+#include <cinttypes>
+
+namespace {
+
+static cl_int BuildKernel_HalfFn(cl_uint job_id, cl_uint thread_id UNUSED,
+                                 void *p)
+{
+    // Kernel source factory given to BuildKernels (Half / UShort parameters)
+    auto make_source = [](const std::string &kernel_name, const char *builtin,
+                          cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Half,
+                              ParameterType::UShort, vector_size_index);
+    };
+    return BuildKernels(*(BuildKernelInfo *)p, job_id, make_source);
+}
+
+} // anonymous namespace
+
+// Brute-force ULP test of a builtin taking ushort and returning half.
+int TestFunc_Half_UShort(const Func *f, MTdata d, bool relaxedMode)
+{
+    int error;
+    Programs programs;
+    KernelMatrix kernels;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    float maxError = 0.0f;
+    int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gHalfCapabilities);
+    float maxErrorVal = 0.0f;
+    uint64_t step = getTestStep(sizeof(cl_half), BUFFER_SIZE);
+    size_t bufferElements = std::min(BUFFER_SIZE / sizeof(cl_half),
+                                     size_t(1ULL << (sizeof(cl_half) * 8)));
+    size_t bufferSize = bufferElements * sizeof(cl_half);
+    logFunctionInfo(f->name, sizeof(cl_half), relaxedMode);
+    const char *name = f->name;
+    float half_ulps = f->half_ulps;
+
+    // Build the kernels, one thread-pool job per tested vector size
+    BuildKernelInfo build_info = { 1, kernels, programs, f->nameInCode };
+    if ((error = ThreadPool_Do(BuildKernel_HalfFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+    {
+        return error;
+    }
+
+    for (uint64_t i = 0; i < (1ULL << 32); i += step)
+    {
+        // Init input array with consecutive 16-bit patterns from (uint16_t)i
+        cl_ushort *p = (cl_ushort *)gIn;
+        for (size_t j = 0; j < bufferElements; j++) p[j] = (uint16_t)i + j;
+
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
+                                          bufferSize, gIn, 0, NULL, NULL)))
+        {
+            vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
+            return error;
+        }
+
+        // write garbage into output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            uint32_t pattern = 0xacdcacdc;
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, bufferSize);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, bufferSize,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                error = clEnqueueFillBuffer(gQueue, gOutBuffer[j], &pattern,
+                                            sizeof(pattern), 0, bufferSize, 0,
+                                            NULL, NULL);
+                test_error(error, "clEnqueueFillBuffer failed!\n");
+            }
+        }
+
+        // Run the kernels
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            size_t vectorSize = sizeValues[j] * sizeof(cl_half);
+            size_t localCount = (bufferSize + vectorSize - 1) / vectorSize;
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
+            {
+                LogBuildError(programs[j]);
+                return error;
+            }
+
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
+            {
+                vlog_error("FAILED -- could not execute kernel\n");
+                return error;
+            }
+        }
+
+        // Flush so the queued work is submitted before the host reference
+        if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
+
+        // Calculate the correctly rounded reference result
+        cl_half *r = (cl_half *)gOut_Ref;
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            if (!strcmp(name, "nan"))
+                r[j] = reference_nanh(p[j]);
+            else
+                r[j] = HFF(f->func.f_u(p[j]));
+        }
+        // Read the data back
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            if ((error =
+                     clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
+                                         bufferSize, gOut[j], 0, NULL, NULL)))
+            {
+                vlog_error("ReadArray failed %d\n", error);
+                return error;
+            }
+        }
+
+        if (gSkipCorrectnessTesting) break;
+
+        // Verify data: compare raw bit patterns first, then fall back to ULPs
+        cl_ushort *t = (cl_ushort *)gOut_Ref;
+        for (size_t j = 0; j < bufferElements; j++)
+        {
+            for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
+            {
+                cl_ushort *q = (cl_ushort *)(gOut[k]);
+
+                // If we aren't getting the correctly rounded result
+                if (t[j] != q[j])
+                {
+                    double test = cl_half_to_float(q[j]);
+                    double correct;
+                    if (!strcmp(name, "nan"))
+                        correct = cl_half_to_float(reference_nanh(p[j]));
+                    else
+                        correct = f->func.f_u(p[j]);
+
+                    float err = Ulp_Error_Half(q[j], correct);
+                    int fail = !(fabsf(err) <= half_ulps);
+
+                    if (fail)
+                    {
+                        if (ftz)
+                        {
+                            // retry per section 6.5.3.2
+                            if (IsHalfResultSubnormal(correct, half_ulps))
+                            {
+                                fail = fail && (test != 0.0f);
+                                if (!fail) err = 0.0f;
+                            }
+                        }
+                    }
+                    if (fabsf(err) > maxError)
+                    {
+                        maxError = fabsf(err);
+                        maxErrorVal = p[j];
+                    }
+                    if (fail)
+                    {
+                        vlog_error(
+                            "\n%s%s: %f ulp error at 0x%04x \nExpected: %a "
+                            "(0x%04x) \nActual: %a (0x%04x)\n",
+                            f->name, sizeNames[k], err, p[j],
+                            cl_half_to_float(r[j]), r[j], test, q[j]);
+                        return -1;
+                    }
+                }
+            }
+        }
+
+        if (0 == (i & 0x0fffffff))
+        {
+            if (gVerboseBruteForce)
+            {
+                vlog("base:%14" PRIu64 " step:%10" PRIu64
+                     "  bufferSize:%10zu \n",
+                     i, step, bufferSize);
+            }
+            else
+            {
+                vlog(".");
+            }
+            fflush(stdout);
+        }
+    }
+
+    if (!gSkipCorrectnessTesting)
+    {
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+        vlog("\t%8.2f @ %a", maxError, maxErrorVal);
+    }
+    vlog("\n");
+
+    return error;
+}
diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
index 652d990a21..8e9b3a1ad9 100644
--- a/test_conformance/math_brute_force/utility.h
+++ b/test_conformance/math_brute_force/utility.h
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2017-2024 The Khronos Group Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
 #include "harness/testHarness.h"
 #include "harness/ThreadPool.h"
 #include "harness/conversions.h"
+#include "CL/cl_half.h"
 
 #define BUFFER_SIZE (1024 * 1024 * 2)
 #define EMBEDDED_REDUCTION_FACTOR (64)
@@ -61,10 +62,21 @@ extern int gFastRelaxedDerived;
 extern int gWimpyMode;
 extern int gHostFill;
 extern int gIsInRTZMode;
+extern int gHasHalf;
+extern int gInfNanSupport;
+extern int gIsEmbedded;
 extern int gVerboseBruteForce;
 extern uint32_t gMaxVectorSizeIndex;
 extern uint32_t gMinVectorSizeIndex;
 extern cl_device_fp_config gFloatCapabilities;
+extern cl_device_fp_config gHalfCapabilities;
+extern RoundingMode gFloatToHalfRoundingMode;
+
+extern cl_half_rounding_mode gHalfRoundingMode;
+
+#define HFF(num) cl_half_from_float(num, gHalfRoundingMode)
+#define HFD(num) cl_half_from_double(num, gHalfRoundingMode)
+#define HTF(num) cl_half_to_float(num)
 
 #define LOWER_IS_BETTER 0
 #define HIGHER_IS_BETTER 1
@@ -115,6 +127,12 @@ inline int IsFloatResultSubnormal(double x, float ulps)
     return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
 }
 
+inline int IsHalfResultSubnormal(float x, float ulps)
+{
+    const float lowest = fabs(x) - MAKE_HEX_FLOAT(0x1.0p-24, 0x1, -24) * ulps;
+    return lowest < MAKE_HEX_FLOAT(0x1.0p-14, 0x1, -14); // under 2^-14?
+}
+
 inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
 {
     x = x - abs_err;
@@ -157,6 +175,26 @@ inline int IsFloatNaN(double x)
     return ((u.u & 0x7fffffffU) > 0x7F800000U);
 }
 
+inline bool IsHalfNaN(const cl_half v)
+{
+    // Split the FP16 bit pattern into its exponent and mantissa fields
+    const uint16_t exponent_field = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
+    const uint16_t mantissa_field = v & 0x3FF;
+
+    // NaN: every exponent bit set together with a non-zero mantissa
+    return mantissa_field != 0 && exponent_field == 0x1F;
+}
+
+inline bool IsHalfInfinity(const cl_half v)
+{
+    // Split the FP16 bit pattern into its exponent and mantissa fields
+    const uint16_t exponent_field = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
+    const uint16_t mantissa_field = v & 0x3FF;
+
+    // Infinity: every exponent bit set together with an all-zero mantissa
+    return mantissa_field == 0 && exponent_field == 0x1F;
+}
+
 cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
 
 // Windows (since long double got deprecated) sets the x87 to 53-bit precision
diff --git a/test_conformance/mem_host_flags/CMakeLists.txt b/test_conformance/mem_host_flags/CMakeLists.txt
index 4f2b960d6b..73a36f0d42 100644
--- a/test_conformance/mem_host_flags/CMakeLists.txt
+++ b/test_conformance/mem_host_flags/CMakeLists.txt
@@ -6,6 +6,4 @@ set(${MODULE_NAME}_SOURCES
     mem_host_image.cpp
 )
 
-set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
-
 include(../CMakeCommon.txt)
diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp
index a6f90d068b..ddcf6adad5 100644
--- a/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp
+++ b/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp
@@ -135,6 +135,14 @@ cl_int cImage_check_mem_host_no_access<T>::verify_RW_Image_Mapping()
         err = FAILURE;
         return err;
     }
+    else if (dataPtr != nullptr)
+    {
+        log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object "
+                  "created with the CL_MEM_HOST_NO_ACCESS flag should fail "
+                  "and return NULL\n");
+        err = FAILURE;
+        return err;
+    }
     else
     {
         log_info("Test succeeded\n\n");
@@ -154,6 +162,14 @@ cl_int cImage_check_mem_host_no_access<T>::verify_RW_Image_Mapping()
         err = FAILURE;
         return err;
     }
+    else if (dataPtr != nullptr)
+    {
+        log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object "
+                  "created with the CL_MEM_HOST_NO_ACCESS flag should fail "
+                  "and return NULL\n");
+        err = FAILURE;
+        return err;
+    }
     else
     {
         log_info("Test succeeded\n\n");
diff --git a/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp b/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
index babbeea919..eb3341a8fa 100644
--- a/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
+++ b/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
@@ -190,6 +190,14 @@ cl_int cBuffer_check_mem_host_no_access<T>::verify_RW_Buffer_mapping()
         err = FAILURE;
         return FAILURE;
     }
+    else if (dataPtr != nullptr)
+    {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object "
+                  "created with the CL_MEM_HOST_NO_ACCESS flag should fail "
+                  "and return NULL\n");
+        err = FAILURE;
+        return err;
+    }
     else
     {
         log_info("Test succeeded\n\n");
@@ -207,6 +215,15 @@ cl_int cBuffer_check_mem_host_no_access<T>::verify_RW_Buffer_mapping()
         err = FAILURE;
         return FAILURE;
     }
+    else if (dataPtr != nullptr)
+    {
+        log_error(
+            "Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory object "
+            "created with the CL_MEM_HOST_NO_ACCESS flag should fail "
+            "and return NULL\n");
+        err = FAILURE;
+        return err;
+    }
     else
     {
         log_info("Test succeeded\n\n");
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index e43e302f1e..3d539ed572 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -14,10 +14,18 @@
 // limitations under the License.
 //
 #include "harness/os_helpers.h"
-
-#include <string.h>
+#include "harness/typeWrappers.h"
+#include "harness/stringHelpers.h"
+#include "harness/conversions.h"
+
+#include <algorithm>
+#include <array>
+#include <cstdarg>
+#include <cstdint>
 #include <errno.h>
 #include <memory>
+#include <string.h>
+#include <vector>
 
 #if ! defined( _WIN32)
 #if defined(__APPLE__)
@@ -41,6 +49,7 @@
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
 #include "harness/parseParameters.h"
+#include "harness/rounding_mode.h"
 
 #include <CL/cl_ext.h>
 
@@ -49,45 +58,49 @@ typedef  unsigned int uint32_t;
 
 test_status InitCL( cl_device_id device );
 
+namespace {
+
 //-----------------------------------------
-// Static helper functions declaration
+// helper functions declaration
 //-----------------------------------------
 
-static void printUsage( void );
-
 //Stream helper functions
 
 //Associate stdout stream with the file(gFileName):i.e redirect stdout stream to the specific files (gFileName)
-static int acquireOutputStream(int* error);
+int acquireOutputStream(int* error);
 
 //Close the file(gFileName) associated with the stdout stream and disassociates it.
-static void releaseOutputStream(int fd);
+void releaseOutputStream(int fd);
 
 //Get analysis buffer to verify the correctess of printed data
-static void getAnalysisBuffer(char* analysisBuffer);
+void getAnalysisBuffer(char* analysisBuffer);
 
 //Kernel builder helper functions
 
 //Check if the test case is for kernel that has argument
-static int isKernelArgument(testCase* pTestCase,size_t testId);
+int isKernelArgument(testCase* pTestCase, size_t testId);
 
 //Check if the test case treats %p format for void*
-static int isKernelPFormat(testCase* pTestCase,size_t testId);
+int isKernelPFormat(testCase* pTestCase, size_t testId);
 
 //-----------------------------------------
 // Static functions declarations
 //-----------------------------------------
 // Make a program that uses printf for the given type/format,
-static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport = true,bool is64bAddrSpace = false);
+cl_program makePrintfProgram(cl_kernel* kernel_ptr, const cl_context context,
+                             cl_device_id device, const unsigned int testId,
+                             const unsigned int testNum,
+                             const unsigned int formatNum);
 
 // Creates and execute the printf test for the given device, context, type/format
-static int doTest(cl_command_queue queue, cl_context context, const unsigned int testId, const unsigned int testNum, cl_device_id device);
+int doTest(cl_command_queue queue, cl_context context,
+           const unsigned int testId, cl_device_id device);
 
 // Check if device supports long
-static bool isLongSupported(cl_device_id  device_id);
+bool isLongSupported(cl_device_id device_id);
 
 // Check if device address space is 64 bits
-static bool is64bAddressSpace(cl_device_id  device_id);
+bool is64bAddressSpace(cl_device_id device_id);
 
 //Wait until event status is CL_COMPLETE
 int waitForEvent(cl_event* event);
@@ -102,22 +115,27 @@ int waitForEvent(cl_event* event);
 // tracks the subtests
 int s_test_cnt = 0;
 int s_test_fail = 0;
+int s_test_skip = 0;
+
+cl_context gContext;
+cl_command_queue gQueue;
+int gFd;
 
+char gFileName[256];
 
-static cl_context        gContext;
-static cl_command_queue  gQueue;
-static int               gFd;
+MTdataHolder gMTdata;
 
-static char gFileName[256];
+// For the sake of proper logging of negative results
+std::string gLatestKernelSource;
 
 //-----------------------------------------
-// Static helper functions definition
+// helper functions definition
 //-----------------------------------------
 
 //-----------------------------------------
 // acquireOutputStream
 //-----------------------------------------
-static int acquireOutputStream(int* error)
+int acquireOutputStream(int* error)
 {
     int fd = streamDup(fileno(stdout));
     *error = 0;
@@ -132,7 +150,7 @@ static int acquireOutputStream(int* error)
 //-----------------------------------------
 // releaseOutputStream
 //-----------------------------------------
-static void releaseOutputStream(int fd)
+void releaseOutputStream(int fd)
 {
     fflush(stdout);
     streamDup2(fd,fileno(stdout));
@@ -142,7 +160,8 @@ static void releaseOutputStream(int fd)
 //-----------------------------------------
 // printfCallBack
 //-----------------------------------------
-static void CL_CALLBACK printfCallBack(const char *printf_data, size_t len, size_t final, void *user_data)
+void CL_CALLBACK printfCallBack(const char* printf_data, size_t len,
+                                size_t final, void* user_data)
 {
     fwrite(printf_data, 1, len, stdout);
 }
@@ -150,30 +169,33 @@ static void CL_CALLBACK printfCallBack(const char *printf_data, size_t len, size
 //-----------------------------------------
 // getAnalysisBuffer
 //-----------------------------------------
-static void getAnalysisBuffer(char* analysisBuffer)
+void getAnalysisBuffer(char* analysisBuffer)
 {
     FILE *fp;
     memset(analysisBuffer,0,ANALYSIS_BUFFER_SIZE);
 
-    fp = fopen(gFileName,"r");
-    if(NULL == fp)
+    if (NULL == (fp = fopen(gFileName, "r")))
+    {
         log_error("Failed to open analysis buffer ('%s')\n", strerror(errno));
-    else
-        while(fgets(analysisBuffer, ANALYSIS_BUFFER_SIZE, fp) != NULL );
+        return; // never reach fclose() with a null stream
+    }
+    // Stop one byte short so the zero-filled buffer stays NUL-terminated
+    if (0 == std::fread(analysisBuffer, 1, ANALYSIS_BUFFER_SIZE - 1, fp))
+        log_error("No data read from analysis buffer\n");
     fclose(fp);
 }
 
 //-----------------------------------------
 // isKernelArgument
 //-----------------------------------------
-static int isKernelArgument(testCase* pTestCase,size_t testId)
+int isKernelArgument(testCase* pTestCase, size_t testId)
 {
     return strcmp(pTestCase->_genParameters[testId].addrSpaceArgumentTypeQualifier,"");
 }
 //-----------------------------------------
 // isKernelPFormat
 //-----------------------------------------
-static int isKernelPFormat(testCase* pTestCase,size_t testId)
+int isKernelPFormat(testCase* pTestCase, size_t testId)
 {
     return strcmp(pTestCase->_genParameters[testId].addrSpacePAdd,"");
 }
@@ -200,13 +222,159 @@ int waitForEvent(cl_event* event)
 }
 
 //-----------------------------------------
-// Static helper functions definition
+// makeMixedFormatPrintfProgram
+// Generates in-flight printf kernel with format string including:
+//     -data before conversion flags (randomly generated ascii string)
+//     -randomly generated conversion flags (integer or floating point)
+//     -data after conversion flags (randomly generated ascii string).
+// Moreover it generates suitable arguments.
+// example: printf("zH, %u, %a, D+{gy\n", -929240879, 24295.671875f)
 //-----------------------------------------
+cl_program makeMixedFormatPrintfProgram(cl_kernel* kernel_ptr,
+                                        const cl_context context,
+                                        const cl_device_id device,
+                                        const unsigned int testId,
+                                        const unsigned int testNum,
+                                        const std::string& testname)
+{
+    auto gen_char = [&]() {
+        static const char dict[] = { // omits percent, quote, backslash and 0
+            " \t!#$&()*+,-./"
+            "123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`"
+            "abcdefghijklmnopqrstuvwxyz{|}~"
+        };
+        return dict[genrand_int32(gMTdata) % ((int)sizeof(dict) - 1)];
+    };
+
+    std::array<std::vector<std::string>, 2> formats = { // [0] float, [1] int
+        { { "%f", "%e", "%g", "%a", "%F", "%E", "%G", "%A" },
+          { "%d", "%i", "%u", "%x", "%o", "%X" } }
+    };
+    std::vector<char> data_before(2 + genrand_int32(gMTdata) % 8);
+    std::vector<char> data_after(2 + genrand_int32(gMTdata) % 8);
+
+    std::generate(data_before.begin(), data_before.end(), gen_char);
+    std::generate(data_after.begin(), data_after.end(), gen_char);
+
+    cl_uint num_args = 2 + genrand_int32(gMTdata) % 4; // 2..5 printf arguments
+
+    // Map device rounding to CTS rounding type
+    // get_default_rounding_mode supports RNE and RTZ
+    auto get_rounding = [](const cl_device_fp_config& fpConfig) {
+        if (fpConfig == CL_FP_ROUND_TO_NEAREST)
+        {
+            return kRoundToNearestEven;
+        }
+        else if (fpConfig == CL_FP_ROUND_TO_ZERO)
+        {
+            return kRoundTowardZero;
+        }
+        else
+        {
+            assert(false && "Unreachable");
+        }
+        return kDefaultRoundingMode;
+    };
+
+    const RoundingMode hostRound = get_round();
+    RoundingMode deviceRound = get_rounding(get_default_rounding_mode(device));
+
+    std::ostringstream format_str;
+    std::ostringstream ref_str;
+    std::ostringstream source_gen;
+    std::ostringstream args_str;
+    source_gen << "__kernel void " << testname
+               << "(void)\n"
+                  "{\n"
+                  "   printf(\"";
+    for (auto it : data_before)
+    {
+        format_str << it;
+        ref_str << it;
+    }
+    format_str << ", ";
+    ref_str << ", ";
+
+
+    for (cl_uint i = 0; i < num_args; i++)
+    {
+        std::uint8_t is_int = genrand_int32(gMTdata) % 2; // 0: float, 1: int
+
+        // Set CPU rounding mode to match that of the device
+        set_round(deviceRound, is_int != 0 ? kint : kfloat);
+
+        std::string format =
+            formats[is_int][genrand_int32(gMTdata) % formats[is_int].size()];
+        format_str << format << ", ";
+
+        if (is_int)
+        {
+            int arg = genrand_int32(gMTdata);
+            args_str << str_sprintf("%d", arg) << ", ";
+            ref_str << str_sprintf(format, arg) << ", ";
+        }
+        else
+        {
+            const float max_range = 100000.f;
+            float arg = get_random_float(-max_range, max_range, gMTdata);
+            args_str << str_sprintf("%f", arg) << "f, ";
+            ref_str << str_sprintf(format, arg) << ", ";
+        }
+    }
+    // Restore the original CPU rounding mode
+    set_round(hostRound, kfloat);
+
+    for (auto it : data_after)
+    {
+        format_str << it;
+        ref_str << it;
+    }
+
+    {
+        std::ostringstream args_cpy;
+        args_cpy << args_str.str();
+        args_cpy.seekp(-2, std::ios_base::end); // overwrite the trailing ", "
+        args_cpy << ")\n";
+        log_info("%d) testing printf(\"%s\\n\", %s", testNum,
+                 format_str.str().c_str(), args_cpy.str().c_str());
+    }
+
+    args_str.seekp(-2, std::ios_base::end); // overwrite the trailing ", "
+    args_str << ");\n}\n";
+
+
+    source_gen << format_str.str() << "\\n\""
+               << ", " << args_str.str();
+
+    std::string kernel_source = source_gen.str();
+    const char* ptr = kernel_source.c_str();
+
+    cl_program program;
+    cl_int err = create_single_kernel_helper(context, &program, kernel_ptr, 1,
+                                             &ptr, testname.c_str());
+
+    gLatestKernelSource = kernel_source.c_str(); // kept for failure logs
+
+    // Append the host-formatted reference string to this test's _correctBuffer
+    allTestCase[testId]->_correctBuffer.push_back(ref_str.str());
+
+    if (!program || err)
+    {
+        log_error("create_single_kernel_helper failed\n");
+        return NULL;
+    }
+
+    return program;
+}
 
 //-----------------------------------------
 // makePrintfProgram
 //-----------------------------------------
-static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport,bool is64bAddrSpace)
+cl_program makePrintfProgram(cl_kernel* kernel_ptr, const cl_context context,
+                             const cl_device_id device,
+                             const unsigned int testId,
+                             const unsigned int testNum,
+                             const unsigned int formatNum)
 {
     int err;
     cl_program program;
@@ -215,58 +383,6 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     char addrSpacePAddArgument[256] = {0};
     char extension[128] = { 0 };
 
-    //Program Source code for int,float,octal,hexadecimal,char,string
-    const char* sourceGen[] = {
-        extension,
-        "__kernel void ",
-        testname,
-        "(void)\n",
-        "{\n"
-        "   printf(\"",
-        allTestCase[testId]->_genParameters[testNum].genericFormat,
-        "\\n\",",
-        allTestCase[testId]->_genParameters[testNum].dataRepresentation,
-        ");",
-        "}\n"
-    };
-    //Program Source code for vector
-    const char* sourceVec[] = {
-        extension,
-        "__kernel void ",
-        testname,
-        "(void)\n",
-        "{\n",
-        allTestCase[testId]->_genParameters[testNum].dataType,
-        allTestCase[testId]->_genParameters[testNum].vectorSize,
-        " tmp = (",
-        allTestCase[testId]->_genParameters[testNum].dataType,
-        allTestCase[testId]->_genParameters[testNum].vectorSize,
-        ")",
-        allTestCase[testId]->_genParameters[testNum].dataRepresentation,
-        ";",
-        "   printf(\"",
-        allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,
-        "v",
-        allTestCase[testId]->_genParameters[testNum].vectorSize,
-        allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,
-        "\\n\",",
-        "tmp);",
-        "}\n"
-    };
-    //Program Source code for address space
-    const char *sourceAddrSpace[] = {
-        "__kernel void ", testname,"(",addrSpaceArgument,
-        ")\n{\n",
-        allTestCase[testId]->_genParameters[testNum].addrSpaceVariableTypeQualifier,
-        "printf(",
-        allTestCase[testId]->_genParameters[testNum].genericFormat,
-        ",",
-        allTestCase[testId]->_genParameters[testNum].addrSpaceParameter,
-        "); ",
-        addrSpacePAddArgument,
-        "\n}\n"
-    };
-
     //Update testname
     std::snprintf(testname, sizeof(testname), "%s%d", "test", testId);
 
@@ -301,22 +417,107 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
             strcpy(extension,
                    "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
 
+        // Program Source code for vector
+        const char* sourceVec[] = {
+            extension,
+            "__kernel void ",
+            testname,
+            "(void)\n",
+            "{\n",
+            allTestCase[testId]->_genParameters[testNum].dataType,
+            allTestCase[testId]->_genParameters[testNum].vectorSize,
+            " tmp = (",
+            allTestCase[testId]->_genParameters[testNum].dataType,
+            allTestCase[testId]->_genParameters[testNum].vectorSize,
+            ")",
+            allTestCase[testId]->_genParameters[testNum].dataRepresentation,
+            ";",
+            "   printf(\"",
+            allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,
+            "v",
+            allTestCase[testId]->_genParameters[testNum].vectorSize,
+            allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,
+            "\\n\",",
+            "tmp);",
+            "}\n"
+        };
+
         err = create_single_kernel_helper(
             context, &program, kernel_ptr,
             sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, testname);
+
+        gLatestKernelSource =
+            concat_kernel(sourceVec, sizeof(sourceVec) / sizeof(sourceVec[0]));
     }
     else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
     {
+        // Program Source code for address space
+        const char* sourceAddrSpace[] = {
+            "__kernel void ",
+            testname,
+            "(",
+            addrSpaceArgument,
+            ")\n{\n",
+            allTestCase[testId]
+                ->_genParameters[testNum]
+                .addrSpaceVariableTypeQualifier,
+            "printf(",
+            allTestCase[testId]
+                ->_genParameters[testNum]
+                .genericFormats[formatNum]
+                .c_str(),
+            ",",
+            allTestCase[testId]->_genParameters[testNum].addrSpaceParameter,
+            "); ",
+            addrSpacePAddArgument,
+            "\n}\n"
+        };
+
         err = create_single_kernel_helper(context, &program, kernel_ptr,
                                           sizeof(sourceAddrSpace)
                                               / sizeof(sourceAddrSpace[0]),
                                           sourceAddrSpace, testname);
+
+        gLatestKernelSource =
+            concat_kernel(sourceAddrSpace,
+                          sizeof(sourceAddrSpace) / sizeof(sourceAddrSpace[0]));
+    }
+    else if (allTestCase[testId]->_type == TYPE_MIXED_FORMAT_RANDOM)
+    {
+        return makeMixedFormatPrintfProgram(kernel_ptr, context, device, testId,
+                                            testNum, testname);
     }
     else
     {
-        err = create_single_kernel_helper(
-            context, &program, kernel_ptr,
-            sizeof(sourceGen) / sizeof(sourceGen[0]), sourceGen, testname);
+        // Program Source code for int,float,octal,hexadecimal,char,string
+        std::ostringstream sourceGen;
+        sourceGen << extension << "__kernel void " << testname
+                  << "(void)\n"
+                     "{\n"
+                     "   printf(\""
+                  << allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .genericFormats[formatNum]
+                         .c_str()
+                  << "\\n\"";
+
+        if (allTestCase[testId]->_genParameters[testNum].dataRepresentation)
+        {
+            sourceGen << ","
+                      << allTestCase[testId]
+                             ->_genParameters[testNum]
+                             .dataRepresentation;
+        }
+
+        sourceGen << ");\n}\n";
+
+        std::string kernel_source = sourceGen.str();
+        const char* ptr = kernel_source.c_str();
+
+        err = create_single_kernel_helper(context, &program, kernel_ptr, 1,
+                                          &ptr, testname);
+
+        gLatestKernelSource = kernel_source.c_str();
     }
 
     if (!program || err) {
@@ -330,7 +531,7 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
 //-----------------------------------------
 // isLongSupported
 //-----------------------------------------
-static bool isLongSupported(cl_device_id device_id)
+bool isLongSupported(cl_device_id device_id)
 {
     size_t tempSize = 0;
     cl_int status;
@@ -374,7 +575,7 @@ static bool isLongSupported(cl_device_id device_id)
 //-----------------------------------------
 // is64bAddressSpace
 //-----------------------------------------
-static bool is64bAddressSpace(cl_device_id  device_id)
+bool is64bAddressSpace(cl_device_id device_id)
 {
     cl_int status;
     cl_uint addrSpaceB;
@@ -396,521 +597,396 @@ static bool is64bAddressSpace(cl_device_id  device_id)
     else
         return false;
 }
+
 //-----------------------------------------
-// doTest
+// subtest_fail
 //-----------------------------------------
-static int doTest(cl_command_queue queue, cl_context context, const unsigned int testId, const unsigned int testNum, cl_device_id device)
+void subtest_fail(const char* msg, ...)
 {
-    if ((allTestCase[testId]->_type == TYPE_HALF
-         || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
-        && !is_extension_available(device, "cl_khr_fp16"))
+    if (msg)
     {
-        log_info(
-            "Skipping half because cl_khr_fp16 extension is not supported.\n");
-        return TEST_SKIPPED_ITSELF;
+        va_list argptr;
+        va_start(argptr, msg);
+        vfprintf(stderr, msg, argptr);
+        va_end(argptr);
     }
+    ++s_test_fail;
+    ++s_test_cnt;
+}
 
-    if(allTestCase[testId]->_type == TYPE_VECTOR)
-    {
-        if ((strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
-                    "half")
-             == 0)
-            && !is_extension_available(device, "cl_khr_fp16"))
-        {
-            log_info("Skipping half because cl_khr_fp16 extension is not "
-                     "supported.\n");
-            return TEST_SKIPPED_ITSELF;
-        }
+//-----------------------------------------
+// logTestType - printout test details
+//-----------------------------------------
 
-        log_info("%d)testing printf(\"%sv%s%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,allTestCase[testId]->_genParameters[testNum].vectorSize,
-                 allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,allTestCase[testId]->_genParameters[testNum].dataRepresentation);
+void logTestType(const unsigned testId, const unsigned testNum,
+                 unsigned formatNum)
+{
+    if (allTestCase[testId]->_type == TYPE_VECTOR)
+    {
+        log_info(
+            "%d)testing printf(\"%sv%s%s\",%s)\n", testNum,
+            allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,
+            allTestCase[testId]->_genParameters[testNum].vectorSize,
+            allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,
+            allTestCase[testId]->_genParameters[testNum].dataRepresentation);
     }
-    else if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
+    else if (allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
     {
-        if(isKernelArgument(allTestCase[testId], testNum))
+        if (isKernelArgument(allTestCase[testId], testNum))
         {
-            log_info("%d)testing kernel //argument %s \n   printf(%s,%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].addrSpaceArgumentTypeQualifier,
-                     allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].addrSpaceParameter);
+            log_info("%d)testing kernel //argument %s \n   printf(%s,%s)\n",
+                     testNum,
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .addrSpaceArgumentTypeQualifier,
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .genericFormats[formatNum]
+                         .c_str(),
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .addrSpaceParameter);
         }
         else
         {
-            log_info("%d)testing kernel //variable %s \n   printf(%s,%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].addrSpaceVariableTypeQualifier,
-                     allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].addrSpaceParameter);
+            log_info("%d)testing kernel //variable %s \n   printf(%s,%s)\n",
+                     testNum,
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .addrSpaceVariableTypeQualifier,
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .genericFormats[formatNum]
+                         .c_str(),
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .addrSpaceParameter);
         }
     }
-    else
+    else if (allTestCase[testId]->_type != TYPE_MIXED_FORMAT_RANDOM)
     {
-        log_info("%d)testing printf(\"%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].genericFormat,allTestCase[testId]->_genParameters[testNum].dataRepresentation);
+        log_info("%d)testing printf(\"%s\"", testNum,
+                 allTestCase[testId]
+                     ->_genParameters[testNum]
+                     .genericFormats[formatNum]
+                     .c_str());
+        if (allTestCase[testId]->_genParameters[testNum].dataRepresentation)
+            log_info(",%s",
+                     allTestCase[testId]
+                         ->_genParameters[testNum]
+                         .dataRepresentation);
+        log_info(")\n");
     }
 
-    // Long support for varible type
-    if(allTestCase[testId]->_type == TYPE_VECTOR && !strcmp(allTestCase[testId]->_genParameters[testNum].dataType,"long") && !isLongSupported(device))
-    {
-        log_info( "Long is not supported, test not run.\n" );
-        return 0;
-    }
+    fflush(stdout);
+}
 
-    // Long support for address in FULL_PROFILE/EMBEDDED_PROFILE
-    bool isLongSupport = true;
-    if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE && isKernelPFormat(allTestCase[testId],testNum) && !isLongSupported(device))
+//-----------------------------------------
+// doTest
+//-----------------------------------------
+int doTest(cl_command_queue queue, cl_context context,
+           const unsigned int testId, cl_device_id device)
+{
+    int err = TEST_FAIL;
+
+    if ((allTestCase[testId]->_type == TYPE_HALF
+         || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
+        && !is_extension_available(device, "cl_khr_fp16"))
     {
-        isLongSupport = false;
+        log_info("Skipping half because cl_khr_fp16 extension is not "
+                 "supported.\n");
+        return TEST_SKIPPED_ITSELF;
     }
 
-    int err;
-    cl_program program;
-    cl_kernel  kernel;
-    cl_mem d_out = NULL;
-    cl_mem d_a = NULL;
-    char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
-    cl_uint out32 = 0;
-    cl_ulong out64 = 0;
-    int fd = -1;
-
-   // Define an index space (global work size) of threads for execution.
-   size_t globalWorkSize[1];
-
-    program = makePrintfProgram(&kernel, context,testId,testNum,isLongSupport,is64bAddressSpace(device));
-    if (!program || !kernel) {
-        ++s_test_fail;
-        ++s_test_cnt;
-        return -1;
-    }
+    auto& genParams = allTestCase[testId]->_genParameters;
+
+    auto fail_count = s_test_fail;
+    auto pass_count = s_test_cnt;
+    auto skip_count = s_test_skip;
 
-    //For address space test if there is kernel argument - set it
-    if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE )
+    for (unsigned testNum = 0; testNum < genParams.size(); testNum++)
     {
-        if(isKernelArgument(allTestCase[testId],testNum))
+        if (allTestCase[testId]->_type == TYPE_VECTOR)
         {
-            int a = 2;
-            d_a = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
-                sizeof(int), &a, &err);
-            if(err!= CL_SUCCESS || d_a == NULL) {
-                log_error("clCreateBuffer failed\n");
-                goto exit;
+            if ((strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
+                        "half")
+                 == 0)
+                && !is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Skipping half because cl_khr_fp16 extension is not "
+                         "supported.\n");
+
+                s_test_skip++;
+                s_test_cnt++;
+                continue;
             }
-            err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
-            if(err!= CL_SUCCESS) {
-                log_error("clSetKernelArg failed\n");
-                goto exit;
+
+            // Long support for varible type
+            if (!strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
+                        "long")
+                && !isLongSupported(device))
+            {
+                log_info("Long is not supported, test not run.\n");
+                s_test_skip++;
+                s_test_cnt++;
+                continue;
             }
         }
-        //For address space test if %p is tested
-        if(isKernelPFormat(allTestCase[testId],testNum))
+
+        auto genParamsVec = allTestCase[testId]->_genParameters;
+        auto genFormatVec = genParamsVec[testNum].genericFormats;
+
+        for (unsigned formatNum = 0; formatNum < genFormatVec.size();
+             formatNum++)
         {
-            d_out = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                sizeof(cl_ulong), NULL, &err);
-            if(err!= CL_SUCCESS || d_out == NULL) {
-                log_error("clCreateBuffer failed\n");
-                goto exit;
+            logTestType(testId, testNum, formatNum);
+
+            clProgramWrapper program;
+            clKernelWrapper kernel;
+            clMemWrapper d_out;
+            clMemWrapper d_a;
+            char _analysisBuffer[ANALYSIS_BUFFER_SIZE];
+            cl_uint out32 = 0;
+            cl_ulong out64 = 0;
+            int fd = -1;
+
+            // Define an index space (global work size) of threads for
+            // execution.
+            size_t globalWorkSize[1];
+
+            program = makePrintfProgram(&kernel, context, device, testId,
+                                        testNum, formatNum);
+            if (!program || !kernel)
+            {
+                subtest_fail(nullptr);
+                continue;
             }
-            err  = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_out);
-            if(err!= CL_SUCCESS) {
-                log_error("clSetKernelArg failed\n");
-                goto exit;
+
+            // For address space test if there is kernel argument - set it
+            if (allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
+            {
+                if (isKernelArgument(allTestCase[testId], testNum))
+                {
+                    int a = 2;
+                    d_a = clCreateBuffer(
+                        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                        sizeof(int), &a, &err);
+                    if (err != CL_SUCCESS || d_a == NULL)
+                    {
+                        subtest_fail("clCreateBuffer failed\n");
+                        continue;
+                    }
+                    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
+                    if (err != CL_SUCCESS)
+                    {
+                        subtest_fail("clSetKernelArg failed\n");
+                        continue;
+                    }
+                }
+                // For address space test if %p is tested
+                if (isKernelPFormat(allTestCase[testId], testNum))
+                {
+                    d_out = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                           sizeof(cl_ulong), NULL, &err);
+                    if (err != CL_SUCCESS || d_out == NULL)
+                    {
+                        subtest_fail("clCreateBuffer failed\n");
+                        continue;
+                    }
+                    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_out);
+                    if (err != CL_SUCCESS)
+                    {
+                        subtest_fail("clSetKernelArg failed\n");
+                        continue;
+                    }
+                }
             }
-        }
-    }
 
-    fd = acquireOutputStream(&err);
-    if (err != 0)
-    {
-        log_error("Error while redirection stdout to file");
-        goto exit;
-    }
-    globalWorkSize[0] = 1;
-    cl_event ndrEvt;
-    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL,&ndrEvt);
-    if (err != CL_SUCCESS) {
-        releaseOutputStream(fd);
-        log_error("\n clEnqueueNDRangeKernel failed errcode:%d\n", err);
-        ++s_test_fail;
-        goto exit;
-    }
+            fd = acquireOutputStream(&err);
+            if (err != 0)
+            {
+                subtest_fail("Error while redirection stdout to file");
+                continue;
+            }
+            globalWorkSize[0] = 1;
+            cl_event ndrEvt;
+            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalWorkSize,
+                                         NULL, 0, NULL, &ndrEvt);
+            if (err != CL_SUCCESS)
+            {
+                releaseOutputStream(fd);
+                subtest_fail("\n clEnqueueNDRangeKernel failed errcode:%d\n",
+                             err);
+                continue;
+            }
 
-    fflush(stdout);
-    err = clFlush(queue);
-    if(err != CL_SUCCESS)
-    {
-        releaseOutputStream(fd);
-        log_error("clFlush failed\n");
-        goto exit;
-    }
-    //Wait until kernel finishes its execution and (thus) the output printed from the kernel
-    //is immediately printed
-    err = waitForEvent(&ndrEvt);
+            fflush(stdout);
+            err = clFlush(queue);
+            if (err != CL_SUCCESS)
+            {
+                releaseOutputStream(fd);
+                subtest_fail("clFlush failed : %d\n", err);
+                continue;
+            }
+            // Wait until kernel finishes its execution and (thus) the output
+            // printed from the kernel is immediately printed
+            err = waitForEvent(&ndrEvt);
 
-    releaseOutputStream(fd);
+            releaseOutputStream(fd);
 
-    if(err != CL_SUCCESS)
-    {
-        log_error("waitforEvent failed\n");
-        goto exit;
-    }
-    fflush(stdout);
+            if (err != CL_SUCCESS)
+            {
+                subtest_fail("waitforEvent failed : %d\n", err);
+                continue;
+            }
+            fflush(stdout);
 
-    if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE && isKernelPFormat(allTestCase[testId],testNum))
-    {
-        // Read the OpenCL output buffer (d_out) to the host output array (out)
-        if(!is64bAddressSpace(device))//32-bit address space
-        {
-            clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, sizeof(cl_int),&out32,
-                0, NULL, NULL);
-        }
-        else //64-bit address space
-        {
-            clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0, sizeof(cl_ulong),&out64,
-                0, NULL, NULL);
+            if (allTestCase[testId]->_type == TYPE_ADDRESS_SPACE
+                && isKernelPFormat(allTestCase[testId], testNum))
+            {
+                // Read the OpenCL output buffer (d_out) to the host output
+                // array (out)
+                if (!is64bAddressSpace(device)) // 32-bit address space
+                {
+                    clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0,
+                                        sizeof(cl_int), &out32, 0, NULL, NULL);
+                }
+                else // 64-bit address space
+                {
+                    clEnqueueReadBuffer(queue, d_out, CL_TRUE, 0,
+                                        sizeof(cl_ulong), &out64, 0, NULL,
+                                        NULL);
+                }
+            }
+
+            //
+            // Get the output printed from the kernel to _analysisBuffer
+            // and verify its correctness
+            getAnalysisBuffer(_analysisBuffer);
+            if (!is64bAddressSpace(device)) // 32-bit address space
+            {
+                if (0
+                    != verifyOutputBuffer(_analysisBuffer, allTestCase[testId],
+                                          testNum, (cl_ulong)out32))
+                {
+                    subtest_fail(
+                        "verifyOutputBuffer failed with kernel: "
+                        "\n%s\n expected: %s\n got:      %s\n",
+                        gLatestKernelSource.c_str(),
+                        allTestCase[testId]->_correctBuffer[testNum].c_str(),
+                        _analysisBuffer);
+                    continue;
+                }
+            }
+            else // 64-bit address space
+            {
+                if (0
+                    != verifyOutputBuffer(_analysisBuffer, allTestCase[testId],
+                                          testNum, out64))
+                {
+                    subtest_fail(
+                        "verifyOutputBuffer failed with kernel: "
+                        "\n%s\n expected: %s\n got:      %s\n",
+                        gLatestKernelSource.c_str(),
+                        allTestCase[testId]->_correctBuffer[testNum].c_str(),
+                        _analysisBuffer);
+                    continue;
+                }
+            }
         }
+        ++s_test_cnt;
     }
 
-    //
-    //Get the output printed from the kernel to _analysisBuffer
-    //and verify its correctness
-    getAnalysisBuffer(_analysisBuffer);
-    if(!is64bAddressSpace(device)) //32-bit address space
-    {
-        if(0 != verifyOutputBuffer(_analysisBuffer,allTestCase[testId],testNum,(cl_ulong) out32))
-            err = ++s_test_fail;
-    }
-    else //64-bit address space
-    {
-        if(0 != verifyOutputBuffer(_analysisBuffer,allTestCase[testId],testNum,out64))
-            err = ++s_test_fail;
-    }
-exit:
-    if(clReleaseKernel(kernel) != CL_SUCCESS)
-        log_error("clReleaseKernel failed\n");
-    if(clReleaseProgram(program) != CL_SUCCESS)
-        log_error("clReleaseProgram failed\n");
-    if(d_out)
-        clReleaseMemObject(d_out);
-    if(d_a)
-        clReleaseMemObject(d_a);
-    ++s_test_cnt;
-    return err;
+    // all subtests skipped ?
+    if (s_test_skip - skip_count == s_test_cnt - pass_count)
+        return TEST_SKIPPED_ITSELF;
+    return s_test_fail - fail_count;
 }
 
-
-int test_int_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 0, deviceID);
-}
-int test_int_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 1, deviceID);
-}
-int test_int_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 2, deviceID);
-}
-int test_int_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 3, deviceID);
-}
-int test_int_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 4, deviceID);
-}
-int test_int_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 5, deviceID);
-}
-int test_int_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 6, deviceID);
-}
-int test_int_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 7, deviceID);
-}
-int test_int_8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_INT, 8, deviceID);
 }
 
-
-int test_half_0(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 0, deviceID);
-}
-int test_half_1(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 1, deviceID);
-}
-int test_half_2(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 2, deviceID);
-}
-int test_half_3(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
+int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+             int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_HALF, 3, deviceID);
+    return doTest(gQueue, gContext, TYPE_INT, deviceID);
 }
-int test_half_4(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 4, deviceID);
-}
-int test_half_5(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 5, deviceID);
-}
-int test_half_6(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 6, deviceID);
-}
-int test_half_7(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 7, deviceID);
-}
-int test_half_8(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 8, deviceID);
-}
-int test_half_9(cl_device_id deviceID, cl_context context,
-                cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF, 9, deviceID);
-}
-
 
-int test_half_limits_0(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements)
+int test_half(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+              int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 0, deviceID);
+    return doTest(gQueue, gContext, TYPE_HALF, deviceID);
 }
-int test_half_limits_1(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 1, deviceID);
-}
-int test_half_limits_2(cl_device_id deviceID, cl_context context,
-                       cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 2, deviceID);
-}
-
 
-int test_float_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 0, deviceID);
-}
-int test_float_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 1, deviceID);
-}
-int test_float_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 2, deviceID);
-}
-int test_float_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 3, deviceID);
-}
-int test_float_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 4, deviceID);
-}
-int test_float_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 5, deviceID);
-}
-int test_float_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 6, deviceID);
-}
-int test_float_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 7, deviceID);
-}
-int test_float_8(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 8, deviceID);
-}
-int test_float_9(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 9, deviceID);
-}
-int test_float_10(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 10, deviceID);
-}
-int test_float_11(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 11, deviceID);
-}
-int test_float_12(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 12, deviceID);
-}
-int test_float_13(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 13, deviceID);
-}
-int test_float_14(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 14, deviceID);
-}
-int test_float_15(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 15, deviceID);
-}
-int test_float_16(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT, 16, deviceID);
-}
-int test_float_17(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_half_limits(cl_device_id deviceID, cl_context context,
+                     cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_FLOAT, 17, deviceID);
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, deviceID);
 }
 
-
-int test_float_limits_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT_LIMITS, 0, deviceID);
-}
-int test_float_limits_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_FLOAT_LIMITS, 1, deviceID);
-}
-int test_float_limits_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_float(cl_device_id deviceID, cl_context context,
+               cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_FLOAT_LIMITS, 2, deviceID);
+    return doTest(gQueue, gContext, TYPE_FLOAT, deviceID);
 }
 
-
-int test_octal_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_OCTAL, 0, deviceID);
-}
-int test_octal_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_OCTAL, 1, deviceID);
-}
-int test_octal_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_float_limits(cl_device_id deviceID, cl_context context,
+                      cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_OCTAL, 2, deviceID);
+    return doTest(gQueue, gContext, TYPE_FLOAT_LIMITS, deviceID);
 }
-int test_octal_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_OCTAL, 3, deviceID);
-}
-
 
-int test_unsigned_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_octal(cl_device_id deviceID, cl_context context,
+               cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_UNSIGNED, 0, deviceID);
+    return doTest(gQueue, gContext, TYPE_OCTAL, deviceID);
 }
-int test_unsigned_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_UNSIGNED, 1, deviceID);
-}
-
 
-int test_hexadecimal_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HEXADEC, 0, deviceID);
-}
-int test_hexadecimal_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HEXADEC, 1, deviceID);
-}
-int test_hexadecimal_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HEXADEC, 2, deviceID);
-}
-int test_hexadecimal_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_HEXADEC, 3, deviceID);
-}
-int test_hexadecimal_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_unsigned(cl_device_id deviceID, cl_context context,
+                  cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_HEXADEC, 4, deviceID);
+    return doTest(gQueue, gContext, TYPE_UNSIGNED, deviceID);
 }
 
-
-int test_char_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_CHAR, 0, deviceID);
-}
-int test_char_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_CHAR, 1, deviceID);
-}
-int test_char_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_hexadecimal(cl_device_id deviceID, cl_context context,
+                     cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_CHAR, 2, deviceID);
+    return doTest(gQueue, gContext, TYPE_HEXADEC, deviceID);
 }
 
-
-int test_string_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue,
+              int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_STRING, 0, deviceID);
-}
-int test_string_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_STRING, 1, deviceID);
-}
-int test_string_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_STRING, 2, deviceID);
+    return doTest(gQueue, gContext, TYPE_CHAR, deviceID);
 }
 
-
-int test_vector_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_VECTOR, 0, deviceID);
-}
-int test_vector_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_VECTOR, 1, deviceID);
-}
-int test_vector_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_VECTOR, 2, deviceID);
-}
-int test_vector_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_VECTOR, 3, deviceID);
-}
-int test_vector_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_VECTOR, 4, deviceID);
-}
-int test_vector_5(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue, int num_elements)
+int test_string(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_VECTOR, 5, deviceID);
+    return doTest(gQueue, gContext, TYPE_STRING, deviceID);
 }
 
-
-int test_address_space_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
-{
-    return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 0, deviceID);
-}
-int test_address_space_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_format_string(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 1, deviceID);
+    return doTest(gQueue, gContext, TYPE_FORMAT_STRING, deviceID);
 }
-int test_address_space_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+
+int test_vector(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 2, deviceID);
+    return doTest(gQueue, gContext, TYPE_VECTOR, deviceID);
 }
-int test_address_space_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+
+int test_address_space(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 3, deviceID);
+    return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, deviceID);
 }
-int test_address_space_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+
+int test_mixed_format_random(cl_device_id deviceID, cl_context context,
+                             cl_command_queue queue, int num_elements)
 {
-    return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 4, deviceID);
+    return doTest(gQueue, gContext, TYPE_MIXED_FORMAT_RANDOM, deviceID);
 }
 
 int test_buffer_size(cl_device_id deviceID, cl_context context,
@@ -939,62 +1015,39 @@ int test_buffer_size(cl_device_id deviceID, cl_context context,
 }
 
 test_definition test_list[] = {
-    ADD_TEST(int_0),           ADD_TEST(int_1),
-    ADD_TEST(int_2),           ADD_TEST(int_3),
-    ADD_TEST(int_4),           ADD_TEST(int_5),
-    ADD_TEST(int_6),           ADD_TEST(int_7),
-    ADD_TEST(int_8),
-
-    ADD_TEST(half_0),          ADD_TEST(half_1),
-    ADD_TEST(half_2),          ADD_TEST(half_3),
-    ADD_TEST(half_4),          ADD_TEST(half_5),
-    ADD_TEST(half_6),          ADD_TEST(half_7),
-    ADD_TEST(half_8),          ADD_TEST(half_9),
-
-    ADD_TEST(half_limits_0),   ADD_TEST(half_limits_1),
-    ADD_TEST(half_limits_2),
-
-    ADD_TEST(float_0),         ADD_TEST(float_1),
-    ADD_TEST(float_2),         ADD_TEST(float_3),
-    ADD_TEST(float_4),         ADD_TEST(float_5),
-    ADD_TEST(float_6),         ADD_TEST(float_7),
-    ADD_TEST(float_8),         ADD_TEST(float_9),
-    ADD_TEST(float_10),        ADD_TEST(float_11),
-    ADD_TEST(float_12),        ADD_TEST(float_13),
-    ADD_TEST(float_14),        ADD_TEST(float_15),
-    ADD_TEST(float_16),        ADD_TEST(float_17),
-
-    ADD_TEST(float_limits_0),  ADD_TEST(float_limits_1),
-    ADD_TEST(float_limits_2),
-
-    ADD_TEST(octal_0),         ADD_TEST(octal_1),
-    ADD_TEST(octal_2),         ADD_TEST(octal_3),
-
-    ADD_TEST(unsigned_0),      ADD_TEST(unsigned_1),
-
-    ADD_TEST(hexadecimal_0),   ADD_TEST(hexadecimal_1),
-    ADD_TEST(hexadecimal_2),   ADD_TEST(hexadecimal_3),
-    ADD_TEST(hexadecimal_4),
-
-    ADD_TEST(char_0),          ADD_TEST(char_1),
-    ADD_TEST(char_2),
-
-    ADD_TEST(string_0),        ADD_TEST(string_1),
-    ADD_TEST(string_2),
-
-    ADD_TEST(vector_0),        ADD_TEST(vector_1),
-    ADD_TEST(vector_2),        ADD_TEST(vector_3),
-    ADD_TEST(vector_4),        ADD_TEST(vector_5),
-
-    ADD_TEST(address_space_0), ADD_TEST(address_space_1),
-    ADD_TEST(address_space_2), ADD_TEST(address_space_3),
-    ADD_TEST(address_space_4),
-
+    ADD_TEST(int),
+    ADD_TEST(half),
+    ADD_TEST(half_limits),
+    ADD_TEST(float),
+    ADD_TEST(float_limits),
+    ADD_TEST(octal),
+    ADD_TEST(unsigned),
+    ADD_TEST(hexadecimal),
+    ADD_TEST(char),
+    ADD_TEST(string),
+    ADD_TEST(format_string),
+    ADD_TEST(vector),
+    ADD_TEST(address_space),
     ADD_TEST(buffer_size),
+    ADD_TEST(mixed_format_random),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
 
+//-----------------------------------------
+// printUsage
+//-----------------------------------------
+static void printUsage(void)
+{
+    log_info("test_printf: <optional: testnames> \n");
+    log_info("\tdefault is to run the full test on the default device\n");
+    log_info("\n");
+    for (int i = 0; i < test_num; i++)
+    {
+        log_info("\t%s\n", test_list[i].name);
+    }
+}
+
 //-----------------------------------------
 // main
 //-----------------------------------------
@@ -1059,6 +1112,8 @@ int main(int argc, const char* argv[])
         return -1;
     }
 
+    gMTdata = MTdataHolder(gRandomSeed);
+
     int err = runTestHarnessWithCheck( argCount, argList, test_num, test_list, true, 0, InitCL );
 
     if(gQueue)
@@ -1080,20 +1135,6 @@ int main(int argc, const char* argv[])
     return err;
 }
 
-//-----------------------------------------
-// printUsage
-//-----------------------------------------
-static void printUsage( void )
-{
-    log_info("test_printf: <optional: testnames> \n");
-    log_info("\tdefault is to run the full test on the default device\n");
-    log_info("\n");
-    for( int i = 0; i < test_num; i++ )
-    {
-        log_info( "\t%s\n", test_list[i].name );
-    }
-}
-
 test_status InitCL( cl_device_id device )
 {
     uint32_t device_frequency = 0;
diff --git a/test_conformance/printf/test_printf.h b/test_conformance/printf/test_printf.h
index 8eb2a03249..a2cd9ed2be 100644
--- a/test_conformance/printf/test_printf.h
+++ b/test_conformance/printf/test_printf.h
@@ -55,14 +55,16 @@ enum PrintfTestType
     TYPE_HEXADEC,
     TYPE_CHAR,
     TYPE_STRING,
+    TYPE_FORMAT_STRING,
     TYPE_VECTOR,
     TYPE_ADDRESS_SPACE,
+    TYPE_MIXED_FORMAT_RANDOM,
     TYPE_COUNT
 };
 
 struct printDataGenParameters
 {
-    const char* genericFormat;
+    std::vector<std::string> genericFormats;
     const char* dataRepresentation;
     const char* vectorFormatFlag;
     const char* vectorFormatSpecifier;
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index 6b310a994d..292d95b303 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -46,39 +46,41 @@ std::vector<printDataGenParameters> printIntGenParameters = {
 
     //(Minimum)Five-wide,default(right)-justified
 
-    {"%5d","10"},
+    { { "%5d" }, "10" },
 
-        //(Minimum)Five-wide,left-justified
+    //(Minimum)Five-wide,left-justified
 
-    {"%-5d","10"},
+    { { "%-5d" }, "10" },
 
-        //(Minimum)Five-wide,default(right)-justified,zero-filled
+    //(Minimum)Five-wide,default(right)-justified,zero-filled
 
-    {"%05d","10"},
+    { { "%05d" }, "10" },
 
-        //(Minimum)Five-wide,default(right)-justified,with sign
+    //(Minimum)Five-wide,default(right)-justified,with sign
 
-    {"%+5d","10"},
+    { { "%+5d" }, "10" },
 
-         //(Minimum)Five-wide ,left-justified,with sign
+    //(Minimum)Five-wide ,left-justified,with sign
 
-    {"%-+5d","10"},
+    { { "%-+5d" }, "10" },
 
-        //(Minimum)Five-digit(zero-filled in absent digits),default(right)-justified
+    //(Minimum)Five-digit(zero-filled in absent digits),default(right)-justified
 
-    {"%.5i","100"},
+    { { "%.5i" }, "100" },
 
-        //(Minimum)Six-wide,Five-digit(zero-filled in absent digits),default(right)-justified
+    //(Minimum)Six-wide,Five-digit(zero-filled in absent
+    // digits),default(right)-justified
 
-    {"%6.5i","100"},
+    { { "%6.5i" }, "100" },
 
-        //0 and - flag both apper ==>0 is ignored,left-justified,capital I
+    // 0 and - flag both appear ==>0 is ignored,left-justified,capital I
 
-    {"%-06i","100"},
+    { { "%-06i" }, "100" },
 
-        //(Minimum)Six-wide,Five-digit(zero-filled in absent digits),default(right)-justified
+    //(Minimum)Six-wide,Five-digit(zero-filled in absent
+    // digits),default(right)-justified
 
-    {"%06.5i","100"}
+    { { "%06.5i" }, "100" }
 
 };
 
@@ -119,50 +121,50 @@ std::vector<printDataGenParameters> printHalfGenParameters = {
 
     // Default(right)-justified
 
-    { "%f", "1.234h" },
+    { { "%f" }, "1.234h" },
 
     // One position after the decimal,default(right)-justified
 
-    { "%4.2f", "1.2345h" },
+    { { "%4.2f" }, "1.2345h" },
 
     // Zero positions after the
     // decimal([floor]rounding),default(right)-justified
 
-    { "%.0f", "0.1h" },
+    { { "%.0f" }, "0.1h" },
 
     // Zero positions after the decimal([ceil]rounding),default(right)-justified
 
-    { "%.0f", "0.6h" },
+    { { "%.0f" }, "0.6h" },
 
     // Zero-filled,default positions number after the
     // decimal,default(right)-justified
 
-    { "%0f", "0.6h" },
+    { { "%0f" }, "0.6h" },
 
     // Double argument representing floating-point,used by f
     // style,default(right)-justified
 
-    { "%4g", "5.678h" },
+    { { "%4g" }, "5.678h" },
 
     // Double argument representing floating-point,used by e
     // style,default(right)-justified
 
-    { "%4.2g", "5.678h" },
+    { { "%4.2g" }, "5.678h" },
 
     // Double argument representing floating-point,used by e
     // style,default(right)-justified
 
-    { "%4G", "0.000062h" },
+    { { "%4G" }, "0.000062h" },
 
     // Double argument representing floating-point,with
     // exponent,left-justified,default(right)-justified
 
-    { "%-#20.15e", "65504.0h" },
+    { { "%-#20.15e" }, "65504.0h" },
 
     // Double argument representing floating-point,with
     // exponent,left-justified,with sign,capital E,default(right)-justified
 
-    { "%+#21.15E", "-65504.0h" },
+    { { "%+#21.15E" }, "-65504.0h" },
 };
 
 //---------------------------------------------------------
@@ -203,16 +205,22 @@ testCase testCaseHalf = {
 std::vector<printDataGenParameters> printHalfLimitsGenParameters = {
 
     // Infinity (1.0/0.0)
+    { { "%f", "%e", "%g", "%a" }, "1.0h/0.0h" },
 
-    { "%f", "1.0h/0.0h" },
+    // NaN
+    { { "%f", "%e", "%g", "%a" }, "nan((ushort)0)" },
 
     // NaN
+    { { "%f", "%e", "%g", "%a" }, "acospi(2.0h)" },
 
-    { "%f", "sqrt(-1.0h)" },
+    // Infinity (1.0/0.0)
+    { { "%F", "%E", "%G", "%A" }, "1.0h/0.0h" },
 
     // NaN
-    { "%f", "acospi(2.0h)" }
+    { { "%F", "%E", "%G", "%A" }, "nan((ushort)0)" },
 
+    // NaN
+    { { "%F", "%E", "%G", "%A" }, "acospi(2.0h)" }
 };
 //--------------------------------------------------------
 
@@ -224,9 +232,15 @@ std::vector<std::string> correctBufferHalfLimits = {
 
     "inf",
 
-    "-nan",
+    "nan",
+
+    "nan",
+
+    "INF",
 
-    "nan"
+    "NAN",
+
+    "NAN"
 
 };
 
@@ -265,77 +279,91 @@ testCase testCaseHalfLimits = {
 
 std::vector<printDataGenParameters> printFloatGenParameters = {
 
-    //Default(right)-justified
+    // Default(right)-justified
 
-    {"%f","10.3456"},
+    { { "%f" }, "10.3456" },
 
-    //One position after the decimal,default(right)-justified
+    // One position after the decimal,default(right)-justified
 
-    {"%.1f","10.3456"},
+    { { "%.1f" }, "10.3456" },
 
-    //Two positions after the decimal,default(right)-justified
+    // Two positions after the decimal,default(right)-justified
 
-    {"%.2f","10.3456"},
+    { { "%.2f" }, "10.3456" },
 
-    //(Minimum)Eight-wide,three positions after the decimal,default(right)-justified
+    //(Minimum)Eight-wide,three positions after the
+    // decimal,default(right)-justified
 
-    {"%8.3f","10.3456"},
+    { { "%8.3f" }, "10.3456" },
 
-    //(Minimum)Eight-wide,two positions after the decimal,zero-filled,default(right)-justified
+    //(Minimum)Eight-wide,two positions after the
+    // decimal,zero-filled,default(right)-justified
 
-    {"%08.2f","10.3456"},
+    { { "%08.2f" }, "10.3456" },
 
     //(Minimum)Eight-wide,two positions after the decimal,left-justified
 
-    {"%-8.2f","10.3456"},
+    { { "%-8.2f" }, "10.3456" },
 
-    //(Minimum)Eight-wide,two positions after the decimal,with sign,default(right)-justified
+    //(Minimum)Eight-wide,two positions after the decimal,with
+    // sign,default(right)-justified
 
-    {"%+8.2f","-10.3456"},
+    { { "%+8.2f" }, "-10.3456" },
 
-    //Zero positions after the decimal([floor]rounding),default(right)-justified
+    // Zero positions after the
+    // decimal([floor]rounding),default(right)-justified
 
-    {"%.0f","0.1"},
+    { { "%.0f" }, "0.1" },
 
-    //Zero positions after the decimal([ceil]rounding),default(right)-justified
+    // Zero positions after the decimal([ceil]rounding),default(right)-justified
 
-    {"%.0f","0.6"},
+    { { "%.0f" }, "0.6" },
 
-    //Zero-filled,default positions number after the decimal,default(right)-justified
+    // Zero-filled,default positions number after the
+    // decimal,default(right)-justified
 
-    {"%0f","0.6"},
+    { { "%0f" }, "0.6" },
 
-    //Double argument representing floating-point,used by f style,default(right)-justified
+    // Double argument representing floating-point,used by f
+    // style,default(right)-justified
 
-    {"%4g","12345.6789"},
+    { { "%4g" }, "12345.6789" },
 
-    //Double argument representing floating-point,used by e style,default(right)-justified
+    // Double argument representing floating-point,used by e
+    // style,default(right)-justified
 
-    {"%4.2g","12345.6789"},
+    { { "%4.2g" }, "12345.6789" },
 
-    //Double argument representing floating-point,used by f style,default(right)-justified
+    // Double argument representing floating-point,used by f
+    // style,default(right)-justified
 
-    {"%4G","0.0000023"},
+    { { "%4G" }, "0.0000023" },
 
-    //Double argument representing floating-point,used by e style,default(right)-justified
+    // Double argument representing floating-point,used by e
+    // style,default(right)-justified
 
-    {"%4G","0.023"},
+    { { "%4G" }, "0.023" },
 
-    //Double argument representing floating-point,with exponent,left-justified,default(right)-justified
+    // Double argument representing floating-point,with
+    // exponent,left-justified,default(right)-justified
+    // Use a value that is exactly representable as 32-bit float.
 
-    {"%-#20.15e","789456123.0"},
+    { { "%-#20.15e" }, "789456128.0" },
 
-    //Double argument representing floating-point,with exponent,left-justified,with sign,capital E,default(right)-justified
+    // Double argument representing floating-point,with
+    // exponent,left-justified,with sign,capital E,default(right)-justified
+    // Use a value that is exactly representable as 32-bit float.
 
-    {"%+#21.15E","789456123.0"},
+    { { "%+#21.15E" }, "789456128.0" },
 
-    //Double argument representing floating-point,in [-]xh.hhhhpAd style
+    // Double argument representing floating-point,in [-]xh.hhhhpAd style
 
-    {"%.6a","0.1"},
+    { { "%.6a" }, "0.1" },
 
-    //(Minimum)Ten-wide,Double argument representing floating-point,in xh.hhhhpAd style,default(right)-justified
+    //(Minimum)Ten-wide,Double argument representing floating-point,in
+    // xh.hhhhpAd style,default(right)-justified
 
-    {"%10.2a","9990.235"},
+    { { "%10.2a" }, "9990.235" },
 };
 
 //---------------------------------------------------------
@@ -376,16 +404,22 @@ testCase testCaseFloat = {
 std::vector<printDataGenParameters> printFloatLimitsGenParameters = {
 
     // Infinity (1.0/0.0)
+    { { "%f", "%e", "%g", "%a" }, "1.0f/0.0f" },
 
-    { "%f", "1.0f/0.0f" },
+    // NaN
+    { { "%f", "%e", "%g", "%a" }, "nan(0U)" },
 
     // NaN
+    { { "%f", "%e", "%g", "%a" }, "acospi(2.0f)" },
 
-    { "%f", "sqrt(-1.0f)" },
+    // Infinity (1.0/0.0)
+    { { "%F", "%E", "%G", "%A" }, "1.0f/0.0f" },
 
     // NaN
-    { "%f", "acospi(2.0f)" }
+    { { "%F", "%E", "%G", "%A" }, "nan(0U)" },
 
+    // NaN
+    { { "%F", "%E", "%G", "%A" }, "acospi(2.0f)" }
 };
 //--------------------------------------------------------
 
@@ -397,9 +431,15 @@ std::vector<std::string> correctBufferFloatLimits = {
 
     "inf",
 
-    "-nan",
+    "nan",
+
+    "nan",
 
-    "nan"
+    "INF",
+
+    "NAN",
+
+    "NAN"
 
 };
 
@@ -437,21 +477,22 @@ testCase testCaseFloatLimits = {
 
 std::vector<printDataGenParameters> printOctalGenParameters = {
 
-    //Default(right)-justified
+    // Default(right)-justified
 
-    {"%o","10"},
+    { { "%o" }, "10" },
 
-    //Five-digit,default(right)-justified
+    // Five-digit,default(right)-justified
 
-    {"%.5o","10"},
+    { { "%.5o" }, "10" },
 
-    //Default(right)-justified,increase precision
+    // Default(right)-justified,increase precision
 
-    {"%#o","100000000"},
+    { { "%#o" }, "100000000" },
 
-    //(Minimum)Four-wide,Five-digit,0-flag ignored(because of precision),default(right)-justified
+    //(Minimum)Four-wide,Five-digit,0-flag ignored(because of
+    // precision),default(right)-justified
 
-    {"%04.5o","10"}
+    { { "%04.5o" }, "10" }
 
 };
 
@@ -493,19 +534,19 @@ testCase testCaseOctal = {
 
 std::vector<printDataGenParameters> printUnsignedGenParameters = {
 
-    //Default(right)-justified
+    // Default(right)-justified
 
-    {"%u","10"},
+    { { "%u" }, "10" },
 
-    //Zero precision for zero,default(right)-justified
+    // Zero precision for zero,default(right)-justified
 
-    {"%.0u","0"},
+    { { "%.0u" }, "0" },
 
 };
 
 //-------------------------------------------------------
 
-//Test case for octal                                   |
+// Test case for unsigned                                 |
 
 //-------------------------------------------------------
 
@@ -541,25 +582,25 @@ testCase testCaseUnsigned = {
 
 std::vector<printDataGenParameters> printHexadecimalGenParameters = {
 
-    //Add 0x,low x,default(right)-justified
+    // Add 0x,low x,default(right)-justified
 
-    {"%#x","0xABCDEF"},
+    { { "%#x" }, "0xABCDEF" },
 
-    //Add 0x,capital X,default(right)-justified
+    // Add 0x,capital X,default(right)-justified
 
-    {"%#X","0xABCDEF"},
+    { { "%#X" }, "0xABCDEF" },
 
-    //Not add 0x,if zero,default(right)-justified
+    // Not add 0x,if zero,default(right)-justified
 
-    {"%#X","0"},
+    { { "%#X" }, "0" },
 
     //(Minimum)Eight-wide,default(right)-justified
 
-    {"%8x","399"},
+    { { "%8x" }, "399" },
 
     //(Minimum)Four-wide,zero-filled,default(right)-justified
 
-    {"%04x","399"}
+    { { "%04x" }, "399" }
 
 };
 
@@ -601,17 +642,17 @@ testCase testCaseHexadecimal = {
 
 std::vector<printDataGenParameters> printCharGenParameters = {
 
-    //Four-wide,zero-filled,default(right)-justified
+    // Four-wide,zero-filled,default(right)-justified
 
-    {"%4c","\'1\'"},
+    { { "%4c" }, "\'1\'" },
 
-        //Four-wide,left-justified
+    // Four-wide,left-justified
 
-    {"%-4c","\'1\'"},
+    { { "%-4c" }, "\'1\'" },
 
-        //(unsigned) int argument,default(right)-justified
+    //(unsigned) int argument,default(right)-justified
 
-    {"%c","66"}
+    { { "%c" }, "66" }
 
 };
 
@@ -669,20 +710,58 @@ testCase testCaseChar = {
 // [string]format | [string] string-data representation  |
 
 //--------------------------------------------------------
+// clang-format off
 
 std::vector<printDataGenParameters> printStringGenParameters = {
 
+    // empty format
+    { {""}, "\"foo\"" },
+
+    // empty argument
+    { {"%s"}, "\"\"" },
+
     //(Minimum)Four-wide,zero-filled,default(right)-justified
 
-    {"%4s","\"foo\""},
+    { { "%4s" }, "\"foo\"" },
 
+    // Precision of one: at most one character of the string is printed
+    // One-digit(precision ignored),left-justified
 
-    {"%.1s","\"foo\""},
+    { { "%.1s" }, "\"foo\"" },
 
     //%% specification
 
-    {"%s","\"%%\""},
+    { {"%s"}, "\"%%\"" },
+
+    { {"%s"}, "\"foo%%bar%%bar%%foo\"" },
+
+    { {"%%%s%%"}, "\"foo\"" },
+
+    { {"%%s%s"}, "\"foo\"" },
+
+    // special symbols
+    // nested
+
+    { {"%s"}, "\"\\\"%%\\\"\"" },
+
+    { {"%s"}, "\"\\\'%%\\\'\"" },
+
+    // tabs
+
+    { {"%s"}, "\"foo\\tfoo\"" },
+
+    // newlines
+
+    { {"%s"}, "\"foo\\nfoo\"" },
+
+    // terminator
+    { {"%s"}, "\"foo\\0foo\"" },
+
+    // all ascii characters
+    { {"%s"},
+      "\" "
+      "!\\\"#$%&\'()*+,-./"
+      "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`"
+      "abcdefghijklmnopqrstuvwxyz{|}~\"" }
 };
 
 //---------------------------------------------------------
@@ -693,13 +772,37 @@ std::vector<printDataGenParameters> printStringGenParameters = {
 
 std::vector<std::string> correctBufferString = {
 
+    "",
+
+    "",
+
     " foo",
 
     "f",
 
     "%%",
-};
 
+    "foo%%bar%%bar%%foo",
+
+    "%foo%",
+
+    "%sfoo",
+
+    "\"%%\"",
+
+    "\'%%\'",
+
+    "foo\tfoo",
+
+R"(foo
+foo)",
+
+    "foo",
+
+    " !\"#$%&\'()*+,-./"
+    "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
+    "abcdefghijklmnopqrstuvwxyz{|}~"
+};
 
 //---------------------------------------------------------
 
@@ -721,7 +824,90 @@ testCase testCaseString = {
 
 };
 
+//--------------------------------------------------------
+
+// [string]format |
+
+//--------------------------------------------------------
+
+std::vector<printDataGenParameters> printFormatStringGenParameters = {
+
+    //%% specification
 
+    { {"%%"} },
+
+    // special symbols
+    // nested
+
+    { {"\\\"%%\\\""} },
+
+    { {"\'%%\'"} },
+
+    { {"\'foo%%bar%%bar%%foo\'"} },
+
+    // tabs
+
+    { {"foo\\t\\t\\tfoo"} },
+
+    // newlines
+
+    { {"foo\\nfoo"} },
+
+    // all ascii characters
+    { {
+          " !\\\"#$%%&\'()*+,-./"
+          "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\\]^_`"
+          "abcdefghijklmnopqrstuvwxyz{|}~"
+      } }
+};
+
+//---------------------------------------------------------
+
+// Lookup table -[string] string-correct buffer           |
+
+//---------------------------------------------------------
+
+std::vector<std::string> correctBufferFormatString = {
+
+    "%",
+
+    "\"%\"",
+
+    "\'%\'",
+
+    "\'foo%bar%bar%foo\'",
+
+    "foo\t\t\tfoo",
+
+R"(foo
+foo)",
+
+    " !\"#$%&\'()*+,-./"
+    "0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`"
+    "abcdefghijklmnopqrstuvwxyz{|}~"
+};
+
+//---------------------------------------------------------
+
+//Test case for format string                             |
+
+//---------------------------------------------------------
+
+testCase testCaseFormatString = {
+
+    TYPE_FORMAT_STRING,
+
+    correctBufferFormatString,
+
+    printFormatStringGenParameters,
+
+    NULL,
+
+    kchar
+
+};
+
+// clang-format on
 
 //=========================================================
 
@@ -741,27 +927,27 @@ std::vector<printDataGenParameters> printVectorGenParameters = {
 
     //(Minimum)Two-wide,two positions after decimal
 
-    { NULL, "(1.0f,2.0f,3.0f,4.0f)", "%2.2", "hlf", "float", "4" },
+    { { "" }, "(1.0f,2.0f,3.0f,4.0f)", "%2.2", "hlf", "float", "4" },
 
     // Alternative form,uchar argument
 
-    { NULL, "(0xFA,0xFB)", "%#", "hhx", "uchar", "2" },
+    { { "" }, "(0xFA,0xFB)", "%#", "hhx", "uchar", "2" },
 
     // Alternative form,ushort argument
 
-    { NULL, "(0x1234,0x8765)", "%#", "hx", "ushort", "2" },
+    { { "" }, "(0x1234,0x8765)", "%#", "hx", "ushort", "2" },
 
     // Alternative form,uint argument
 
-    { NULL, "(0x12345678,0x87654321)", "%#", "hlx", "uint", "2" },
+    { { "" }, "(0x12345678,0x87654321)", "%#", "hlx", "uint", "2" },
 
     // Alternative form,long argument
 
-    { NULL, "(12345678,98765432)", "%", "ld", "long", "2" },
+    { { "" }, "(12345678,98765432)", "%", "ld", "long", "2" },
 
     //(Minimum)Two-wide,two positions after decimal
 
-    { NULL, "(1.0h,2.0h,3.0h,4.0h)", "%2.2", "hf", "half", "4" }
+    { { "" }, "(1.0h,2.0h,3.0h,4.0h)", "%2.2", "hf", "half", "4" }
 };
 
 //------------------------------------------------------------
@@ -821,28 +1007,72 @@ testCase testCaseVector = {
 //-------------------------------------------------------------------------------------------------------------------------------------------------------------------
 
 
-
 std::vector<printDataGenParameters> printAddrSpaceGenParameters = {
 
-    //Global memory region
-
-    {"\"%d\\n\"",NULL,NULL,NULL,NULL,NULL,"__global int* x","","*x",""},
-
-    //Global,constant, memory region
-
-    {"\"%d\\n\"",NULL,NULL,NULL,NULL,NULL,"constant int* x","","*x",""},
-
-    //Local memory region
-
-    {"\"%+d\\n\"",NULL,NULL,NULL,NULL,NULL,"","local int x;\n x= (int)3;\n","x",""},
-
-    //Private memory region
-
-    {"\"%i\\n\"",NULL,NULL,NULL,NULL,NULL,"","private int x;\n x = (int)-1;\n","x",""},
-
-    //Address of void * from global memory region
-
-    {"\"%p\\n\"",NULL,NULL,NULL,NULL,NULL,"__global void* x,__global intptr_t*  xAddr","","x","*xAddr = (intptr_t)x;\n"}
+    // Global memory region
+
+    { { "\"%d\\n\"" },
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      "__global int* x",
+      "",
+      "*x",
+      "" },
+
+    // Global,constant, memory region
+
+    { { "\"%d\\n\"" },
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      "constant int* x",
+      "",
+      "*x",
+      "" },
+
+    // Local memory region
+
+    { { "\"%+d\\n\"" },
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      "",
+      "local int x;\n x= (int)3;\n",
+      "x",
+      "" },
+
+    // Private memory region
+
+    { { "\"%i\\n\"" },
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      "",
+      "private int x;\n x = (int)-1;\n",
+      "x",
+      "" },
+
+    // Address of void * from global memory region
+
+    { { "\"%p\\n\"" },
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      NULL,
+      "__global void* x,__global intptr_t*  xAddr",
+      "",
+      "x",
+      "*xAddr = (intptr_t)x;\n" }
 
 };
 
@@ -876,7 +1106,26 @@ testCase testCaseAddrSpace = {
 
 };
 
+//=========================================================
+// mixed format
+//=========================================================
+
+//----------------------------------------------------------
+// Container related to mixed format tests.
+// Empty records for which the format string and reference string are generated
+// at run time. The size of this vector specifies the number of random tests
+// that will be run.
+std::vector<printDataGenParameters> printMixedFormatGenParameters(64,
+                                                                  { { "" } });
+
+std::vector<std::string> correctBufferMixedFormat;
 
+//----------------------------------------------------------
+// Test case for mixed-args
+//----------------------------------------------------------
+testCase testCaseMixedFormat = { TYPE_MIXED_FORMAT_RANDOM,
+                                 correctBufferMixedFormat,
+                                 printMixedFormatGenParameters, NULL };
 
 //-------------------------------------------------------------------------------
 
@@ -885,10 +1134,11 @@ testCase testCaseAddrSpace = {
 //-------------------------------------------------------------------------------
 
 std::vector<testCase*> allTestCase = {
-    &testCaseInt,      &testCaseHalf,        &testCaseHalfLimits,
-    &testCaseFloat,    &testCaseFloatLimits, &testCaseOctal,
-    &testCaseUnsigned, &testCaseHexadecimal, &testCaseChar,
-    &testCaseString,   &testCaseVector,      &testCaseAddrSpace
+    &testCaseInt,       &testCaseHalf,         &testCaseHalfLimits,
+    &testCaseFloat,     &testCaseFloatLimits,  &testCaseOctal,
+    &testCaseUnsigned,  &testCaseHexadecimal,  &testCaseChar,
+    &testCaseString,    &testCaseFormatString, &testCaseVector,
+    &testCaseAddrSpace, &testCaseMixedFormat
 };
 
 //-----------------------------------------
@@ -913,14 +1163,14 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
     if(pTestCase->_type == TYPE_ADDRESS_SPACE && strcmp(pTestCase->_genParameters[testId].addrSpacePAdd,""))
 
     {
-        char analysisBufferTmp[ANALYSIS_BUFFER_SIZE];
+        char analysisBufferTmp[ANALYSIS_BUFFER_SIZE + 1];
 
         if(strstr(analysisBuffer,"0x") == NULL)
         // Need to prepend 0x to ASCII number before calling strtol.
         strcpy(analysisBufferTmp,"0x");
 
         else analysisBufferTmp[0]='\0';
-        strcat(analysisBufferTmp,analysisBuffer);
+        strncat(analysisBufferTmp, analysisBuffer, ANALYSIS_BUFFER_SIZE);
         if (sizeof(long) == 8) {
             if(strtoul(analysisBufferTmp,NULL,0) == pAddr) return 0;
         }
@@ -931,14 +1181,29 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
 
     }
 
-    char* exp;
-    //Exponenent representation
-    if((exp = strstr(analysisBuffer,"E+")) != NULL || (exp = strstr(analysisBuffer,"e+")) != NULL || (exp = strstr(analysisBuffer,"E-")) != NULL || (exp = strstr(analysisBuffer,"e-")) != NULL)
+    char* exp = nullptr;
+    std::string copy_str;
+    std::vector<char> staging(strlen(analysisBuffer) + 1);
+    std::vector<char> staging_correct(pTestCase->_correctBuffer[testId].size()
+                                      + 1);
+    std::snprintf(staging.data(), staging.size(), "%s", analysisBuffer);
+    std::snprintf(staging_correct.data(), staging_correct.size(), "%s",
+                  pTestCase->_correctBuffer[testId].c_str());
+    // Exponent representation
+    while ((exp = strstr(staging.data(), "E+")) != NULL
+           || (exp = strstr(staging.data(), "e+")) != NULL
+           || (exp = strstr(staging.data(), "E-")) != NULL
+           || (exp = strstr(staging.data(), "e-")) != NULL)
     {
         char correctExp[3]={0};
         strncpy(correctExp,exp,2);
 
-        char* eCorrectBuffer = strstr((char*)pTestCase->_correctBuffer[testId].c_str(),correctExp);
+        // check if leading data is equal
+        int ret = strncmp(staging_correct.data(), staging.data(),
+                          exp - staging.data());
+        if (ret) return ret;
+
+        char* eCorrectBuffer = strstr(staging_correct.data(), correctExp);
         if(eCorrectBuffer == NULL)
             return 1;
 
@@ -953,19 +1218,41 @@ size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId
             ++exp;
         while(*eCorrectBuffer == '0')
             ++eCorrectBuffer;
-        return strcmp(eCorrectBuffer,exp);
-    }
-    if(!strcmp(pTestCase->_correctBuffer[testId].c_str(),"inf"))
-    return strcmp(analysisBuffer,"inf")&&strcmp(analysisBuffer,"infinity")&&strcmp(analysisBuffer,"1.#INF00")&&strcmp(analysisBuffer,"Inf");
-    if(!strcmp(pTestCase->_correctBuffer[testId].c_str(),"nan") || !strcmp(pTestCase->_correctBuffer[testId].c_str(),"-nan")) {
-       return strcmp(analysisBuffer,"nan")&&strcmp(analysisBuffer,"-nan")&&strcmp(analysisBuffer,"1.#IND00")&&strcmp(analysisBuffer,"-1.#IND00")&&strcmp(analysisBuffer,"NaN")&&strcmp(analysisBuffer,"nan(ind)")&&strcmp(analysisBuffer,"nan(snan)")&&strcmp(analysisBuffer,"-nan(ind)");
+
+        copy_str = std::string(eCorrectBuffer);
+        std::snprintf(staging_correct.data(), staging_correct.size(), "%s",
+                      copy_str.c_str());
+
+        copy_str = std::string(exp);
+        std::snprintf(staging.data(), staging.size(), "%s", copy_str.c_str());
+
+        if (strstr(staging.data(), "E+") != NULL
+            || strstr(staging.data(), "e+") != NULL
+            || strstr(staging.data(), "E-") != NULL
+            || strstr(staging.data(), "e-") != NULL)
+            continue;
+
+        return strcmp(staging_correct.data(), copy_str.c_str());
     }
-    return strcmp(analysisBuffer,pTestCase->_correctBuffer[testId].c_str());
+
+    if (pTestCase->_correctBuffer[testId] == "inf")
+        return strcmp(analysisBuffer, "inf")
+            && strcmp(analysisBuffer, "infinity");
+    else if (pTestCase->_correctBuffer[testId] == "INF")
+        return strcmp(analysisBuffer, "INF")
+            && strcmp(analysisBuffer, "INFINITY");
+    else if (pTestCase->_correctBuffer[testId] == "nan")
+        return strcmp(analysisBuffer, "nan") && strcmp(analysisBuffer, "-nan");
+    else if (pTestCase->_correctBuffer[testId] == "NAN")
+        return strcmp(analysisBuffer, "NAN") && strcmp(analysisBuffer, "-NAN");
+
+    return strcmp(analysisBuffer, pTestCase->_correctBuffer[testId].c_str());
 }
 
 static void intRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
-    snprintf(refResult, refSize, params.genericFormat, atoi(params.dataRepresentation));
+    snprintf(refResult, refSize, params.genericFormats.front().c_str(),
+             atoi(params.dataRepresentation));
 }
 
 static void halfRefBuilder(printDataGenParameters& params, char* refResult,
@@ -973,30 +1260,32 @@ static void halfRefBuilder(printDataGenParameters& params, char* refResult,
 {
     cl_half val = cl_half_from_float(strtof(params.dataRepresentation, NULL),
                                      half_rounding_mode);
-    snprintf(refResult, refSize, params.genericFormat, cl_half_to_float(val));
+    snprintf(refResult, refSize, params.genericFormats.front().c_str(),
+             cl_half_to_float(val));
 }
 
 static void floatRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
-    snprintf(refResult, refSize, params.genericFormat, strtof(params.dataRepresentation, NULL));
+    snprintf(refResult, refSize, params.genericFormats.front().c_str(),
+             strtof(params.dataRepresentation, NULL));
 }
 
 static void octalRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10);
-    snprintf(refResult, refSize, params.genericFormat, data);
+    snprintf(refResult, refSize, params.genericFormats.front().c_str(), data);
 }
 
 static void unsignedRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     const unsigned long int data = strtoul(params.dataRepresentation, NULL, 10);
-    snprintf(refResult, refSize, params.genericFormat, data);
+    snprintf(refResult, refSize, params.genericFormats.front().c_str(), data);
 }
 
 static void hexRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     const unsigned long int data = strtoul(params.dataRepresentation, NULL, 0);
-    snprintf(refResult, refSize, params.genericFormat, data);
+    snprintf(refResult, refSize, params.genericFormats.front().c_str(), data);
 }
 
 /*
diff --git a/test_conformance/relationals/test_shuffles.cpp b/test_conformance/relationals/test_shuffles.cpp
index de67f339cc..2fb8ab3be6 100644
--- a/test_conformance/relationals/test_shuffles.cpp
+++ b/test_conformance/relationals/test_shuffles.cpp
@@ -15,7 +15,7 @@
 //
 
 #include <iomanip>
-
+#include <vector>
 #include "testBase.h"
 #include "harness/conversions.h"
 #include "harness/typeWrappers.h"
@@ -618,31 +618,25 @@ int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
     if( error != 0 )
         return error;
 
-    typeSize = get_explicit_type_size( vecType );
-
-#if !(defined(_WIN32) && defined (_MSC_VER))
-    cl_long inData[ inVecSize * numOrders ];
-    cl_long inSecondData[ inVecSize * numOrders ];
-    cl_long outData[ outRealVecSize * numOrders ];
-#else
-    cl_long* inData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
-    cl_long* inSecondData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
-    cl_long* outData = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long));
-#endif
-    memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) );
+    typeSize = get_explicit_type_size(vecType);
+    std::vector<cl_long> inData(inVecSize * numOrders);
+    std::vector<cl_long> inSecondData(inVecSize * numOrders);
+    std::vector<cl_long> outData(outRealVecSize * numOrders);
 
-    generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData );
+    generate_random_data(vecType, (unsigned int)(numOrders * inVecSize), d,
+                         inData.data());
     if( shuffleMode == kBuiltInDualInputFnMode )
-        generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );
+        generate_random_data(vecType, (unsigned int)(numOrders * inVecSize), d,
+                             inSecondData.data());
 
     streams[0] =
         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * inVecSize * numOrders, inData, &error);
+                       typeSize * inVecSize * numOrders, inData.data(), &error);
     test_error( error, "Unable to create input stream" );
 
-    streams[1] =
-        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                       typeSize * outRealVecSize * numOrders, outData, &error);
+    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                typeSize * outRealVecSize * numOrders,
+                                outData.data(), &error);
     test_error( error, "Unable to create output stream" );
 
     int argIndex = 0;
@@ -650,7 +644,7 @@ int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
     {
         streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                     typeSize * inVecSize * numOrders,
-                                    inSecondData, &error);
+                                    inSecondData.data(), &error);
         test_error( error, "Unable to create second input stream" );
 
         error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );
@@ -675,12 +669,14 @@ int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
 
 
     // Read the results back
-    error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL );
+    error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+                                typeSize * numOrders * outRealVecSize,
+                                outData.data(), 0, NULL, NULL);
     test_error( error, "Unable to read results" );
 
-    unsigned char *inDataPtr = (unsigned char *)inData;
-    unsigned char *inSecondDataPtr = (unsigned char *)inSecondData;
-    unsigned char *outDataPtr = (unsigned char *)outData;
+    unsigned char *inDataPtr = (unsigned char *)inData.data();
+    unsigned char *inSecondDataPtr = (unsigned char *)inSecondData.data();
+    unsigned char *outDataPtr = (unsigned char *)outData.data();
     int ret = 0;
     int errors_printed = 0;
     for( size_t i = 0; i < numOrders; i++ )
@@ -882,7 +878,8 @@ int test_shuffle_random(cl_device_id device, cl_context context, cl_command_queu
                 int numTests = NUM_TESTS*NUM_ITERATIONS_PER_TEST;
                 for( int i = 0; i < numTests /*&& error == 0*/; i++ )
                 {
-                    ShuffleOrder src, dst;
+                    ShuffleOrder src{ 0 };
+                    ShuffleOrder dst;
                     if( shuffleMode == kBuiltInFnMode )
                     {
                         build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
diff --git a/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32 b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32
new file mode 100644
index 0000000000..600d64afe1
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm32
@@ -0,0 +1,111 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos LLVM/SPIR-V Translator; 14
+; Bound: 58
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Vector16
+               OpCapability ExpectAssumeKHR
+               OpExtension "SPV_KHR_expect_assume"
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical32 OpenCL
+               OpEntryPoint Kernel %expect_bool "expect_bool"
+               OpSource OpenCL_C 102000
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %dst Alignment 64
+       %void = OpTypeVoid
+       %bool = OpTypeBool
+      %bool2 = OpTypeVector %bool 2
+      %bool3 = OpTypeVector %bool 3
+      %bool4 = OpTypeVector %bool 4
+      %bool8 = OpTypeVector %bool 8
+     %bool16 = OpTypeVector %bool 16
+       %uint = OpTypeInt 32 0
+      %uint2 = OpTypeVector %uint 2
+      %uint3 = OpTypeVector %uint 3
+      %uint4 = OpTypeVector %uint 4
+      %uint8 = OpTypeVector %uint 8
+     %uint16 = OpTypeVector %uint 16
+     %uint_0 = OpConstantNull %uint
+    %uint2_0 = OpConstantNull %uint2
+    %uint3_0 = OpConstantNull %uint3
+    %uint4_0 = OpConstantNull %uint4
+    %uint8_0 = OpConstantNull %uint8
+   %uint16_0 = OpConstantNull %uint16
+ %bool_false = OpConstantNull %bool
+%bool2_false = OpConstantNull %bool2
+%bool3_false = OpConstantNull %bool3
+%bool4_false = OpConstantNull %bool4
+%bool8_false = OpConstantNull %bool8
+%bool16_false = OpConstantNull %bool16
+    %index_1 = OpConstant %uint 1
+    %index_2 = OpConstant %uint 2
+    %index_3 = OpConstant %uint 3
+    %index_4 = OpConstant %uint 4
+    %index_5 = OpConstant %uint 5
+%_ptr_CrossWorkgroup_uint16 = OpTypePointer CrossWorkgroup %uint16
+          %6 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint16 %uint
+%expect_bool = OpFunction %void None %6
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_uint16
+      %value = OpFunctionParameter %uint
+         %10 = OpLabel
+                  ; setup
+  %value_vec = OpCompositeInsert %uint2 %value %uint2_0 0
+                  ; scalar expect:
+                  ;     bool test = value == 0
+                  ;     bool t1e = __builtin_expect(test, false);
+                  ;     int v1e = t1e ? 0 : value
+                  ;     dst[0] = (int16)(v1e, 0, ...);
+       %test = OpIEqual %bool %value %uint_0
+        %t1e = OpExpectKHR %bool %test %bool_false
+        %v1e = OpSelect %uint %t1e %uint_0 %value
+      %v1v16 = OpCompositeInsert %uint16 %v1e %uint16_0 0
+               OpStore %dst %v1v16 Aligned 64
+                  ; vec2 expect:
+                  ;     int2 v2 = (int2)(value);
+                  ;     bool2 test2 = v2 == 0
+                  ;     bool2 t2e = __builtin_expect(test2, false2)
+                  ;     int2 v2e = t2e ? 0 : v2;
+                  ;     dst[1] = (int16)(v2e, 0, ...);
+         %v2 = OpVectorShuffle %uint2 %value_vec %value_vec 0 0
+      %test2 = OpIEqual %bool2 %v2 %uint2_0
+        %t2e = OpExpectKHR %bool2 %test2 %bool2_false
+        %v2e = OpSelect %uint2 %t2e %uint2_0 %v2
+      %v2v16 = OpVectorShuffle %uint16 %v2e %uint2_0 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
+      %dst_1 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_1
+               OpStore %dst_1 %v2v16 Aligned 64
+                  ; vec3 expect
+         %v3 = OpVectorShuffle %uint3 %value_vec %value_vec 0 0 0
+      %test3 = OpIEqual %bool3 %v3 %uint3_0
+        %t3e = OpExpectKHR %bool3 %test3 %bool3_false
+        %v3e = OpSelect %uint3 %t3e %uint3_0 %v3
+      %v3v16 = OpVectorShuffle %uint16 %v3e %uint2_0 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3
+      %dst_2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_2
+               OpStore %dst_2 %v3v16 Aligned 64
+                  ; vec4 expect
+         %v4 = OpVectorShuffle %uint4 %value_vec %value_vec 0 0 0 0
+      %test4 = OpIEqual %bool4 %v4 %uint4_0
+        %t4e = OpExpectKHR %bool4 %test4 %bool4_false
+        %v4e = OpSelect %uint4 %t4e %uint4_0 %v4
+      %v4v16 = OpVectorShuffle %uint16 %v4e %uint2_0 0 1 2 3 4 4 4 4 4 4 4 4 4 4 4 4
+      %dst_3 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_3
+               OpStore %dst_3 %v4v16 Aligned 64
+                  ; vec8 expect
+         %v8 = OpVectorShuffle %uint8 %value_vec %value_vec 0 0 0 0 0 0 0 0
+      %test8 = OpIEqual %bool8 %v8 %uint8_0
+        %t8e = OpExpectKHR %bool8 %test8 %bool8_false
+        %v8e = OpSelect %uint8 %t8e %uint8_0 %v8
+      %v8v16 = OpVectorShuffle %uint16 %v8e %uint2_0 0 1 2 3 4 5 6 7 8 8 8 8 8 8 8 8
+      %dst_4 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_4
+               OpStore %dst_4 %v8v16 Aligned 64
+                  ; vec16 expect
+        %v16 = OpVectorShuffle %uint16 %value_vec %value_vec 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+     %test16 = OpIEqual %bool16 %v16 %uint16_0
+       %t16e = OpExpectKHR %bool16 %test16 %bool16_false
+       %v16e = OpSelect %uint16 %t16e %uint16_0 %v16
+      %dst_5 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_5
+               OpStore %dst_5 %v16e Aligned 64
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64 b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64
new file mode 100644
index 0000000000..f512a3a98e
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/expect_bool.spvasm64
@@ -0,0 +1,113 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos LLVM/SPIR-V Translator; 14
+; Bound: 58
+; Schema: 0
+               OpCapability Addresses
+               OpCapability Linkage
+               OpCapability Kernel
+               OpCapability Vector16
+               OpCapability Int64
+               OpCapability ExpectAssumeKHR
+               OpExtension "SPV_KHR_expect_assume"
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %expect_bool "expect_bool"
+               OpSource OpenCL_C 102000
+               OpDecorate %dst FuncParamAttr NoCapture
+               OpDecorate %dst Alignment 64
+       %void = OpTypeVoid
+       %bool = OpTypeBool
+      %bool2 = OpTypeVector %bool 2
+      %bool3 = OpTypeVector %bool 3
+      %bool4 = OpTypeVector %bool 4
+      %bool8 = OpTypeVector %bool 8
+     %bool16 = OpTypeVector %bool 16
+       %uint = OpTypeInt 32 0
+      %uint2 = OpTypeVector %uint 2
+      %uint3 = OpTypeVector %uint 3
+      %uint4 = OpTypeVector %uint 4
+      %uint8 = OpTypeVector %uint 8
+     %uint16 = OpTypeVector %uint 16
+      %ulong = OpTypeInt 64 0
+     %uint_0 = OpConstantNull %uint
+    %uint2_0 = OpConstantNull %uint2
+    %uint3_0 = OpConstantNull %uint3
+    %uint4_0 = OpConstantNull %uint4
+    %uint8_0 = OpConstantNull %uint8
+   %uint16_0 = OpConstantNull %uint16
+ %bool_false = OpConstantNull %bool
+%bool2_false = OpConstantNull %bool2
+%bool3_false = OpConstantNull %bool3
+%bool4_false = OpConstantNull %bool4
+%bool8_false = OpConstantNull %bool8
+%bool16_false = OpConstantNull %bool16
+    %index_1 = OpConstant %ulong 1
+    %index_2 = OpConstant %ulong 2
+    %index_3 = OpConstant %ulong 3
+    %index_4 = OpConstant %ulong 4
+    %index_5 = OpConstant %ulong 5
+%_ptr_CrossWorkgroup_uint16 = OpTypePointer CrossWorkgroup %uint16
+          %6 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint16 %uint
+%expect_bool = OpFunction %void None %6
+        %dst = OpFunctionParameter %_ptr_CrossWorkgroup_uint16
+      %value = OpFunctionParameter %uint
+         %10 = OpLabel
+                  ; setup
+  %value_vec = OpCompositeInsert %uint2 %value %uint2_0 0
+                  ; scalar expect:
+                  ;     bool test = value == 0
+                  ;     bool t1e = __builtin_expect(test, false);
+                  ;     int v1e = t1e ? 0 : value
+                  ;     dst[0] = (int16)(v1e, 0, ...);
+       %test = OpIEqual %bool %value %uint_0
+        %t1e = OpExpectKHR %bool %test %bool_false
+        %v1e = OpSelect %uint %t1e %uint_0 %value
+      %v1v16 = OpCompositeInsert %uint16 %v1e %uint16_0 0
+               OpStore %dst %v1v16 Aligned 64
+                  ; vec2 expect:
+                  ;     int2 v2 = (int2)(value);
+                  ;     bool2 test2 = v2 == 0
+                  ;     bool2 t2e = __builtin_expect(test2, false2)
+                  ;     int2 v2e = t2e ? 0 : v2;
+                  ;     dst[1] = (int16)(v2e, 0, ...);
+         %v2 = OpVectorShuffle %uint2 %value_vec %value_vec 0 0
+      %test2 = OpIEqual %bool2 %v2 %uint2_0
+        %t2e = OpExpectKHR %bool2 %test2 %bool2_false
+        %v2e = OpSelect %uint2 %t2e %uint2_0 %v2
+      %v2v16 = OpVectorShuffle %uint16 %v2e %uint2_0 0 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 
+      %dst_1 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_1
+               OpStore %dst_1 %v2v16 Aligned 64
+                  ; vec3 expect
+         %v3 = OpVectorShuffle %uint3 %value_vec %value_vec 0 0 0
+      %test3 = OpIEqual %bool3 %v3 %uint3_0
+        %t3e = OpExpectKHR %bool3 %test3 %bool3_false
+        %v3e = OpSelect %uint3 %t3e %uint3_0 %v3
+      %v3v16 = OpVectorShuffle %uint16 %v3e %uint2_0 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3
+      %dst_2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_2
+               OpStore %dst_2 %v3v16 Aligned 64
+                  ; vec4 expect
+         %v4 = OpVectorShuffle %uint4 %value_vec %value_vec 0 0 0 0
+      %test4 = OpIEqual %bool4 %v4 %uint4_0
+        %t4e = OpExpectKHR %bool4 %test4 %bool4_false
+        %v4e = OpSelect %uint4 %t4e %uint4_0 %v4
+      %v4v16 = OpVectorShuffle %uint16 %v4e %uint2_0 0 1 2 3 4 4 4 4 4 4 4 4 4 4 4 4
+      %dst_3 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_3
+               OpStore %dst_3 %v4v16 Aligned 64
+                  ; vec8 expect
+         %v8 = OpVectorShuffle %uint8 %value_vec %value_vec 0 0 0 0 0 0 0 0
+      %test8 = OpIEqual %bool8 %v8 %uint8_0
+        %t8e = OpExpectKHR %bool8 %test8 %bool8_false
+        %v8e = OpSelect %uint8 %t8e %uint8_0 %v8
+      %v8v16 = OpVectorShuffle %uint16 %v8e %uint2_0 0 1 2 3 4 5 6 7 8 8 8 8 8 8 8 8
+      %dst_4 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_4
+               OpStore %dst_4 %v8v16 Aligned 64
+                  ; vec16 expect
+        %v16 = OpVectorShuffle %uint16 %value_vec %value_vec 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+     %test16 = OpIEqual %bool16 %v16 %uint16_0
+       %t16e = OpExpectKHR %bool16 %test16 %bool16_false
+       %v16e = OpSelect %uint16 %t16e %uint16_0 %v16
+      %dst_5 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint16 %dst %index_5
+               OpStore %dst_5 %v16e Aligned 64
+               OpReturn
+               OpFunctionEnd
diff --git a/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp b/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp
index 05c5068a03..62a3c2baca 100644
--- a/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp
+++ b/test_conformance/spirv_new/test_cl_khr_expect_assume.cpp
@@ -23,36 +23,48 @@ template <typename T> struct TestInfo
 };
 template <> struct TestInfo<cl_char>
 {
+    using argType = cl_char;
     static constexpr const char* typeName = "char";
     static constexpr const char* testName = "expect_char";
 };
 template <> struct TestInfo<cl_short>
 {
+    using argType = cl_short;
     static constexpr const char* typeName = "short";
     static constexpr const char* testName = "expect_short";
 };
 template <> struct TestInfo<cl_int>
 {
+    using argType = cl_int;
     static constexpr const char* typeName = "int";
     static constexpr const char* testName = "expect_int";
 };
 template <> struct TestInfo<cl_long>
 {
+    using argType = cl_long;
     static constexpr const char* typeName = "long";
     static constexpr const char* testName = "expect_long";
 };
+template <> struct TestInfo<cl_bool>
+{
+    using argType = cl_int; // bool is exercised through cl_int host data
+    static constexpr const char* typeName = "bool";
+    static constexpr const char* testName = "expect_bool";
+};
 
 template <typename T>
 static int test_expect_type(cl_device_id device, cl_context context,
                             cl_command_queue queue)
 {
+    using ArgType = typename TestInfo<T>::argType;
+
     log_info("    testing type %s\n", TestInfo<T>::typeName);
 
-    const T value = 42;
+    const ArgType value = 42;
     cl_int error = CL_SUCCESS;
 
     std::vector<size_t> vecSizes({ 1, 2, 3, 4, 8, 16 });
-    std::vector<T> testData;
+    std::vector<ArgType> testData;
     testData.reserve(16 * vecSizes.size());
 
     for (auto v : vecSizes)
@@ -69,8 +81,8 @@ static int test_expect_type(cl_device_id device, cl_context context,
     }
 
     clMemWrapper dst =
-        clCreateBuffer(context, CL_MEM_WRITE_ONLY, testData.size() * sizeof(T),
-                       nullptr, &error);
+        clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                       testData.size() * sizeof(ArgType), nullptr, &error);
     test_error(error, "Unable to create destination buffer");
 
     clProgramWrapper prog;
@@ -90,10 +102,10 @@ static int test_expect_type(cl_device_id device, cl_context context,
                                    NULL, NULL);
     test_error(error, "Unable to enqueue kernel");
 
-    std::vector<T> resData(testData.size());
-    error =
-        clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, resData.size() * sizeof(T),
-                            resData.data(), 0, NULL, NULL);
+    std::vector<ArgType> resData(testData.size());
+    error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0,
+                                resData.size() * sizeof(ArgType),
+                                resData.data(), 0, NULL, NULL);
     test_error(error, "Unable to read destination buffer");
 
     if (resData != testData)
@@ -122,6 +134,7 @@ TEST_SPIRV_FUNC(op_expect)
     {
         result |= test_expect_type<cl_long>(deviceID, context, queue);
     }
+    result |= test_expect_type<cl_bool>(deviceID, context, queue);
 
     return result;
 }
diff --git a/test_conformance/spirv_new/test_decorate.cpp b/test_conformance/spirv_new/test_decorate.cpp
index 4c2f82b540..b85419300d 100644
--- a/test_conformance/spirv_new/test_decorate.cpp
+++ b/test_conformance/spirv_new/test_decorate.cpp
@@ -216,7 +216,8 @@ static inline Ti generate_saturated_rhs_input(RandomSeed &seed)
 }
 
 template <typename Ti, typename Tl, typename To>
-static inline To compute_saturated_output(Ti lhs, Ti rhs)
+static inline To compute_saturated_output(Ti lhs, Ti rhs,
+                                          cl_half_rounding_mode half_rounding)
 {
     constexpr auto loVal = std::numeric_limits<To>::min();
     constexpr auto hiVal = std::numeric_limits<To>::max();
@@ -226,10 +227,10 @@ static inline To compute_saturated_output(Ti lhs, Ti rhs)
         cl_float f = cl_half_to_float(lhs) * cl_half_to_float(rhs);
 
         // Quantize to fp16:
-        f = cl_half_to_float(cl_half_from_float(f, CL_HALF_RTE));
+        f = cl_half_to_float(cl_half_from_float(f, half_rounding));
 
         To val = (To)std::min<float>(std::max<float>(f, loVal), hiVal);
-        if (isnan(cl_half_from_float(rhs, CL_HALF_RTE)))
+        if (isnan(cl_half_to_float(rhs)))
         {
             val = 0;
         }
@@ -246,6 +247,26 @@ static inline To compute_saturated_output(Ti lhs, Ti rhs)
     return val;
 }
 
+// Maps the device's default CL_DEVICE_HALF_FP_CONFIG rounding mode to a
+// cl_half_rounding_mode; logs an error and falls back to CL_HALF_RTE otherwise.
+static cl_half_rounding_mode get_half_rounding_mode(cl_device_id deviceID)
+{
+    const cl_device_fp_config fpConfigHalf =
+        get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG);
+
+    if (fpConfigHalf == CL_FP_ROUND_TO_NEAREST)
+    {
+        return CL_HALF_RTE;
+    }
+    if (fpConfigHalf == CL_FP_ROUND_TO_ZERO)
+    {
+        return CL_HALF_RTZ;
+    }
+
+    log_error("Error while acquiring half rounding mode\n");
+    return CL_HALF_RTE;
+}
+
 template <typename Ti, typename Tl, typename To>
 int verify_saturated_results(cl_device_id deviceID, cl_context context,
                              cl_command_queue queue, const char *kname,
@@ -303,13 +324,20 @@ int verify_saturated_results(cl_device_id deviceID, cl_context context,
     err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, out_bytes, &h_res[0], 0, NULL, NULL);
     SPIRV_CHECK_ERROR(err, "Failed to read to output");
 
+    cl_half_rounding_mode half_rounding = CL_HALF_RTE;
+    if (std::is_same<Ti, cl_half>::value)
+    {
+        half_rounding = get_half_rounding_mode(deviceID);
+    }
+
     for (int i = 0; i < num; i++)
     {
-        To val = compute_saturated_output<Ti, Tl, To>(h_lhs[i], h_rhs[i]);
+        To val = compute_saturated_output<Ti, Tl, To>(h_lhs[i], h_rhs[i],
+                                                      half_rounding);
 
         if (val != h_res[i])
         {
-            log_error("Value error at %d: got %d, want %d\n", i, val, h_res[i]);
+            log_error("Value error at %d: got %d, want %d\n", i, h_res[i], val);
             return -1;
         }
     }
diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt
index 9c9cc7d472..c970a77e38 100644
--- a/test_conformance/vulkan/CMakeLists.txt
+++ b/test_conformance/vulkan/CMakeLists.txt
@@ -1,10 +1,6 @@
 set (MODULE_NAME VULKAN)
 
-if(WIN32)
-    list(APPEND CLConform_LIBRARIES vulkan-1 vulkan_wrapper)
-else(WIN32)
-    list(APPEND CLConform_LIBRARIES vulkan dl vulkan_wrapper)
-endif(WIN32)
+list(APPEND CLConform_LIBRARIES vulkan_wrapper)
 set(CMAKE_CXX_FLAGS "-fpermissive")
 if(WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVK_USE_PLATFORM_WIN32_KHR")
@@ -25,6 +21,8 @@ set (${MODULE_NAME}_SOURCES
         test_vulkan_interop_buffer.cpp
         test_vulkan_interop_image.cpp
         test_vulkan_api_consistency.cpp
+        test_vulkan_api_consistency_for_3dimages.cpp
+        test_vulkan_api_consistency_for_1dimages.cpp
         test_vulkan_platform_device_info.cpp
         vulkan_interop_common.cpp
         ../../test_common/harness/genericThread.cpp
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
index aec3f7c52a..4f560f96b2 100644
--- a/test_conformance/vulkan/main.cpp
+++ b/test_conformance/vulkan/main.cpp
@@ -157,6 +157,8 @@ test_definition test_list[] = { ADD_TEST(buffer_single_queue),
                                 ADD_TEST(image_multiple_queue),
                                 ADD_TEST(consistency_external_buffer),
                                 ADD_TEST(consistency_external_image),
+                                ADD_TEST(consistency_external_for_3dimage),
+                                ADD_TEST(consistency_external_for_1dimage),
                                 ADD_TEST(consistency_external_semaphore),
                                 ADD_TEST(platform_info),
                                 ADD_TEST(device_info) };
diff --git a/test_conformance/vulkan/procs.h b/test_conformance/vulkan/procs.h
index 37bf7869c0..d5465d7a28 100644
--- a/test_conformance/vulkan/procs.h
+++ b/test_conformance/vulkan/procs.h
@@ -28,6 +28,14 @@ extern int test_consistency_external_image(cl_device_id device,
                                            cl_context context,
                                            cl_command_queue queue,
                                            int num_elements);
+extern int test_consistency_external_for_3dimage(cl_device_id device,
+                                                 cl_context context,
+                                                 cl_command_queue queue,
+                                                 int num_elements);
+extern int test_consistency_external_for_1dimage(cl_device_id device,
+                                                 cl_context context,
+                                                 cl_command_queue queue,
+                                                 int num_elements);
 extern int test_consistency_external_semaphore(cl_device_id device,
                                                cl_context context,
                                                cl_command_queue queue,
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp
new file mode 100644
index 0000000000..2df210081c
--- /dev/null
+++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_1dimages.cpp
@@ -0,0 +1,206 @@
+#include <vulkan_interop_common.hpp>
+#include <opencl_vulkan_wrapper.hpp>
+#include <vulkan_wrapper.hpp>
+#if !defined(__APPLE__)
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#else
+#include <OpenCL/cl.h>
+#include <OpenCL/cl_ext.h>
+#endif
+
+#include <assert.h>
+#include <vector>
+#include <iostream>
+#include <string.h>
+#include "harness/testHarness.h"
+#include "harness/typeWrappers.h"
+#include "harness/deviceInfo.h"
+
+// Checks clCreateImageWithProperties argument validation for a 1D image whose
+// memory is exported from Vulkan: one valid import, one creation with NULL
+// properties, and the error codes expected for NULL format/descriptor inputs.
+// Returns TEST_PASS when every check succeeds.
+int test_consistency_external_for_1dimage(cl_device_id deviceID,
+                                          cl_context _context,
+                                          cl_command_queue _queue,
+                                          int num_elements)
+{
+    cl_int errNum;
+    VulkanDevice vkDevice;
+
+    // Context and command queue creation. The wrappers release both objects on
+    // every exit path, including the early TEST_FAIL returns below.
+    cl_platform_id platform = NULL;
+    clContextWrapper context;
+    clCommandQueueWrapper cmd_queue;
+
+    cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    errNum = clGetPlatformIDs(1, &platform, NULL);
+    test_error(errNum, "Failed to get platform id");
+
+    contextProperties[1] = (cl_context_properties)platform;
+
+    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+                                      NULL, NULL, &errNum);
+    test_error(errNum, "Unable to create context with properties");
+
+    cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum);
+    test_error(errNum, "Unable to create command queue");
+
+    cl_device_id devList[] = { deviceID, NULL };
+
+#ifdef _WIN32
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_win32"))
+    {
+        throw std::runtime_error("Device does not support "
+                                 "cl_khr_external_memory_win32 extension \n");
+    }
+#else
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd"))
+    {
+        throw std::runtime_error(
+            "Device does not support cl_khr_external_memory_opaque_fd "
+            "extension \n");
+    }
+#endif
+    uint32_t width = 256;
+    cl_image_desc image_desc;
+    memset(&image_desc, 0x0, sizeof(cl_image_desc));
+    cl_image_format img_format = { 0 };
+
+    VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+        getSupportedVulkanExternalMemoryHandleTypeList()[0];
+
+    VulkanImageTiling vulkanImageTiling =
+        vkClExternalMemoryHandleTilingAssumption(
+            deviceID, vkExternalMemoryHandleType, &errNum);
+    ASSERT_SUCCESS(errNum, "Failed to query OpenCL tiling mode");
+
+    VulkanImage1D vkImage1D =
+        VulkanImage1D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width,
+                      vulkanImageTiling, 1, vkExternalMemoryHandleType);
+
+    const VulkanMemoryTypeList& memoryTypeList = vkImage1D.getMemoryTypeList();
+    uint64_t totalImageMemSize = vkImage1D.getSize();
+
+    log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]);
+    log_info("Memory type property: %d\n",
+             memoryTypeList[0].getMemoryTypeProperty());
+    // uint64_t is not unsigned long on every target, so cast for the format.
+    log_info("Image size : %llu\n", (unsigned long long)totalImageMemSize);
+
+    // Automatic storage, so the object is destroyed on every exit path.
+    VulkanDeviceMemory vkDeviceMem(vkDevice, vkImage1D, memoryTypeList[0],
+                                   vkExternalMemoryHandleType);
+    vkDeviceMem.bindImage(vkImage1D, 0);
+
+    void* handle = NULL;
+    int fd;
+    std::vector<cl_mem_properties> extMemProperties{
+        (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR,
+        (cl_mem_properties)devList[0],
+        (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_END_KHR,
+    };
+    switch (vkExternalMemoryHandleType)
+    {
+#ifdef _WIN32
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+            handle = vkDeviceMem.getHandle(vkExternalMemoryHandleType);
+            errNum = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties.push_back((cl_mem_properties)handle);
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+            handle = vkDeviceMem.getHandle(vkExternalMemoryHandleType);
+            errNum = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)
+                    CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties.push_back((cl_mem_properties)handle);
+            break;
+#else
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+            fd = (int)vkDeviceMem.getHandle(vkExternalMemoryHandleType);
+            errNum = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties.push_back((cl_mem_properties)fd);
+            break;
+#endif
+        default:
+            errNum = TEST_FAIL;
+            log_error("Unsupported external memory handle type \n");
+            break;
+    }
+    if (errNum != CL_SUCCESS)
+    {
+        log_error("Checks failed for "
+                  "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n");
+        return TEST_FAIL;
+    }
+    extMemProperties.push_back(0);
+
+    const VkImageCreateInfo VulkanImageCreateInfo =
+        vkImage1D.getVkImageCreateInfo();
+
+    errNum = getCLImageInfoFromVkImageInfo(
+        &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc);
+    if (errNum != CL_SUCCESS)
+    {
+        log_error("getCLImageInfoFromVkImageInfo failed!!!");
+        return TEST_FAIL;
+    }
+
+    clMemWrapper image;
+    // Pass valid properties, image_desc and image_format
+    image = clCreateImageWithProperties(
+        context, extMemProperties.data(), CL_MEM_READ_WRITE, &img_format,
+        &image_desc, NULL /* host_ptr */, &errNum);
+    test_error(errNum, "Unable to create Image with Properties");
+    image.reset();
+
+    // Passing properties, image_desc and image_format all as NULL
+    image = clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, NULL,
+                                        NULL, NULL, &errNum);
+    test_failure_error(
+        errNum, CL_INVALID_IMAGE_DESCRIPTOR,
+        "Image creation must fail with CL_INVALID_IMAGE_DESCRIPTOR "
+        "when all are passed as NULL");
+    image.reset();
+
+    // Passing NULL properties and a valid image_format and image_desc
+    image =
+        clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE,
+                                    &img_format, &image_desc, NULL, &errNum);
+    test_error(errNum,
+               "Unable to create image with NULL properties "
+               "with valid image format and image desc");
+    image.reset();
+
+    // Passing image_format as NULL
+    image = clCreateImageWithProperties(context, extMemProperties.data(),
+                                        CL_MEM_READ_WRITE, NULL, &image_desc,
+                                        NULL, &errNum);
+    test_failure_error(errNum, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+                       "Image creation must fail with "
+                       "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR "
+                       "when image format passed as NULL");
+    image.reset();
+
+    // Passing image_desc as NULL
+    image = clCreateImageWithProperties(context, extMemProperties.data(),
+                                        CL_MEM_READ_WRITE, &img_format, NULL,
+                                        NULL, &errNum);
+    test_failure_error(errNum, CL_INVALID_IMAGE_DESCRIPTOR,
+                       "Image creation must fail with "
+                       "CL_INVALID_IMAGE_DESCRIPTOR "
+                       "when image desc passed as NULL");
+    image.reset();
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp
new file mode 100644
index 0000000000..f8a7861cec
--- /dev/null
+++ b/test_conformance/vulkan/test_vulkan_api_consistency_for_3dimages.cpp
@@ -0,0 +1,232 @@
+#include <vulkan_interop_common.hpp>
+#include <opencl_vulkan_wrapper.hpp>
+#include <vulkan_wrapper.hpp>
+#if !defined(__APPLE__)
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#else
+#include <OpenCL/cl.h>
+#include <OpenCL/cl_ext.h>
+#endif
+
+#include <assert.h>
+#include <vector>
+#include <iostream>
+#include <string.h>
+#include "harness/testHarness.h"
+#include "harness/typeWrappers.h"
+#include "harness/deviceInfo.h"
+#include <string>
+
+
+// Checks clCreateImageWithProperties argument validation when importing the
+// external memory of a Vulkan 3D image.
+//
+// The test builds its own GPU context and a queue on deviceID (the _context,
+// _queue and num_elements arguments are not used), binds device memory of the
+// first supported external handle type to a 256x16x10 RGBA8 Vulkan image and
+// then expects:
+//   - success with valid properties, image format and image descriptor;
+//   - CL_INVALID_IMAGE_DESCRIPTOR when properties, format and descriptor are
+//     all NULL;
+//   - success with NULL properties and a valid format and descriptor;
+//   - CL_INVALID_IMAGE_FORMAT_DESCRIPTOR when only the format is NULL;
+//   - CL_INVALID_IMAGE_DESCRIPTOR when only the descriptor is NULL.
+//
+// Returns TEST_PASS when every check holds and TEST_FAIL when the handle-type
+// check or the Vulkan-to-OpenCL image info conversion fails. Throws
+// std::runtime_error if the device lacks the platform's external memory
+// extension.
+int test_consistency_external_for_3dimage(cl_device_id deviceID,
+                                          cl_context _context,
+                                          cl_command_queue _queue,
+                                          int num_elements)
+{
+    cl_int errNum;
+    VulkanDevice vkDevice;
+
+    // Context and command queue creation. The wrappers release both handles
+    // on every exit path, so early returns do not leak them.
+    cl_platform_id platform = NULL;
+    clContextWrapper context;
+    clCommandQueueWrapper cmd_queue;
+
+    cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 };
+    errNum = clGetPlatformIDs(1, &platform, NULL);
+    test_error(errNum, "Failed to get platform id");
+
+    contextProperties[1] = (cl_context_properties)platform;
+
+    context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU,
+                                      NULL, NULL, &errNum);
+    test_error(errNum, "Unable to create context with properties");
+
+    cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum);
+    test_error(errNum, "Unable to create command queue");
+
+    cl_device_id devList[] = { deviceID, NULL };
+
+#ifdef _WIN32
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_win32"))
+    {
+        throw std::runtime_error("Device does not support "
+                                 "cl_khr_external_memory_win32 extension \n");
+    }
+#else
+    if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd"))
+    {
+        throw std::runtime_error(
+            "Device does not support cl_khr_external_memory_opaque_fd "
+            "extension \n");
+    }
+#endif
+    uint32_t width = 256;
+    uint32_t height = 16;
+    uint32_t depth = 10;
+    cl_image_desc image_desc;
+    memset(&image_desc, 0x0, sizeof(cl_image_desc));
+    cl_image_format img_format = { 0 };
+
+    VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
+        getSupportedVulkanExternalMemoryHandleTypeList()[0];
+
+    VulkanImageTiling vulkanImageTiling =
+        vkClExternalMemoryHandleTilingAssumption(
+            deviceID, vkExternalMemoryHandleType, &errNum);
+    ASSERT_SUCCESS(errNum, "Failed to query OpenCL tiling mode");
+
+    VulkanImage3D vkImage3D =
+        VulkanImage3D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height,
+                      depth, vulkanImageTiling, 1, vkExternalMemoryHandleType);
+
+    const VulkanMemoryTypeList& memoryTypeList = vkImage3D.getMemoryTypeList();
+    uint64_t totalImageMemSize = vkImage3D.getSize();
+
+    log_info("Memory type index: %u\n", (uint32_t)memoryTypeList[0]);
+    log_info("Memory type property: %d\n",
+             memoryTypeList[0].getMemoryTypeProperty());
+    // uint64_t is not unsigned long on every ABI, so print it as %llu.
+    log_info("Image size : %llu\n", (unsigned long long)totalImageMemSize);
+
+    // Automatic storage so the object is destroyed on every exit path.
+    VulkanDeviceMemory vkDeviceMem(vkDevice, vkImage3D, memoryTypeList[0],
+                                   vkExternalMemoryHandleType);
+    vkDeviceMem.bindImage(vkImage3D, 0);
+
+#ifdef _WIN32
+    void* handle = NULL;
+#else
+    int fd = -1;
+#endif
+    std::vector<cl_mem_properties> extMemProperties{
+        (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_KHR,
+        (cl_mem_properties)devList[0],
+        (cl_mem_properties)CL_MEM_DEVICE_HANDLE_LIST_END_KHR,
+    };
+    switch (vkExternalMemoryHandleType)
+    {
+#ifdef _WIN32
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+            handle = vkDeviceMem.getHandle(vkExternalMemoryHandleType);
+            errNum = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR);
+            extMemProperties.push_back((cl_mem_properties)handle);
+            break;
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+            handle = vkDeviceMem.getHandle(vkExternalMemoryHandleType);
+            errNum = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)
+                    CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR);
+            extMemProperties.push_back((cl_mem_properties)handle);
+            break;
+#else
+        case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+            fd = (int)vkDeviceMem.getHandle(vkExternalMemoryHandleType);
+            errNum = check_external_memory_handle_type(
+                devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties.push_back(
+                (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR);
+            extMemProperties.push_back((cl_mem_properties)fd);
+            break;
+#endif
+        default:
+            errNum = TEST_FAIL;
+            log_error("Unsupported external memory handle type \n");
+            break;
+    }
+    if (errNum != CL_SUCCESS)
+    {
+        log_error("Checks failed for "
+                  "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n");
+        return TEST_FAIL;
+    }
+    // A zero entry terminates the property list.
+    extMemProperties.push_back(0);
+
+    const VkImageCreateInfo VulkanImageCreateInfo =
+        vkImage3D.getVkImageCreateInfo();
+
+    errNum = getCLImageInfoFromVkImageInfo(
+        &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc);
+    if (errNum != CL_SUCCESS)
+    {
+        log_error("getCLImageInfoFromVkImageInfo failed!!!\n");
+        return TEST_FAIL;
+    }
+
+    clMemWrapper image;
+
+    // Pass valid properties, image_desc and image_format
+    image = clCreateImageWithProperties(
+        context, extMemProperties.data(), CL_MEM_READ_WRITE, &img_format,
+        &image_desc, NULL /* host_ptr */, &errNum);
+    test_error(errNum, "Unable to create Image with Properties");
+    image.reset();
+
+    // Passing properties, image_desc and image_format all as NULL
+    image = clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, NULL,
+                                        NULL, NULL, &errNum);
+    test_failure_error(
+        errNum, CL_INVALID_IMAGE_DESCRIPTOR,
+        "Image creation must fail with CL_INVALID_IMAGE_DESCRIPTOR "
+        "when all are passed as NULL");
+
+    image.reset();
+
+    // Passing NULL properties and a valid image_format and image_desc
+    image =
+        clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE,
+                                    &img_format, &image_desc, NULL, &errNum);
+    test_error(errNum,
+               "Unable to create image with NULL properties "
+               "with valid image format and image desc");
+
+    image.reset();
+
+    // Passing image_format as NULL
+    image = clCreateImageWithProperties(context, extMemProperties.data(),
+                                        CL_MEM_READ_WRITE, NULL, &image_desc,
+                                        NULL, &errNum);
+    test_failure_error(errNum, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR,
+                       "Image creation must fail with "
+                       "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR "
+                       "when image format passed as NULL");
+
+    image.reset();
+
+    // Passing image_desc as NULL
+    image = clCreateImageWithProperties(context, extMemProperties.data(),
+                                        CL_MEM_READ_WRITE, &img_format, NULL,
+                                        NULL, &errNum);
+    test_failure_error(errNum, CL_INVALID_IMAGE_DESCRIPTOR,
+                       "Image creation must fail with "
+                       "CL_INVALID_IMAGE_DESCRIPTOR "
+                       "when image desc passed as NULL");
+    image.reset();
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index 2787c17189..de350f7022 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -140,10 +140,12 @@ int run_test_with_two_queue(
     }
     else
     {
-        clVk2CLExternalSemaphore = new clExternalImportableSemaphore(
-            vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-        clCl2VkExternalSemaphore = new clExternalExportableSemaphore(
-            vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+        CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore, vkVk2CLSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, false);
+        CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore, vkCl2VkSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, true);
     }
 
     const uint32_t maxIter = innerIterations;
@@ -281,9 +283,16 @@ int run_test_with_two_queue(
 
                 cl_event first_launch;
 
+                cl_event acquire_event = nullptr;
+                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                    cmd_queue1, vkBufferList.size(), buffers, 0, nullptr,
+                    &acquire_event);
+                test_error_and_cleanup(err, CLEANUP,
+                                       "Failed to acquire buffers");
+
                 err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
-                                             1, NULL, global_work_size, NULL, 0,
-                                             NULL, &first_launch);
+                                             1, NULL, global_work_size, NULL, 1,
+                                             &acquire_event, &first_launch);
                 test_error_and_cleanup(
                     err, CLEANUP,
                     "Error: Failed to launch update_buffer_kernel,"
@@ -297,6 +306,12 @@ int run_test_with_two_queue(
                     "Error: Failed to launch update_buffer_kernel,"
                     "error\n");
 
+                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                    cmd_queue2, vkBufferList.size(), buffers, 0, nullptr,
+                    nullptr);
+                test_error_and_cleanup(err, CLEANUP,
+                                       "Failed to release buffers");
+
                 if (use_fence)
                 {
                     clFlush(cmd_queue1);
@@ -310,6 +325,9 @@ int run_test_with_two_queue(
                     test_error_and_cleanup(err, CLEANUP,
                                            "Failed to signal CL semaphore\n");
                 }
+                err = clReleaseEvent(acquire_event);
+                test_error_and_cleanup(err, CLEANUP,
+                                       "Failed to release acquire event\n");
             }
             error_2 = (uint8_t *)malloc(sizeof(uint8_t));
             if (NULL == error_2)
@@ -453,10 +471,12 @@ int run_test_with_one_queue(
     }
     else
     {
-        clVk2CLExternalSemaphore = new clExternalImportableSemaphore(
-            vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-        clCl2VkExternalSemaphore = new clExternalExportableSemaphore(
-            vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+        CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore, vkVk2CLSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, false);
+        CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore, vkCl2VkSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, true);
     }
 
     const uint32_t maxIter = innerIterations;
@@ -585,6 +605,12 @@ int run_test_with_one_queue(
                     err, CLEANUP,
                     "Error: Failed to set arg values for kernel\n");
 
+                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                    cmd_queue1, vkBufferList.size(), buffers, 0, nullptr,
+                    nullptr);
+                test_error_and_cleanup(err, CLEANUP,
+                                       "Failed to acquire buffers");
+
                 err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel,
                                              1, NULL, global_work_size, NULL, 0,
                                              NULL, NULL);
@@ -593,6 +619,12 @@ int run_test_with_one_queue(
                     "Error: Failed to launch update_buffer_kernel,"
                     " error\n");
 
+                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                    cmd_queue1, vkBufferList.size(), buffers, 0, nullptr,
+                    nullptr);
+                test_error_and_cleanup(err, CLEANUP,
+                                       "Failed to release buffers");
+
                 if (use_fence)
                 {
                     clFlush(cmd_queue1);
@@ -742,10 +774,12 @@ int run_test_with_multi_import_same_ctx(
     }
     else
     {
-        clVk2CLExternalSemaphore = new clExternalImportableSemaphore(
-            vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-        clCl2VkExternalSemaphore = new clExternalExportableSemaphore(
-            vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+        CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore, vkVk2CLSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, false);
+        CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore, vkCl2VkSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, true);
     }
 
     const uint32_t maxIter = innerIterations;
@@ -883,6 +917,11 @@ int run_test_with_multi_import_same_ctx(
                             err |= clSetKernelArg(
                                 update_buffer_kernel, i + 1, sizeof(cl_mem),
                                 (void *)&(buffers[i][launchIter]));
+                            err |= clEnqueueAcquireExternalMemObjectsKHRptr(
+                                cmd_queue1, 1, &buffers[i][launchIter], 0,
+                                nullptr, nullptr);
+                            test_error_and_cleanup(err, CLEANUP,
+                                                   "Failed to acquire buffers");
                         }
                         test_error_and_cleanup(
                             err, CLEANUP,
@@ -896,6 +935,15 @@ int run_test_with_multi_import_same_ctx(
                             err, CLEANUP,
                             "Error: Failed to launch "
                             "update_buffer_kernel, error\n ");
+
+                        for (int i = 0; i < numBuffers; i++)
+                        {
+                            err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                cmd_queue1, 1, &buffers[i][launchIter], 0,
+                                nullptr, nullptr);
+                            test_error_and_cleanup(err, CLEANUP,
+                                                   "Failed to release buffers");
+                        }
                     }
                     if (use_fence)
                     {
@@ -1071,17 +1119,19 @@ int run_test_with_multi_import_diff_ctx(
     }
     else
     {
-        clVk2CLExternalSemaphore = new clExternalImportableSemaphore(
-            vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-        clCl2VkExternalSemaphore = new clExternalExportableSemaphore(
-            vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-
-        clVk2CLExternalSemaphore2 = new clExternalImportableSemaphore(
-            vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType,
-            deviceId);
-        clCl2VkExternalSemaphore2 = new clExternalExportableSemaphore(
-            vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType,
-            deviceId);
+        CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore, vkVk2CLSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, false);
+        CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore, vkCl2VkSemaphore,
+                                context, vkExternalSemaphoreHandleType,
+                                deviceId, false);
+
+        CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore2, vkVk2CLSemaphore,
+                                context2, vkExternalSemaphoreHandleType,
+                                deviceId, false);
+        CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore2, vkCl2VkSemaphore,
+                                context2, vkExternalSemaphoreHandleType,
+                                deviceId, false);
     }
 
     const uint32_t maxIter = innerIterations;
@@ -1229,11 +1279,22 @@ int run_test_with_multi_import_diff_ctx(
                     err =
                         clSetKernelArg(update_buffer_kernel1[launchIter], 0,
                                        sizeof(uint32_t), (void *)&pBufferSize);
+                    test_error_and_cleanup(err, CLEANUP,
+                                           "Failed to set kernel arg");
+
                     for (int i = 0; i < numBuffers; i++)
                     {
-                        err |= clSetKernelArg(
+                        err = clSetKernelArg(
                             update_buffer_kernel1[launchIter], i + 1,
                             sizeof(cl_mem), (void *)&(buffers1[i][launchIter]));
+                        test_error_and_cleanup(err, CLEANUP,
+                                               "Failed to set kernel arg");
+
+                        err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                            cmd_queue1, 1, &buffers1[i][launchIter], 0, nullptr,
+                            nullptr);
+                        test_error_and_cleanup(err, CLEANUP,
+                                               "Failed to acquire buffers");
                     }
                     test_error_and_cleanup(
                         err, CLEANUP,
@@ -1246,6 +1307,14 @@ int run_test_with_multi_import_diff_ctx(
                     test_error_and_cleanup(err, CLEANUP,
                                            "Error: Failed to launch "
                                            "update_buffer_kernel, error\n");
+                    for (int i = 0; i < numBuffers; i++)
+                    {
+                        err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                            cmd_queue1, 1, &buffers1[i][launchIter], 0, nullptr,
+                            nullptr);
+                        test_error_and_cleanup(err, CLEANUP,
+                                               "Failed to release buffers");
+                    }
                 }
                 if (use_fence)
                 {
@@ -1298,12 +1367,23 @@ int run_test_with_multi_import_diff_ctx(
                         err = clSetKernelArg(update_buffer_kernel2[launchIter],
                                              0, sizeof(uint32_t),
                                              (void *)&bufferSize);
+                        test_error_and_cleanup(err, CLEANUP,
+                                               "Failed to set kernel arg");
+
                         for (int i = 0; i < numBuffers; i++)
                         {
-                            err |= clSetKernelArg(
+                            err = clSetKernelArg(
                                 update_buffer_kernel2[launchIter], i + 1,
                                 sizeof(cl_mem),
                                 (void *)&(buffers2[i][launchIter]));
+                            test_error_and_cleanup(err, CLEANUP,
+                                                   "Failed to set kernel arg");
+
+                            err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                cmd_queue1, 1, &buffers2[i][launchIter], 0,
+                                nullptr, nullptr);
+                            test_error_and_cleanup(err, CLEANUP,
+                                                   "Failed to acquire buffers");
                         }
                         test_error_and_cleanup(
                             err, CLEANUP,
@@ -1317,6 +1397,14 @@ int run_test_with_multi_import_diff_ctx(
                             err, CLEANUP,
                             "Error: Failed to launch "
                             "update_buffer_kernel, error\n ");
+                        for (int i = 0; i < numBuffers; i++)
+                        {
+                            err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                cmd_queue1, 1, &buffers2[i][launchIter], 0,
+                                nullptr, nullptr);
+                            test_error_and_cleanup(err, CLEANUP,
+                                                   "Failed to release buffers");
+                        }
                     }
                     if (use_fence)
                     {
@@ -1757,4 +1845,4 @@ int test_buffer_common(cl_device_id device_, cl_context context_,
     if (extensions) free(extensions);
 
     return errNum;
-}
+}
\ No newline at end of file
diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp
index 7ca7b7f321..4c5d44a024 100644
--- a/test_conformance/vulkan/test_vulkan_interop_image.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp
@@ -251,10 +251,10 @@ int run_test_with_two_queue(
     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
 
-    clVk2CLExternalSemaphore = new clExternalImportableSemaphore(
-        vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-    clCl2VkExternalSemaphore = new clExternalExportableSemaphore(
-        vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+    CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore, vkVk2CLSemaphore, context,
+                            vkExternalSemaphoreHandleType, deviceId, false);
+    CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore, vkCl2VkSemaphore, context,
+                            vkExternalSemaphoreHandleType, deviceId, true);
 
     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
@@ -644,7 +644,18 @@ int run_test_with_two_queue(
                                     err, CLEANUP,
                                     "Error: Failed to set arg values \n");
 
-                                // clVk2CLExternalSemaphore->wait(cmd_queue1);
+                                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image1, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to acquire images");
+
+                                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image2, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to acquire images");
+
                                 size_t global_work_size[3] = { width, height,
                                                                1 };
                                 cl_event first_launch;
@@ -656,6 +667,30 @@ int run_test_with_two_queue(
                                     err, CLEANUP,
                                     "Failed to enqueue updateKernelCQ1\n");
 
+                                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image1, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to release images");
+
+                                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image2, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to release images");
+
+                                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                    cmd_queue2, num2DImages,
+                                    external_mem_image1, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to acquire images");
+
+                                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                    cmd_queue2, num2DImages,
+                                    external_mem_image2, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to acquire images");
+
                                 err = clEnqueueNDRangeKernel(
                                     cmd_queue2, updateKernelCQ2, 2, NULL,
                                     global_work_size, NULL, 1, &first_launch,
@@ -664,6 +699,18 @@ int run_test_with_two_queue(
                                     err, CLEANUP,
                                     "Failed to enqueue updateKernelCQ2\n");
 
+                                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                    cmd_queue2, num2DImages,
+                                    external_mem_image1, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to release images");
+
+                                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                    cmd_queue2, num2DImages,
+                                    external_mem_image2, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to release images");
+
                                 clFinish(cmd_queue2);
                                 err = clCl2VkExternalSemaphore->signal(
                                     cmd_queue2);
@@ -816,10 +863,10 @@ int run_test_with_one_queue(
     clExternalSemaphore *clVk2CLExternalSemaphore = NULL;
     clExternalSemaphore *clCl2VkExternalSemaphore = NULL;
 
-    clVk2CLExternalSemaphore = new clExternalImportableSemaphore(
-        vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-    clCl2VkExternalSemaphore = new clExternalExportableSemaphore(
-        vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+    CREATE_OPENCL_SEMAPHORE(clVk2CLExternalSemaphore, vkVk2CLSemaphore, context,
+                            vkExternalSemaphoreHandleType, deviceId, false);
+    CREATE_OPENCL_SEMAPHORE(clCl2VkExternalSemaphore, vkCl2VkSemaphore, context,
+                            vkExternalSemaphoreHandleType, deviceId, true);
 
     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
     std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
@@ -1182,6 +1229,18 @@ int run_test_with_one_queue(
                                     "Error: Failed to set arg "
                                     "values for kernel-1\n");
 
+                                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image1, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to acquire images");
+
+                                err = clEnqueueAcquireExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image2, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to acquire images");
+
                                 size_t global_work_size[3] = { width, height,
                                                                1 };
                                 err = clEnqueueNDRangeKernel(
@@ -1191,6 +1250,18 @@ int run_test_with_one_queue(
                                     err, CLEANUP,
                                     "Failed to enqueue updateKernelCQ1\n");
 
+                                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image1, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to release images");
+
+                                err = clEnqueueReleaseExternalMemObjectsKHRptr(
+                                    cmd_queue1, num2DImages,
+                                    external_mem_image2, 0, nullptr, nullptr);
+                                test_error_and_cleanup(
+                                    err, CLEANUP, "Failed to release images");
+
                                 err = clCl2VkExternalSemaphore->signal(
                                     cmd_queue1);
                                 test_error_and_cleanup(
@@ -1538,4 +1609,4 @@ int test_image_common(cl_device_id device_, cl_context context_,
     if (devices) free(devices);
 
     return err;
-}
+}
\ No newline at end of file