From 5e41b64c3afcc632408d7a564c9d320dc208c4ea Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 12 Sep 2025 07:42:12 +0200 Subject: [PATCH 1/4] Added floating point types support for work_group_reduce tests --- .../workgroups/test_wg_scan_reduce.cpp | 302 ++++++++++++++++-- 1 file changed, 281 insertions(+), 21 deletions(-) diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index f1f28cee1a..6d5206e840 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -21,6 +21,10 @@ #include "testBase.h" +cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; +constexpr cl_half g_half_min = 0xfbff; +constexpr cl_half g_half_max = 0x7bff; + static std::string make_kernel_string(const std::string &type, const std::string &kernelName, const std::string &func) @@ -64,33 +68,80 @@ template <> struct TestTypeInfo static constexpr const char *deviceName = "ulong"; }; +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "double"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "float"; +}; + +// please keep in mind cl_half type on host side is the same as uint16_t, +// therefore, if you will add below 16-bit unsigned int type support it will be +// likely confused with cl_half + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "half"; +}; + template struct Add { using Type = T; static constexpr const char *opName = "add"; static constexpr T identityValue = 0; - static T combine(T a, T b) { return a + b; } + static T combine(T a, T b) + { + if (std::is_same_v) + return cl_half_from_float(cl_half_to_float(a) + cl_half_to_float(b), + gHalfRoundingMode); + else + return a + b; + } }; template struct Max { using Type = T; static constexpr const char *opName = "max"; - static constexpr T identityValue = std::numeric_limits::min(); - static T combine(T a, T b) { return std::max(a, b); } + static constexpr T identityValue = std::is_same_v + ? g_half_min + : (std::is_integral_v ? std::numeric_limits::min() + : -std::numeric_limits::max()); + static T combine(T a, T b) + { + if (std::is_same_v) + return cl_half_from_float( + std::max(cl_half_to_float(a), cl_half_to_float(b)), + gHalfRoundingMode); + else + return std::max(a, b); + } }; template struct Min { using Type = T; static constexpr const char *opName = "min"; - static constexpr T identityValue = std::numeric_limits::max(); - static T combine(T a, T b) { return std::min(a, b); } + static constexpr T identityValue = + std::is_same_v ? g_half_max : std::numeric_limits::max(); + static T combine(T a, T b) + { + if (std::is_same_v) + return cl_half_from_float( + std::min(cl_half_to_float(a), cl_half_to_float(b)), + gHalfRoundingMode); + else + return std::min(a, b); + } }; template struct Reduce { using Type = typename C::Type; + using Operation = C; static constexpr const char *testName = "work_group_reduce"; static constexpr const char *testOpName = C::opName; @@ -98,7 +149,7 @@ template struct Reduce TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_reduce"; static int verify(Type *inptr, Type *outptr, size_t n_elems, - size_t max_wg_size) + size_t max_wg_size, const Type &max_err = 0) { for (size_t i = 0; i < n_elems; i += max_wg_size) { @@ -112,21 +163,125 @@ template struct Reduce for (size_t j = 0; j < wg_size; j++) { - if (result != outptr[i + j]) + if constexpr (std::is_floating_point_v) { - log_info("%s_%s: Error at %zu\n", testName, testOpName, - i + j); - return -1; + if (fabs(result - outptr[i + j]) > max_err) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + } + else if (std::is_same_v) + { + if (fabs(cl_half_to_float(result) + - cl_half_to_float(outptr[i + j])) + > cl_half_to_float(max_err)) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + } + else + { + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } } } } return 0; } + + static void generate_reference_values(Type *inptr, size_t n_elems, + size_t max_wg_size, Type &max_err = 0) + { + MTdataHolder d(gRandomSeed); + if constexpr (std::is_floating_point_v< + Type> || std::is_same_v) + { + std::vector ref_vals(max_wg_size, 0); + if (std::is_same_v) + { + // to prevent overflow limit range of randomization + float max_range = 99.0; + float min_range = -99.0; + // generate reference values for one work group + for (size_t j = 0; j < max_wg_size; j++) + ref_vals[j] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + + // populate reference data across all work groups + for (size_t i = 0; i < (size_t)n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + memcpy(&inptr[i], ref_vals.data(), sizeof(Type) * wg_size); + } + + if constexpr (std::is_same_v>) + { + // compute maximal summation error + std::sort(ref_vals.begin(), ref_vals.end(), + [](cl_half a, cl_half b) { + return std::abs(cl_half_to_float(a)) + < std::abs(cl_half_to_float(b)); + }); + + float s = 0.f; + for (auto it = ref_vals.begin(); it != ref_vals.end(); ++it) + s += std::abs(cl_half_to_float(*it)); + max_err = cl_half_from_float( + fabs((max_wg_size - 1) * CL_HALF_EPSILON * s), + gHalfRoundingMode); + } + } + else + { + double max_range = 999.0; + double min_range = -999.0; + for (size_t j = 0; j < max_wg_size; j++) + ref_vals[j] = get_random_float(min_range, max_range, d); + + for (size_t i = 0; i < (size_t)n_elems; i += max_wg_size) + { + size_t work_group_size = std::min(max_wg_size, n_elems - i); + memcpy(&inptr[i], ref_vals.data(), + sizeof(Type) * work_group_size); + } + + if constexpr (std::is_same_v>) + { + // compute maximal summation error + std::sort(ref_vals.begin(), ref_vals.end()); + Type abs_sum = 0; + for (auto elem : ref_vals) abs_sum += fabs(elem); + // Higham, N. J. (2002). Accuracy and Stability of Numerical + // Algorithms (2nd ed.), Chapter 4: Summation, Section 2: + // Error Analysis (worst case error summation) + max_err = (max_wg_size - 1) + * (std::is_same_v ? CL_FLT_EPSILON + : CL_DBL_EPSILON) + * abs_sum; + } + } + } + else + { + for (size_t i = 0; i < n_elems; i++) + inptr[i] = (Type)genrand_int64(d); + } + } }; template struct ScanInclusive { using Type = typename C::Type; + using Operation = C; static constexpr const char *testName = "work_group_scan_inclusive"; static constexpr const char *testOpName = C::opName; @@ -134,7 +289,7 @@ template struct ScanInclusive TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_scan_inclusive"; static int verify(Type *inptr, Type *outptr, size_t n_elems, - size_t max_wg_size) + size_t max_wg_size, const Type &max_err = 0) { for (size_t i = 0; i < n_elems; i += max_wg_size) { @@ -154,11 +309,19 @@ template struct ScanInclusive } return 0; } + + static void generate_reference_values(Type *inptr, size_t n_elems, + size_t max_wg_size, Type &max_err = 0) + { + MTdataHolder d(gRandomSeed); + for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); + } }; template struct ScanExclusive { using Type = typename C::Type; + using Operation = C; static constexpr const char *testName = "work_group_scan_exclusive"; static constexpr const char *testOpName = C::opName; @@ -166,7 +329,7 @@ template struct ScanExclusive TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_scan_exclusive"; static int verify(Type *inptr, Type *outptr, size_t n_elems, - size_t max_wg_size) + size_t max_wg_size, const Type &max_err = 0) { for (size_t i = 0; i < n_elems; i += max_wg_size) { @@ -186,6 +349,13 @@ template struct ScanExclusive } return 0; } + + static void generate_reference_values(Type *inptr, size_t n_elems, + size_t max_wg_size, Type &max_err = 0) + { + MTdataHolder d(gRandomSeed); + for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); + } }; template @@ -193,7 +363,6 @@ static int run_test(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { using T = typename TestInfo::Type; - cl_int err = CL_SUCCESS; clProgramWrapper program; @@ -231,11 +400,9 @@ static int run_test(cl_device_id device, cl_context context, std::vector input_ptr(n_elems); - MTdataHolder d(gRandomSeed); - for (int i = 0; i < n_elems; i++) - { - input_ptr[i] = (T)genrand_int64(d); - } + T max_err = 0; + TestInfo::generate_reference_values(input_ptr.data(), n_elems, wg_size[0], + max_err); err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems, input_ptr.data(), 0, NULL, NULL); @@ -260,10 +427,10 @@ static int run_test(cl_device_id device, cl_context context, test_error(err, "clEnqueueReadBuffer to read read dst buffer failed"); if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems, - wg_size[0])) + wg_size[0], max_err)) { - log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName, - TestInfo::deviceTypeName); + log_error("%s_%s %s verify failed\n", TestInfo::testName, + TestInfo::testOpName, TestInfo::deviceTypeName); return TEST_FAIL; } @@ -289,6 +456,37 @@ REGISTER_TEST_VERSION(work_group_reduce_add, Version(2, 0)) num_elements); } + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + + result |= run_test>>(device, context, queue, + num_elements); + } + + result |= + run_test>>(device, context, queue, num_elements); + + if (is_extension_available(device, "cl_khr_fp64")) + { + result |= run_test>>(device, context, queue, + num_elements); + } + return result; } @@ -309,6 +507,37 @@ REGISTER_TEST_VERSION(work_group_reduce_max, Version(2, 0)) num_elements); } + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + + result |= run_test>>(device, context, queue, + num_elements); + } + + result |= + run_test>>(device, context, queue, num_elements); + + if (is_extension_available(device, "cl_khr_fp64")) + { + result |= run_test>>(device, context, queue, + num_elements); + } + return result; } @@ -329,6 +558,37 @@ REGISTER_TEST_VERSION(work_group_reduce_min, Version(2, 0)) num_elements); } + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + + result |= run_test>>(device, context, queue, + num_elements); + } + + result |= + run_test>>(device, context, queue, num_elements); + + if (is_extension_available(device, "cl_khr_fp64")) + { + result |= run_test>>(device, context, queue, + num_elements); + } + return result; } From 4e40bf9a1f4f96ec4950a342654f083007552023 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 16 Sep 2025 12:06:44 +0200 Subject: [PATCH 2/4] Added corrections due to code review --- .../workgroups/test_wg_scan_reduce.cpp | 117 ++++++++++-------- 1 file changed, 64 insertions(+), 53 deletions(-) diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index 6d5206e840..74d1a92f95 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -92,13 +92,18 @@ template struct Add using Type = T; static constexpr const char *opName = "add"; static constexpr T identityValue = 0; - static T combine(T a, T b) + static T combine(T a, T b) { return a + b; } +}; + +template <> struct Add +{ + using Type = cl_half; + static constexpr const char *opName = "add"; + static constexpr Type identityValue = 0; + static Type combine(Type a, Type b) { - if (std::is_same_v) - return cl_half_from_float(cl_half_to_float(a) + cl_half_to_float(b), - gHalfRoundingMode); - else - return a + b; + return cl_half_from_float(cl_half_to_float(a) + cl_half_to_float(b), + gHalfRoundingMode); } }; @@ -106,18 +111,22 @@ template struct Max { using Type = T; static constexpr const char *opName = "max"; - static constexpr T identityValue = std::is_same_v - ? g_half_min - : (std::is_integral_v ? std::numeric_limits::min() - : -std::numeric_limits::max()); - static T combine(T a, T b) + static constexpr T identityValue = std::is_integral_v + ? std::numeric_limits::min() + : -std::numeric_limits::max(); + static T combine(T a, T b) { return std::max(a, b); } +}; + +template <> struct Max +{ + using Type = cl_half; + static constexpr const char *opName = "max"; + static constexpr Type identityValue = g_half_min; + static Type combine(Type a, Type b) { - if (std::is_same_v) - return cl_half_from_float( - std::max(cl_half_to_float(a), cl_half_to_float(b)), - gHalfRoundingMode); - else - return std::max(a, b); + return cl_half_from_float( + std::max(cl_half_to_float(a), cl_half_to_float(b)), + gHalfRoundingMode); } }; @@ -125,16 +134,20 @@ template struct Min { using Type = T; static constexpr const char *opName = "min"; - static constexpr T identityValue = - std::is_same_v ? g_half_max : std::numeric_limits::max(); - static T combine(T a, T b) + static constexpr T identityValue = std::numeric_limits::max(); + static T combine(T a, T b) { return std::min(a, b); } +}; + +template <> struct Min +{ + using Type = cl_half; + static constexpr const char *opName = "min"; + static constexpr Type identityValue = g_half_max; + static Type combine(Type a, Type b) { - if (std::is_same_v) - return cl_half_from_float( - std::min(cl_half_to_float(a), cl_half_to_float(b)), - gHalfRoundingMode); - else - return std::min(a, b); + return cl_half_from_float( + std::min(cl_half_to_float(a), cl_half_to_float(b)), + gHalfRoundingMode); } }; @@ -148,6 +161,27 @@ template struct Reduce static constexpr const char *deviceTypeName = TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_reduce"; + + static int check_result(const Type &test_value, const Type &reference, + const Type &max_err = 0) + { + if constexpr (std::is_floating_point_v) + { + if (fabs(reference - test_value) > max_err) return -1; + } + else if constexpr (std::is_same_v) + { + if (fabs(cl_half_to_float(reference) - cl_half_to_float(test_value)) + > cl_half_to_float(max_err)) + return -1; + } + else + { + if (reference != test_value) return -1; + } + return CL_SUCCESS; + } + static int verify(Type *inptr, Type *outptr, size_t n_elems, size_t max_wg_size, const Type &max_err = 0) { @@ -163,34 +197,11 @@ template struct Reduce for (size_t j = 0; j < wg_size; j++) { - if constexpr (std::is_floating_point_v) - { - if (fabs(result - outptr[i + j]) > max_err) - { - log_info("%s_%s: Error at %zu\n", testName, testOpName, - i + j); - return -1; - } - } - else if (std::is_same_v) - { - if (fabs(cl_half_to_float(result) - - cl_half_to_float(outptr[i + j])) - > cl_half_to_float(max_err)) - { - log_info("%s_%s: Error at %zu\n", testName, testOpName, - i + j); - return -1; - } - } - else + if (check_result(outptr[i + j], result, max_err) != CL_SUCCESS) { - if (result != outptr[i + j]) - { - log_info("%s_%s: Error at %zu\n", testName, testOpName, - i + j); - return -1; - } + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; } } } From 8f6ec4083c410d0d11803459592521bcb3108769 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 19 Sep 2025 12:52:49 +0200 Subject: [PATCH 3/4] Added missing constexpr correction --- test_conformance/api/test_kernels.cpp | 2 +- test_conformance/workgroups/test_wg_scan_reduce.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/api/test_kernels.cpp b/test_conformance/api/test_kernels.cpp index 3c156d8704..a923a9cf9d 100644 --- a/test_conformance/api/test_kernels.cpp +++ b/test_conformance/api/test_kernels.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index 74d1a92f95..0f3a156e9b 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -216,7 +216,7 @@ template struct Reduce Type> || std::is_same_v) { std::vector ref_vals(max_wg_size, 0); - if (std::is_same_v) + if constexpr (std::is_same_v) { // to prevent overflow limit range of randomization float max_range = 99.0; From 43d5880930ade63ae4fcab1bcf9ac07312683301 Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Fri, 15 May 2026 16:09:24 +0200 Subject: [PATCH 4/4] renaming correction --- .../workgroups/test_wg_scan_reduce.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index 0f3a156e9b..1efe78bc6c 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -208,8 +208,8 @@ template struct Reduce return 0; } - static void generate_reference_values(Type *inptr, size_t n_elems, - size_t max_wg_size, Type &max_err = 0) + static void generate_input_values(Type *inptr, size_t n_elems, + size_t max_wg_size, Type &max_err = 0) { MTdataHolder d(gRandomSeed); if constexpr (std::is_floating_point_v< @@ -321,8 +321,8 @@ template struct ScanInclusive return 0; } - static void generate_reference_values(Type *inptr, size_t n_elems, - size_t max_wg_size, Type &max_err = 0) + static void generate_input_values(Type *inptr, size_t n_elems, + size_t max_wg_size, Type &max_err = 0) { MTdataHolder d(gRandomSeed); for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); @@ -361,8 +361,8 @@ template struct ScanExclusive return 0; } - static void generate_reference_values(Type *inptr, size_t n_elems, - size_t max_wg_size, Type &max_err = 0) + static void generate_input_values(Type *inptr, size_t n_elems, + size_t max_wg_size, Type &max_err = 0) { MTdataHolder d(gRandomSeed); for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); @@ -412,8 +412,8 @@ static int run_test(cl_device_id device, cl_context context, std::vector input_ptr(n_elems); T max_err = 0; - TestInfo::generate_reference_values(input_ptr.data(), n_elems, wg_size[0], - max_err); + TestInfo::generate_input_values(input_ptr.data(), n_elems, wg_size[0], + max_err); err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems, input_ptr.data(), 0, NULL, NULL);