From a6333a5486d6eed0e1b586e5651953023eb91a8d Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 24 Sep 2025 11:37:50 +0200 Subject: [PATCH 1/3] Added floating point types support for work_group_scan_exclusive tests --- .../workgroups/test_wg_scan_reduce.cpp | 309 +++++++++++++++++- 1 file changed, 292 insertions(+), 17 deletions(-) diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index f1f28cee1a..5156c18278 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -21,6 +21,10 @@ #include "testBase.h" +cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; +constexpr cl_half g_half_min = 0xFC00; +constexpr cl_half g_half_max = 0x7C00; + static std::string make_kernel_string(const std::string &type, const std::string &kernelName, const std::string &func) @@ -64,6 +68,25 @@ template <> struct TestTypeInfo static constexpr const char *deviceName = "ulong"; }; +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "double"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "float"; +}; + +// please keep in mind cl_half type on host side is the same as uint16_t, +// therefore, if you will add below 16-bit unsigned int type support it will be +// likely confused with cl_half + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "half"; +}; + template struct Add { using Type = T; @@ -72,25 +95,68 @@ template struct Add static T combine(T a, T b) { return a + b; } }; +template <> struct Add +{ + using Type = cl_half; + static constexpr const char *opName = "add"; + static constexpr Type identityValue = 0; + static Type combine(Type a, Type b) + { + return cl_half_from_float(cl_half_to_float(a) + cl_half_to_float(b), + gHalfRoundingMode); + } +}; + template struct Max { using Type = T; static constexpr const char *opName = "max"; - static constexpr T identityValue = std::numeric_limits::min(); + static constexpr T identityValue = std::is_integral_v + ? std::numeric_limits::min() + : -std::numeric_limits::infinity(); static T combine(T a, T b) { return std::max(a, b); } }; +template <> struct Max +{ + using Type = cl_half; + static constexpr const char *opName = "max"; + static constexpr Type identityValue = g_half_min; + static Type combine(Type a, Type b) + { + return cl_half_from_float( + std::max(cl_half_to_float(a), cl_half_to_float(b)), + gHalfRoundingMode); + } +}; + template struct Min { using Type = T; static constexpr const char *opName = "min"; - static constexpr T identityValue = std::numeric_limits::max(); + static constexpr T identityValue = std::is_integral_v + ? std::numeric_limits::max() + : std::numeric_limits::infinity(); static T combine(T a, T b) { return std::min(a, b); } }; +template <> struct Min +{ + using Type = cl_half; + static constexpr const char *opName = "min"; + static constexpr Type identityValue = g_half_max; + static Type combine(Type a, Type b) + { + return cl_half_from_float( + std::min(cl_half_to_float(a), cl_half_to_float(b)), + gHalfRoundingMode); + } +}; + template struct Reduce { using Type = typename C::Type; + using Operation = C; static constexpr const char *testName = "work_group_reduce"; static constexpr const char *testOpName = C::opName; @@ -98,7 +164,7 @@ template struct Reduce TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_reduce"; static int verify(Type *inptr, Type *outptr, size_t n_elems, - size_t max_wg_size) + size_t max_wg_size, const Type *const max_err = nullptr) { for (size_t i = 0; i < n_elems; i += max_wg_size) { @@ -122,11 +188,20 @@ template struct Reduce } return 0; } + + static void generate_reference_values(Type *inptr, size_t n_elems, + size_t max_wg_size, + const Type *max_err = nullptr) + { + MTdataHolder d(gRandomSeed); + for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); + } }; template struct ScanInclusive { using Type = typename C::Type; + using Operation = C; static constexpr const char *testName = "work_group_scan_inclusive"; static constexpr const char *testOpName = C::opName; @@ -134,7 +209,7 @@ template struct ScanInclusive TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_scan_inclusive"; static int verify(Type *inptr, Type *outptr, size_t n_elems, - size_t max_wg_size) + size_t max_wg_size, const Type *const max_err = nullptr) { for (size_t i = 0; i < n_elems; i += max_wg_size) { @@ -154,19 +229,50 @@ template struct ScanInclusive } return 0; } + + static void generate_reference_values(Type *inptr, size_t n_elems, + size_t max_wg_size, + const Type *max_err = nullptr) + { + MTdataHolder d(gRandomSeed); + for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); + } }; template struct ScanExclusive { using Type = typename C::Type; + using Operation = C; static constexpr const char *testName = "work_group_scan_exclusive"; static constexpr const char *testOpName = C::opName; static constexpr const char *deviceTypeName = TestTypeInfo::deviceName; static constexpr const char *kernelName = "test_wg_scan_exclusive"; + + static int check_result(const Type &test_value, const Type &reference, + const Type &max_err = 0) + { + if constexpr (std::is_floating_point_v) + { + if (std::abs(reference - test_value) > max_err) return -1; + } + else if constexpr (std::is_same_v) + { + if (std::abs(cl_half_to_float(reference) + - cl_half_to_float(test_value)) + > cl_half_to_float(max_err)) + return -1; + } + else + { + if (reference != test_value) return -1; + } + return CL_SUCCESS; + } + static int verify(Type *inptr, Type *outptr, size_t n_elems, - size_t max_wg_size) + size_t max_wg_size, const Type *const max_err = nullptr) { for (size_t i = 0; i < n_elems; i += max_wg_size) { @@ -175,7 +281,8 @@ template struct ScanExclusive Type result = C::identityValue; for (size_t j = 0; j < wg_size; ++j) { - if (result != outptr[i + j]) + if (check_result(outptr[i + j], result, max_err[j]) + != CL_SUCCESS) { log_info("%s_%s: Error at %zu\n", testName, testOpName, i + j); @@ -186,6 +293,83 @@ template struct ScanExclusive } return 0; } + + static void generate_reference_values(Type *inptr, size_t n_elems, + size_t max_wg_size, + Type *const max_err = nullptr) + { + MTdataHolder d(gRandomSeed); + if constexpr (std::is_floating_point_v< + Type> || std::is_same_v) + { + std::vector ref_vals(max_wg_size, 0); + if constexpr (std::is_same_v) + { + // to prevent overflow limit range of randomization + float max_range = 99.0; + float min_range = -99.0; + // generate reference values for one work group + for (size_t j = 0; j < max_wg_size; j++) + ref_vals[j] = cl_half_from_float( + get_random_float(min_range, max_range, d), + gHalfRoundingMode); + + // populate reference data across all work groups + for (size_t i = 0; i < (size_t)n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + memcpy(&inptr[i], ref_vals.data(), sizeof(Type) * wg_size); + } + + if constexpr (std::is_same_v>) + { + // compute maximal summation error + float s = std::abs(cl_half_to_float(ref_vals[0])); + for (size_t i = 1; i < (size_t)n_elems; i++) + { + max_err[i] = cl_half_from_float( + std::abs((max_wg_size - 1) * CL_HALF_EPSILON * s), + gHalfRoundingMode); + s += std::abs(cl_half_to_float(ref_vals[i])); + } + } + } + else + { + double max_range = 999.0; + double min_range = -999.0; + for (size_t j = 0; j < max_wg_size; j++) + ref_vals[j] = get_random_float(min_range, max_range, d); + + for (size_t i = 0; i < (size_t)n_elems; i += max_wg_size) + { + size_t work_group_size = std::min(max_wg_size, n_elems - i); + memcpy(&inptr[i], ref_vals.data(), + sizeof(Type) * work_group_size); + } + + if constexpr (std::is_same_v>) + { + // compute maximal summation error + Type s = std::abs(ref_vals[0]); + for (size_t i = 1; i < (size_t)n_elems; i++) + { + max_err[i] = std::abs((max_wg_size - 1) + * (std::is_same_v + ? CL_FLT_EPSILON + : CL_DBL_EPSILON) + * s); + s += std::abs(ref_vals[i]); + } + } + } + } + else + { + for (size_t i = 0; i < n_elems; i++) + inptr[i] = (Type)genrand_int64(d); + } + } }; template @@ -229,16 +413,14 @@ static int run_test(cl_device_id device, cl_context context, sizeof(T) * n_elems, NULL, &err); test_error(err, "Unable to create destination buffer"); - std::vector input_ptr(n_elems); + std::vector input_vec(n_elems); - MTdataHolder d(gRandomSeed); - for (int i = 0; i < n_elems; i++) - { - input_ptr[i] = (T)genrand_int64(d); - } + std::vector max_err_vec(n_elems, 0); + TestInfo::generate_reference_values(input_vec.data(), n_elems, wg_size[0], + max_err_vec.data()); err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems, - input_ptr.data(), 0, NULL, NULL); + input_vec.data(), 0, NULL, NULL); test_error(err, "clWriteBuffer to initialize src buffer failed"); err = clSetKernelArg(kernel, 0, sizeof(src), &src); @@ -259,11 +441,11 @@ static int run_test(cl_device_id device, cl_context context, output_ptr.data(), 0, NULL, NULL); test_error(err, "clEnqueueReadBuffer to read read dst buffer failed"); - if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems, - wg_size[0])) + if (TestInfo::verify(input_vec.data(), output_ptr.data(), n_elems, + wg_size[0], max_err_vec.data())) { - log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName, - TestInfo::deviceTypeName); + log_error("%s_%s %s verify failed\n", TestInfo::testName, + TestInfo::testOpName, TestInfo::deviceTypeName); return TEST_FAIL; } @@ -409,6 +591,37 @@ REGISTER_TEST_VERSION(work_group_scan_exclusive_add, Version(2, 0)) num_elements); } + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + + result |= run_test>>(device, context, queue, + num_elements); + } + + result |= run_test>>(device, context, queue, + num_elements); + + if (is_extension_available(device, "cl_khr_fp64")) + { + result |= run_test>>(device, context, + queue, num_elements); + } + return result; } @@ -429,6 +642,37 @@ REGISTER_TEST_VERSION(work_group_scan_exclusive_max, Version(2, 0)) num_elements); } + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + + result |= run_test>>(device, context, queue, + num_elements); + } + + result |= run_test>>(device, context, queue, + num_elements); + + if (is_extension_available(device, "cl_khr_fp64")) + { + result |= run_test>>(device, context, + queue, num_elements); + } + return result; } @@ -449,5 +693,36 @@ REGISTER_TEST_VERSION(work_group_scan_exclusive_min, Version(2, 0)) num_elements); } + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + gHalfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + gHalfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode\n"); + return TEST_FAIL; + } + + result |= run_test>>(device, context, queue, + num_elements); + } + + result |= run_test>>(device, context, queue, + num_elements); + + if (is_extension_available(device, "cl_khr_fp64")) + { + result |= run_test>>(device, context, + queue, num_elements); + } + return result; } From 23636babcce62704940bcde0aea1fab395c8690b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Tue, 19 May 2026 12:37:48 +0200 Subject: [PATCH 2/3] renaming correction -adapted from #2525 due to code review --- .../workgroups/test_wg_scan_reduce.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index 5156c18278..fa019a2b19 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -189,9 +189,9 @@ template struct Reduce return 0; } - static void generate_reference_values(Type *inptr, size_t n_elems, - size_t max_wg_size, - const Type *max_err = nullptr) + static void generate_input_values(Type *inptr, size_t n_elems, + size_t max_wg_size, + const Type *max_err = nullptr) { MTdataHolder d(gRandomSeed); for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); @@ -230,9 +230,9 @@ template struct ScanInclusive return 0; } - static void generate_reference_values(Type *inptr, size_t n_elems, - size_t max_wg_size, - const Type *max_err = nullptr) + static void generate_input_values(Type *inptr, size_t n_elems, + size_t max_wg_size, + const Type *max_err = nullptr) { MTdataHolder d(gRandomSeed); for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d); @@ -294,9 +294,9 @@ template struct ScanExclusive return 0; } - static void generate_reference_values(Type *inptr, size_t n_elems, - size_t max_wg_size, - Type *const max_err = nullptr) + static void generate_input_values(Type *inptr, size_t n_elems, + size_t max_wg_size, + Type *const max_err = nullptr) { MTdataHolder d(gRandomSeed); if constexpr (std::is_floating_point_v< @@ -416,8 +416,8 @@ static int run_test(cl_device_id device, cl_context context, std::vector input_vec(n_elems); std::vector max_err_vec(n_elems, 0); - TestInfo::generate_reference_values(input_vec.data(), n_elems, wg_size[0], - max_err_vec.data()); + TestInfo::generate_input_values(input_vec.data(), n_elems, wg_size[0], + max_err_vec.data()); err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems, input_vec.data(), 0, NULL, NULL); From e67c50320c6550f2d6c5e50f6ad546ed5330286b Mon Sep 17 00:00:00 2001 From: Marcin Hajder Date: Wed, 3 Jun 2026 12:28:20 +0200 Subject: [PATCH 3/3] Reverted incorrect merging + code format --- .../workgroups/test_wg_scan_reduce.cpp | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/test_conformance/workgroups/test_wg_scan_reduce.cpp b/test_conformance/workgroups/test_wg_scan_reduce.cpp index 13b14f8021..9943ce03d8 100644 --- a/test_conformance/workgroups/test_wg_scan_reduce.cpp +++ b/test_conformance/workgroups/test_wg_scan_reduce.cpp @@ -22,8 +22,8 @@ #include "testBase.h" cl_half_rounding_mode gHalfRoundingMode = CL_HALF_RTE; -constexpr cl_half g_half_min = 0xfbff; -constexpr cl_half g_half_max = 0x7bff; +constexpr cl_half g_half_min = 0xFC00; +constexpr cl_half g_half_max = 0x7C00; static std::string make_kernel_string(const std::string &type, const std::string &kernelName, @@ -113,7 +113,7 @@ template struct Max static constexpr const char *opName = "max"; static constexpr T identityValue = std::is_integral_v ? std::numeric_limits::min() - : -std::numeric_limits::max(); + : -std::numeric_limits::infinity(); static T combine(T a, T b) { return std::max(a, b); } }; @@ -646,15 +646,17 @@ static int run_all_types(cl_device_id device, cl_context context, { int result = TEST_PASS; - result |= run_test>>(device, context, queue, num_elements); - result |= run_test>>(device, context, queue, num_elements); + result |= + run_test>>(device, context, queue, num_elements); + result |= + run_test>>(device, context, queue, num_elements); if (gHasLong) { result |= run_test>>(device, context, queue, - num_elements); + num_elements); result |= run_test>>(device, context, queue, - num_elements); + num_elements); } if (is_extension_available(device, "cl_khr_fp16")) @@ -676,16 +678,16 @@ static int run_all_types(cl_device_id device, cl_context context, } result |= run_test>>(device, context, queue, - num_elements); + num_elements); } - result |= run_test>>(device, context, queue, - num_elements); + result |= + run_test>>(device, context, queue, num_elements); if (is_extension_available(device, "cl_khr_fp64")) { result |= run_test>>(device, context, queue, - num_elements); + num_elements); } return result;