Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 174 additions & 66 deletions test_conformance/workgroups/test_wg_scan_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ template <typename C> struct Reduce
}

static int verify(Type *inptr, Type *outptr, size_t n_elems,
size_t max_wg_size, const Type &max_err = 0)
size_t max_wg_size, const Type *const max_err = nullptr)
{
for (size_t i = 0; i < n_elems; i += max_wg_size)
{
Expand All @@ -197,7 +197,8 @@ template <typename C> struct Reduce

for (size_t j = 0; j < wg_size; j++)
{
if (check_result(outptr[i + j], result, max_err) != CL_SUCCESS)
if (check_result(outptr[i + j], result, max_err[0])
!= CL_SUCCESS)
{
log_info("%s_%s: Error at %zu\n", testName, testOpName,
i + j);
Expand All @@ -209,7 +210,8 @@ template <typename C> struct Reduce
}

static void generate_input_values(Type *inptr, size_t n_elems,
size_t max_wg_size, Type &max_err = 0)
size_t max_wg_size,
Type *const max_err = nullptr)
{
MTdataHolder d(gRandomSeed);
if constexpr (std::is_floating_point_v<
Expand Down Expand Up @@ -246,7 +248,7 @@ template <typename C> struct Reduce
float s = 0.f;
for (auto it = ref_vals.begin(); it != ref_vals.end(); ++it)
s += std::abs(cl_half_to_float(*it));
max_err = cl_half_from_float(
max_err[0] = cl_half_from_float(
fabs((max_wg_size - 1) * CL_HALF_EPSILON * s),
gHalfRoundingMode);
}
Expand Down Expand Up @@ -274,7 +276,7 @@ template <typename C> struct Reduce
// Higham, N. J. (2002). Accuracy and Stability of Numerical
// Algorithms (2nd ed.), Chapter 4: Summation, Section 2:
// Error Analysis (worst case error summation)
max_err = (max_wg_size - 1)
max_err[0] = (max_wg_size - 1)
* (std::is_same_v<Type, cl_float> ? CL_FLT_EPSILON
: CL_DBL_EPSILON)
* abs_sum;
Expand All @@ -299,8 +301,30 @@ template <typename C> struct ScanInclusive
static constexpr const char *deviceTypeName =
TestTypeInfo<Type>::deviceName;
static constexpr const char *kernelName = "test_wg_scan_inclusive";

static int check_result(const Type &test_value, const Type &reference,
const Type &max_err = 0)
{
if constexpr (std::is_floating_point_v<Type>)
{
if (std::abs(reference - test_value) > max_err) return -1;
}
else if constexpr (std::is_same_v<Type, cl_half>)
{
if (std::abs(cl_half_to_float(reference)
- cl_half_to_float(test_value))
> cl_half_to_float(max_err))
return -1;
}
else
{
if (reference != test_value) return -1;
}
return CL_SUCCESS;
}

static int verify(Type *inptr, Type *outptr, size_t n_elems,
size_t max_wg_size, const Type &max_err = 0)
size_t max_wg_size, const Type *const max_err = nullptr)
{
for (size_t i = 0; i < n_elems; i += max_wg_size)
{
Expand All @@ -310,7 +334,8 @@ template <typename C> struct ScanInclusive
for (size_t j = 0; j < wg_size; ++j)
{
result = C::combine(result, inptr[i + j]);
if (result != outptr[i + j])
if (check_result(outptr[i + j], result, max_err[j])
!= CL_SUCCESS)
{
log_info("%s_%s: Error at %zu\n", testName, testOpName,
i + j);
Expand All @@ -322,10 +347,80 @@ template <typename C> struct ScanInclusive
}

static void generate_input_values(Type *inptr, size_t n_elems,
size_t max_wg_size, Type &max_err = 0)
size_t max_wg_size,
Type *const max_err = nullptr)
{
MTdataHolder d(gRandomSeed);
for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d);
if constexpr (std::is_floating_point_v<
Type> || std::is_same_v<Type, cl_half>)
{
std::vector<Type> ref_vals(max_wg_size, 0);
if constexpr (std::is_same_v<Type, cl_half>)
{
// to prevent overflow limit range of randomization
float max_range = 99.0;
float min_range = -99.0;
// generate reference values for one work group
for (size_t j = 0; j < max_wg_size; j++)
ref_vals[j] = cl_half_from_float(
get_random_float(min_range, max_range, d),
gHalfRoundingMode);

// populate reference data across all work groups
for (size_t i = 0; i < (size_t)n_elems; i += max_wg_size)
{
size_t wg_size = std::min(max_wg_size, n_elems - i);
memcpy(&inptr[i], ref_vals.data(), sizeof(Type) * wg_size);
}

if constexpr (std::is_same_v<Operation, Add<Type>>)
{
// compute maximal summation error
float s = std::abs(cl_half_to_float(ref_vals[0]));
for (size_t i = 1; i < (size_t)n_elems; i++)
{
s += std::abs(cl_half_to_float(ref_vals[i]));
max_err[i] = cl_half_from_float(
std::abs((max_wg_size - 1) * CL_HALF_EPSILON * s),
gHalfRoundingMode);
}
}
}
else
{
double max_range = 999.0;
double min_range = -999.0;
for (size_t j = 0; j < max_wg_size; j++)
ref_vals[j] = get_random_float(min_range, max_range, d);

for (size_t i = 0; i < (size_t)n_elems; i += max_wg_size)
{
size_t work_group_size = std::min(max_wg_size, n_elems - i);
memcpy(&inptr[i], ref_vals.data(),
sizeof(Type) * work_group_size);
}

if constexpr (std::is_same_v<Operation, Add<Type>>)
{
// compute maximal summation error
Type s = std::abs(ref_vals[0]);
for (size_t i = 1; i < (size_t)n_elems; i++)
{
s += std::abs(ref_vals[i]);
max_err[i] = std::abs((max_wg_size - 1)
* (std::is_same_v<Type, cl_float>
? CL_FLT_EPSILON
: CL_DBL_EPSILON)
* s);
}
}
}
}
else
{
for (size_t i = 0; i < n_elems; i++)
inptr[i] = (Type)genrand_int64(d);
}
}
};

Expand All @@ -340,7 +435,7 @@ template <typename C> struct ScanExclusive
TestTypeInfo<Type>::deviceName;
static constexpr const char *kernelName = "test_wg_scan_exclusive";
static int verify(Type *inptr, Type *outptr, size_t n_elems,
size_t max_wg_size, const Type &max_err = 0)
size_t max_wg_size, const Type *const max_err = nullptr)
{
for (size_t i = 0; i < n_elems; i += max_wg_size)
{
Expand All @@ -362,7 +457,8 @@ template <typename C> struct ScanExclusive
}

static void generate_input_values(Type *inptr, size_t n_elems,
size_t max_wg_size, Type &max_err = 0)
size_t max_wg_size,
const Type *max_err = nullptr)
{
MTdataHolder d(gRandomSeed);
for (size_t i = 0; i < n_elems; i++) inptr[i] = (Type)genrand_int64(d);
Expand Down Expand Up @@ -409,14 +505,14 @@ static int run_test(cl_device_id device, cl_context context,
sizeof(T) * n_elems, NULL, &err);
test_error(err, "Unable to create destination buffer");

std::vector<T> input_ptr(n_elems);
std::vector<T> input_vec(n_elems);

T max_err = 0;
TestInfo::generate_input_values(input_ptr.data(), n_elems, wg_size[0],
max_err);
std::vector<T> max_err_vec(n_elems, 0);
TestInfo::generate_input_values(input_vec.data(), n_elems, wg_size[0],
max_err_vec.data());

err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems,
input_ptr.data(), 0, NULL, NULL);
input_vec.data(), 0, NULL, NULL);
test_error(err, "clWriteBuffer to initialize src buffer failed");

err = clSetKernelArg(kernel, 0, sizeof(src), &src);
Expand All @@ -437,8 +533,8 @@ static int run_test(cl_device_id device, cl_context context,
output_ptr.data(), 0, NULL, NULL);
test_error(err, "clEnqueueReadBuffer to read read dst buffer failed");

if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems,
wg_size[0], max_err))
if (TestInfo::verify(input_vec.data(), output_ptr.data(), n_elems,
wg_size[0], max_err_vec.data()))
{
log_error("%s_%s %s verify failed\n", TestInfo::testName,
TestInfo::testOpName, TestInfo::deviceTypeName);
Expand All @@ -450,6 +546,60 @@ static int run_test(cl_device_id device, cl_context context,
return TEST_PASS;
}

template <template <typename> class Op>
static int run_scan_inclusive_all_types(cl_device_id device, cl_context context,
cl_command_queue queue,
int num_elements)
{
int result = TEST_PASS;

result |= run_test<ScanInclusive<Op<cl_int>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Op<cl_uint>>>(device, context, queue,
num_elements);

if (gHasLong)
{
result |= run_test<ScanInclusive<Op<cl_long>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Op<cl_ulong>>>(device, context, queue,
num_elements);
}

if (is_extension_available(device, "cl_khr_fp16"))
{
const cl_device_fp_config fpConfigHalf =
get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
{
gHalfRoundingMode = CL_HALF_RTE;
}
else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
{
gHalfRoundingMode = CL_HALF_RTZ;
}
else
{
log_error("Error while acquiring half rounding mode\n");
return TEST_FAIL;
}

result |= run_test<ScanInclusive<Op<cl_half>>>(device, context, queue,
num_elements);
}

result |= run_test<ScanInclusive<Op<cl_float>>>(device, context, queue,
num_elements);

if (is_extension_available(device, "cl_khr_fp64"))
{
result |= run_test<ScanInclusive<Op<cl_double>>>(device, context, queue,
num_elements);
}

return result;
}

REGISTER_TEST_VERSION(work_group_reduce_add, Version(2, 0))
{
int result = TEST_PASS;
Expand Down Expand Up @@ -605,62 +755,20 @@ REGISTER_TEST_VERSION(work_group_reduce_min, Version(2, 0))

REGISTER_TEST_VERSION(work_group_scan_inclusive_add, Version(2, 0))
{
int result = TEST_PASS;

result |= run_test<ScanInclusive<Add<cl_int>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Add<cl_uint>>>(device, context, queue,
num_elements);

if (gHasLong)
{
result |= run_test<ScanInclusive<Add<cl_long>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Add<cl_ulong>>>(device, context, queue,
num_elements);
}

return result;
return run_scan_inclusive_all_types<Add>(device, context, queue,
num_elements);
}

REGISTER_TEST_VERSION(work_group_scan_inclusive_max, Version(2, 0))
{
int result = TEST_PASS;

result |= run_test<ScanInclusive<Max<cl_int>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Max<cl_uint>>>(device, context, queue,
num_elements);

if (gHasLong)
{
result |= run_test<ScanInclusive<Max<cl_long>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Max<cl_ulong>>>(device, context, queue,
num_elements);
}

return result;
return run_scan_inclusive_all_types<Max>(device, context, queue,
num_elements);
}

REGISTER_TEST_VERSION(work_group_scan_inclusive_min, Version(2, 0))
{
int result = TEST_PASS;

result |= run_test<ScanInclusive<Min<cl_int>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Min<cl_uint>>>(device, context, queue,
num_elements);

if (gHasLong)
{
result |= run_test<ScanInclusive<Min<cl_long>>>(device, context, queue,
num_elements);
result |= run_test<ScanInclusive<Min<cl_ulong>>>(device, context, queue,
num_elements);
}

return result;
return run_scan_inclusive_all_types<Min>(device, context, queue,
num_elements);
}

REGISTER_TEST_VERSION(work_group_scan_exclusive_add, Version(2, 0))
Expand Down
Loading