From 167700b0b1983288a999ffadc337af0e99ec4764 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Sun, 31 May 2026 22:35:10 +0200 Subject: [PATCH 1/2] subgroups: fix set_last_workgroup_params for uniform subgroup sizes While the workgroup could be non-uniform, we could end up with uniform subgroup sizes. E.g. with local = 168 and subgroup_size = 8 This helps debugging the subgroup tests with non uniform subgroups, because one could test with uniform subgroup sizes to narrow down the actual implementation bug. --- test_conformance/subgroups/subhelpers.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp index b622ee7974..183c6f4cbe 100644 --- a/test_conformance/subgroups/subhelpers.cpp +++ b/test_conformance/subgroups/subhelpers.cpp @@ -215,8 +215,10 @@ void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups, int subgroup_size, int &workgroup_size, int &last_subgroup_size) { - number_of_subgroups = 1 + non_uniform_size / subgroup_size; + number_of_subgroups = + (subgroup_size + non_uniform_size - 1) / subgroup_size; last_subgroup_size = non_uniform_size % subgroup_size; + if (last_subgroup_size == 0) last_subgroup_size = subgroup_size; workgroup_size = non_uniform_size; } From 74237f8b3384619e40f08f85c1acdc2d58c9e517 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Mon, 1 Jun 2026 02:34:26 +0200 Subject: [PATCH 2/2] fix subgroup ballot broadcast cases for non-uniform workgroups The return value of get_sub_group_size() is implementation defined and might not return the actual size of the sub groups on the edges of a non- uniform workgroup. Closes: https://github.com/KhronosGroup/OpenCL-CTS/issues/1198 --- .../subgroups/test_subgroup_ballot.cpp | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 04f90e176e..863656963e 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -766,7 +766,16 @@ __kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __gl int gid = get_global_id(0); XY(xy,gid); Type x = in[gid]; - if (xy[gid].x < (get_sub_group_size() >> 1)) { + + uint sub_group_size = get_sub_group_size(); + // If we are at the edge, calculate our own sub_group_size as it's implementation defined otherwise. + if (get_local_size(0) != get_enqueued_local_size(0) && get_sub_group_id() == get_num_sub_groups() - 1) { + uint new_sub_group_size = get_local_size(0) % sub_group_size; + if (new_sub_group_size != 0) + sub_group_size = new_sub_group_size; + } + + if (xy[gid].x < (sub_group_size >> 1)) { out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z); } else { out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w); @@ -778,7 +787,16 @@ __kernel void test_sub_group_broadcast_first(const __global Type *in, __global i int gid = get_global_id(0); XY(xy,gid); Type x = in[gid]; - if (xy[gid].x < (get_sub_group_size() >> 1)) { + + uint sub_group_size = get_sub_group_size(); + // If we are at the edge, calculate our own sub_group_size as it's implementation defined otherwise. + if (get_local_size(0) != get_enqueued_local_size(0) && get_sub_group_id() == get_num_sub_groups() - 1) { + uint new_sub_group_size = get_local_size(0) % sub_group_size; + if (new_sub_group_size != 0) + sub_group_size = new_sub_group_size; + } + + if (xy[gid].x < (sub_group_size >> 1)) { out[gid] = sub_group_broadcast_first(x);; } else { out[gid] = sub_group_broadcast_first(x);;