diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp index b622ee7974..183c6f4cbe 100644 --- a/test_conformance/subgroups/subhelpers.cpp +++ b/test_conformance/subgroups/subhelpers.cpp @@ -215,8 +215,10 @@ void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups, int subgroup_size, int &workgroup_size, int &last_subgroup_size) { - number_of_subgroups = 1 + non_uniform_size / subgroup_size; + number_of_subgroups = + (subgroup_size + non_uniform_size - 1) / subgroup_size; last_subgroup_size = non_uniform_size % subgroup_size; + if (last_subgroup_size == 0) last_subgroup_size = subgroup_size; workgroup_size = non_uniform_size; } diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 04f90e176e..863656963e 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -766,7 +766,16 @@ __kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __gl int gid = get_global_id(0); XY(xy,gid); Type x = in[gid]; - if (xy[gid].x < (get_sub_group_size() >> 1)) { + + uint sub_group_size = get_sub_group_size(); + // If we are at the edge, calculate our own sub_group_size as it's implementation defined otherwise. + if (get_local_size(0) != get_enqueued_local_size(0) && get_sub_group_id() == get_num_sub_groups() - 1) { + uint new_sub_group_size = get_local_size(0) % sub_group_size; + if (new_sub_group_size != 0) + sub_group_size = new_sub_group_size; + } + + if (xy[gid].x < (sub_group_size >> 1)) { out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z); } else { out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w); @@ -778,7 +787,16 @@ __kernel void test_sub_group_broadcast_first(const __global Type *in, __global i int gid = get_global_id(0); XY(xy,gid); Type x = in[gid]; - if (xy[gid].x < (get_sub_group_size() >> 1)) { + + uint sub_group_size = get_sub_group_size(); + // If we are at the edge, calculate our own sub_group_size as it's implementation defined otherwise. + if (get_local_size(0) != get_enqueued_local_size(0) && get_sub_group_id() == get_num_sub_groups() - 1) { + uint new_sub_group_size = get_local_size(0) % sub_group_size; + if (new_sub_group_size != 0) + sub_group_size = new_sub_group_size; + } + + if (xy[gid].x < (sub_group_size >> 1)) { out[gid] = sub_group_broadcast_first(x);; } else { out[gid] = sub_group_broadcast_first(x);;