// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "procs.h" #include "subhelpers.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" static const char *lbar_source = "__kernel void test_lbar(const __global Type *in, __global int2 *xy, " "__global Type *out)\n" "{\n" " __local int tmp[200];\n" " int gid = get_global_id(0);\n" " int nid = get_sub_group_size();\n" " int lid = get_sub_group_local_id();\n" " xy[gid].x = lid;\n" " xy[gid].y = get_sub_group_id();\n" " if (get_sub_group_id() == 0) {\n" " tmp[lid] = in[gid];\n" " sub_group_barrier(CLK_LOCAL_MEM_FENCE);\n" " out[gid] = tmp[nid-1-lid];\n" " } else {\n" " out[gid] = -in[gid];\n" " }\n" "}\n"; static const char *gbar_source = "__kernel void test_gbar(const __global Type *in, __global int2 *xy, " "__global Type *out, __global Type *tmp)\n" "{\n" " int gid = get_global_id(0);\n" " int nid = get_sub_group_size();\n" " int lid = get_sub_group_local_id();\n" " int tof = get_group_id(0)*get_max_sub_group_size();\n" " xy[gid].x = lid;\n" " xy[gid].y = get_sub_group_id();\n" " if (get_sub_group_id() == 0) {\n" " tmp[tof+lid] = in[gid];\n" " sub_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" " out[gid] = tmp[tof+nid-1-lid];\n" " } else {\n" " out[gid] = -in[gid];\n" " }\n" "}\n"; // barrier test functions template struct BAR { static void gen(cl_int *x, cl_int *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, n; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; ng = ng / nw; int e; ii = 0; for (k = 0; k < ng; ++k) { for (j = 0; j < nj; ++j) { ii = j * ns; n = ii + ns > nw ? nw - ii : ns; for (i = 0; i < n; ++i) t[ii + i] = genrand_int32(gMTdata); } // Now map into work group using map from device for (j = 0; j < nw; ++j) { x[j] = t[j]; } x += nw; m += 2 * nw; } } static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; ng = ng / nw; cl_int tr, rr; if (Which == 0) log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n"); else log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n"); for (k = 0; k < ng; ++k) { // Map to array indexed to array indexed by local ID and sub group for (j = 0; j < nw; ++j) { mx[j] = x[j]; my[j] = y[j]; } for (j = 0; j < nj; ++j) { ii = j * ns; n = ii + ns > nw ? nw - ii : ns; for (i = 0; i < n; ++i) { tr = j == 0 ? mx[ii + n - 1 - i] : -mx[ii + i]; rr = my[ii + i]; if (tr != rr) { log_error("ERROR: sub_group_barrier mismatch for local " "id %d in sub group %d in group %d expected " "%d got %d\n", i, j, k, tr, rr); return -1; } } } x += nw; y += nw; m += 2 * nw; } return 0; } }; // Entry point from main int test_barrier_functions(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, bool useCoreSubgroups) { int error = TEST_PASS; // Adjust these individually below if desired/needed constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); test_params.use_core_subgroups = useCoreSubgroups; error = test>::run(device, context, queue, num_elements, "test_lbar", lbar_source, test_params); error |= test, global_work_size>::run( device, context, queue, num_elements, "test_gbar", gbar_source, test_params); return error; } int test_barrier_functions_core(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { return test_barrier_functions(device, context, queue, num_elements, true); } int test_barrier_functions_ext(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { bool hasExtension = is_extension_available(device, "cl_khr_subgroups"); if (!hasExtension) { log_info( "Device does not support 'cl_khr_subgroups'. Skipping the test.\n"); return TEST_SKIPPED_ITSELF; } return test_barrier_functions(device, context, queue, num_elements, false); }