// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #ifndef SUBHELPERS_H #define SUBHELPERS_H #include "testHarness.h" #include "kernelHelpers.h" #include "typeWrappers.h" #include #include class subgroupsAPI { public: subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups) { static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, "Enums have to be the same"); static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, "Enums have to be the same"); if (useCoreSubgroups) { _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo; clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo"; } else { _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn) clGetExtensionFunctionAddressForPlatform( platform, "clGetKernelSubGroupInfoKHR"); clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR"; } } clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr() { return _clGetKernelSubGroupInfo_ptr; } const char *clGetKernelSubGroupInfo_name; private: clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr; }; // Some template helpers template struct TypeName; template <> struct TypeName { static const char *val() { return "half"; } }; template <> struct TypeName { static const char *val() { return "uint"; } }; template <> struct TypeName { static const char *val() { return "int"; } }; template <> struct TypeName { static const char *val() { return "ulong"; } }; template <> struct TypeName { static const char *val() { return "long"; } }; template <> struct TypeName { static const char *val() { return "float"; } }; template <> struct TypeName { static const char *val() { return "double"; } }; template struct TypeDef; template <> struct TypeDef { static const char *val() { return "typedef half Type;\n"; } }; template <> struct TypeDef { static const char *val() { return "typedef uint Type;\n"; } }; template <> struct TypeDef { static const char *val() { return "typedef int Type;\n"; } }; template <> struct TypeDef { static const char *val() { return "typedef ulong Type;\n"; } }; template <> struct TypeDef { static const char *val() { return "typedef long Type;\n"; } }; template <> struct TypeDef { static const char *val() { return "typedef float Type;\n"; } }; template <> struct TypeDef { static const char *val() { return "typedef double Type;\n"; } }; template struct TypeIdentity; // template <> struct TypeIdentity { static cl_half val() { return // (cl_half)0.0; } }; template <> struct TypeIdentity { static // cl_half val() { return -(cl_half)65536.0; } }; template <> struct // TypeIdentity { static cl_half val() { return (cl_half)65536.0; } // }; template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0; } }; template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0; } }; template <> struct TypeIdentity { static cl_uint val() { return (cl_uint)0xffffffff; } }; template <> struct TypeIdentity { static cl_int val() { return (cl_int)0; } }; template <> struct TypeIdentity { static cl_int val() { return (cl_int)0x80000000; } }; template <> struct TypeIdentity { static cl_int val() { return (cl_int)0x7fffffff; } }; template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0; } }; template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0; } }; template <> struct TypeIdentity { static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; } }; template <> struct TypeIdentity { static cl_long val() { return (cl_long)0; } }; template <> struct TypeIdentity { static cl_long val() { return (cl_long)0x8000000000000000ULL; } }; template <> struct TypeIdentity { static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } }; template <> struct TypeIdentity { static float val() { return 0.F; } }; template <> struct TypeIdentity { static float val() { return -std::numeric_limits::infinity(); } }; template <> struct TypeIdentity { static float val() { return std::numeric_limits::infinity(); } }; template <> struct TypeIdentity { static double val() { return 0.L; } }; template <> struct TypeIdentity { static double val() { return -std::numeric_limits::infinity(); } }; template <> struct TypeIdentity { static double val() { return std::numeric_limits::infinity(); } }; template struct TypeCheck; template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; static bool int64_ok(cl_device_id device) { char profile[128]; int error; error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), (void *)&profile, NULL); if (error) { log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); return false; } if (strcmp(profile, "EMBEDDED_PROFILE") == 0) return is_extension_available(device, "cles_khr_int64"); return true; } template <> struct TypeCheck { static bool val(cl_device_id device) { return int64_ok(device); } }; template <> struct TypeCheck { static bool val(cl_device_id device) { return int64_ok(device); } }; template <> struct TypeCheck { static bool val(cl_device_id) { return true; } }; template <> struct TypeCheck { static bool val(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); } }; template <> struct TypeCheck { static bool val(cl_device_id device) { int error; cl_device_fp_config c; error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), (void *)&c, NULL); if (error) { log_info( "clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); return false; } return c != 0; } }; // Run a test kernel to compute the result of a built-in on an input static int run_kernel(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t global, size_t local, void *idata, size_t isize, void *mdata, size_t msize, void *odata, size_t osize, size_t tsize = 0) { clMemWrapper in; clMemWrapper xy; clMemWrapper out; clMemWrapper tmp; int error; in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); test_error(error, "clCreateBuffer failed"); xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); test_error(error, "clCreateBuffer failed"); out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); test_error(error, "clCreateBuffer failed"); if (tsize) { tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, tsize, NULL, &error); test_error(error, "clCreateBuffer failed"); } error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); test_error(error, "clSetKernelArg failed"); error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); test_error(error, "clSetKernelArg failed"); error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); test_error(error, "clSetKernelArg failed"); if (tsize) { error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); test_error(error, "clSetKernelArg failed"); } error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, NULL); test_error(error, "clEnqueueWriteBuffer failed"); error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL); test_error(error, "clEnqueueNDRangeKernel failed"); error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); error = clFinish(queue); test_error(error, "clFinish failed"); return error; } // Driver for testing a single built in function template struct test { static int run(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, const char *kname, const char *src, int dynscl, bool useCoreSubgroups) { size_t tmp; int error; int subgroup_size, num_subgroups; size_t realSize; size_t global; size_t local; clProgramWrapper program; clKernelWrapper kernel; cl_platform_id platform; cl_int sgmap[2 * GSIZE]; Ty mapin[LSIZE]; Ty mapout[LSIZE]; // Make sure a test of type Ty is supported by the device if (!TypeCheck::val(device)) return 0; error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); std::stringstream kernel_sstr; if (useCoreSubgroups) { kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; } kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); " "M[I].y = get_sub_group_id();\n"; kernel_sstr << TypeDef::val(); kernel_sstr << src; const std::string &kernel_str = kernel_sstr.str(); const char *kernel_src = kernel_str.c_str(); error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0"); if (error != 0) return error; // Determine some local dimensions to use for the test. global = GSIZE; error = get_max_common_work_group_size(context, kernel, GSIZE, &local); test_error(error, "get_max_common_work_group_size failed"); // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple subgroups if (local > LSIZE) local = LSIZE; // Get the sub group info subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups); clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr = subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); if (clGetKernelSubGroupInfo_ptr == NULL) { log_error("ERROR: %s function not available", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } error = clGetKernelSubGroupInfo_ptr( kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); if (error != CL_SUCCESS) { log_error("ERROR: %s function error for " "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } subgroup_size = (int)tmp; error = clGetKernelSubGroupInfo_ptr( kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); if (error != CL_SUCCESS) { log_error("ERROR: %s function error for " "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } num_subgroups = (int)tmp; // Make sure the number of sub groups is what we expect if (num_subgroups != (local + subgroup_size - 1) / subgroup_size) { log_error("ERROR: unexpected number of subgroups (%d) returned\n", num_subgroups); return TEST_FAIL; } std::vector idata; std::vector odata; size_t input_array_size = GSIZE; size_t output_array_size = GSIZE; if (dynscl != 0) { input_array_size = (int)global / (int)local * num_subgroups * dynscl; output_array_size = (int)global / (int)local * dynscl; } idata.resize(input_array_size); odata.resize(output_array_size); // Run the kernel once on zeroes to get the map memset(&idata[0], 0, input_array_size * sizeof(Ty)); error = run_kernel(context, queue, kernel, global, local, &idata[0], input_array_size * sizeof(Ty), sgmap, global * sizeof(cl_int) * 2, &odata[0], output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); if (error) return error; // Generate the desired input for the kernel Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, (int)global / (int)local); error = run_kernel(context, queue, kernel, global, local, &idata[0], input_array_size * sizeof(Ty), sgmap, global * sizeof(cl_int) * 2, &odata[0], output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); if (error) return error; // Check the result return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, subgroup_size, (int)local, (int)global / (int)local); } }; #endif