1 // 2 // Copyright (c) 2017 The Khronos Group Inc. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 #ifndef SUBHELPERS_H 17 #define SUBHELPERS_H 18 19 #include "testHarness.h" 20 #include "kernelHelpers.h" 21 #include "typeWrappers.h" 22 23 #include <limits> 24 #include <vector> 25 26 class subgroupsAPI { 27 public: subgroupsAPI(cl_platform_id platform,bool useCoreSubgroups)28 subgroupsAPI(cl_platform_id platform, bool useCoreSubgroups) 29 { 30 static_assert(CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE 31 == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, 32 "Enums have to be the same"); 33 static_assert(CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE 34 == CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, 35 "Enums have to be the same"); 36 if (useCoreSubgroups) 37 { 38 _clGetKernelSubGroupInfo_ptr = &clGetKernelSubGroupInfo; 39 clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfo"; 40 } 41 else 42 { 43 _clGetKernelSubGroupInfo_ptr = (clGetKernelSubGroupInfoKHR_fn) 44 clGetExtensionFunctionAddressForPlatform( 45 platform, "clGetKernelSubGroupInfoKHR"); 46 clGetKernelSubGroupInfo_name = "clGetKernelSubGroupInfoKHR"; 47 } 48 } clGetKernelSubGroupInfo_ptr()49 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr() 50 { 51 return _clGetKernelSubGroupInfo_ptr; 52 } 53 const char *clGetKernelSubGroupInfo_name; 54 55 private: 56 clGetKernelSubGroupInfoKHR_fn _clGetKernelSubGroupInfo_ptr; 57 }; 58 59 // Some template helpers 60 template <typename Ty> struct TypeName; 61 template <> struct TypeName<cl_half> 62 { 63 static const char *val() { return "half"; } 64 }; 65 template <> struct TypeName<cl_uint> 66 { 67 static const char *val() { return "uint"; } 68 }; 69 template <> struct TypeName<cl_int> 70 { 71 static const char *val() { return "int"; } 72 }; 73 template <> struct TypeName<cl_ulong> 74 { 75 static const char *val() { return "ulong"; } 76 }; 77 template <> struct TypeName<cl_long> 78 { 79 static const char *val() { return "long"; } 80 }; 81 template <> struct TypeName<float> 82 { 83 static const char *val() { return "float"; } 84 }; 85 template <> struct TypeName<double> 86 { 87 static const char *val() { return "double"; } 88 }; 89 90 template <typename Ty> struct TypeDef; 91 template <> struct TypeDef<cl_half> 92 { 93 static const char *val() { return "typedef half Type;\n"; } 94 }; 95 template <> struct TypeDef<cl_uint> 96 { 97 static const char *val() { return "typedef uint Type;\n"; } 98 }; 99 template <> struct TypeDef<cl_int> 100 { 101 static const char *val() { return "typedef int Type;\n"; } 102 }; 103 template <> struct TypeDef<cl_ulong> 104 { 105 static const char *val() { return "typedef ulong Type;\n"; } 106 }; 107 template <> struct TypeDef<cl_long> 108 { 109 static const char *val() { return "typedef long Type;\n"; } 110 }; 111 template <> struct TypeDef<float> 112 { 113 static const char *val() { return "typedef float Type;\n"; } 114 }; 115 template <> struct TypeDef<double> 116 { 117 static const char *val() { return "typedef double Type;\n"; } 118 }; 119 120 template <typename Ty, int Which> struct TypeIdentity; 121 // template <> struct TypeIdentity<cl_half,0> { static cl_half val() { return 122 // (cl_half)0.0; } }; template <> struct TypeIdentity<cl_half,0> { static 123 // cl_half val() { return -(cl_half)65536.0; } }; template <> struct 124 // TypeIdentity<cl_half,0> { static cl_half val() { return (cl_half)65536.0; } 125 // }; 126 127 template <> struct TypeIdentity<cl_uint, 0> 128 { 129 static cl_uint val() { return (cl_uint)0; } 130 }; 131 template <> struct TypeIdentity<cl_uint, 1> 132 { 133 static cl_uint val() { return (cl_uint)0; } 134 }; 135 template <> struct TypeIdentity<cl_uint, 2> 136 { 137 static cl_uint val() { return (cl_uint)0xffffffff; } 138 }; 139 140 template <> struct TypeIdentity<cl_int, 0> 141 { 142 static cl_int val() { return (cl_int)0; } 143 }; 144 template <> struct TypeIdentity<cl_int, 1> 145 { 146 static cl_int val() { return (cl_int)0x80000000; } 147 }; 148 template <> struct TypeIdentity<cl_int, 2> 149 { 150 static cl_int val() { return (cl_int)0x7fffffff; } 151 }; 152 153 template <> struct TypeIdentity<cl_ulong, 0> 154 { 155 static cl_ulong val() { return (cl_ulong)0; } 156 }; 157 template <> struct TypeIdentity<cl_ulong, 1> 158 { 159 static cl_ulong val() { return (cl_ulong)0; } 160 }; 161 template <> struct TypeIdentity<cl_ulong, 2> 162 { 163 static cl_ulong val() { return (cl_ulong)0xffffffffffffffffULL; } 164 }; 165 166 template <> struct TypeIdentity<cl_long, 0> 167 { 168 static cl_long val() { return (cl_long)0; } 169 }; 170 template <> struct TypeIdentity<cl_long, 1> 171 { 172 static cl_long val() { return (cl_long)0x8000000000000000ULL; } 173 }; 174 template <> struct TypeIdentity<cl_long, 2> 175 { 176 static cl_long val() { return (cl_long)0x7fffffffffffffffULL; } 177 }; 178 179 180 template <> struct TypeIdentity<float, 0> 181 { 182 static float val() { return 0.F; } 183 }; 184 template <> struct TypeIdentity<float, 1> 185 { 186 static float val() { return -std::numeric_limits<float>::infinity(); } 187 }; 188 template <> struct TypeIdentity<float, 2> 189 { 190 static float val() { return std::numeric_limits<float>::infinity(); } 191 }; 192 193 template <> struct TypeIdentity<double, 0> 194 { 195 static double val() { return 0.L; } 196 }; 197 198 template <> struct TypeIdentity<double, 1> 199 { 200 static double val() { return -std::numeric_limits<double>::infinity(); } 201 }; 202 template <> struct TypeIdentity<double, 2> 203 { 204 static double val() { return std::numeric_limits<double>::infinity(); } 205 }; 206 207 template <typename Ty> struct TypeCheck; 208 template <> struct TypeCheck<cl_uint> 209 { 210 static bool val(cl_device_id) { return true; } 211 }; 212 template <> struct TypeCheck<cl_int> 213 { 214 static bool val(cl_device_id) { return true; } 215 }; 216 217 static bool int64_ok(cl_device_id device) 218 { 219 char profile[128]; 220 int error; 221 222 error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), 223 (void *)&profile, NULL); 224 if (error) 225 { 226 log_info("clGetDeviceInfo failed with CL_DEVICE_PROFILE\n"); 227 return false; 228 } 229 230 if (strcmp(profile, "EMBEDDED_PROFILE") == 0) 231 return is_extension_available(device, "cles_khr_int64"); 232 233 return true; 234 } 235 236 template <> struct TypeCheck<cl_ulong> 237 { 238 static bool val(cl_device_id device) { return int64_ok(device); } 239 }; 240 template <> struct TypeCheck<cl_long> 241 { 242 static bool val(cl_device_id device) { return int64_ok(device); } 243 }; 244 template <> struct TypeCheck<cl_float> 245 { 246 static bool val(cl_device_id) { return true; } 247 }; 248 template <> struct TypeCheck<cl_half> 249 { 250 static bool val(cl_device_id device) 251 { 252 return is_extension_available(device, "cl_khr_fp16"); 253 } 254 }; 255 template <> struct TypeCheck<double> 256 { 257 static bool val(cl_device_id device) 258 { 259 int error; 260 cl_device_fp_config c; 261 error = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(c), 262 (void *)&c, NULL); 263 if (error) 264 { 265 log_info( 266 "clGetDeviceInfo failed with CL_DEVICE_DOUBLE_FP_CONFIG\n"); 267 return false; 268 } 269 return c != 0; 270 } 271 }; 272 273 274 // Run a test kernel to compute the result of a built-in on an input 275 static int run_kernel(cl_context context, cl_command_queue queue, 276 cl_kernel kernel, size_t global, size_t local, 277 void *idata, size_t isize, void *mdata, size_t msize, 278 void *odata, size_t osize, size_t tsize = 0) 279 { 280 clMemWrapper in; 281 clMemWrapper xy; 282 clMemWrapper out; 283 clMemWrapper tmp; 284 int error; 285 286 in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); 287 test_error(error, "clCreateBuffer failed"); 288 289 xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); 290 test_error(error, "clCreateBuffer failed"); 291 292 out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); 293 test_error(error, "clCreateBuffer failed"); 294 295 if (tsize) 296 { 297 tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, 298 tsize, NULL, &error); 299 test_error(error, "clCreateBuffer failed"); 300 } 301 302 error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); 303 test_error(error, "clSetKernelArg failed"); 304 305 error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); 306 test_error(error, "clSetKernelArg failed"); 307 308 error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); 309 test_error(error, "clSetKernelArg failed"); 310 311 if (tsize) 312 { 313 error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); 314 test_error(error, "clSetKernelArg failed"); 315 } 316 317 error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, 318 NULL); 319 test_error(error, "clEnqueueWriteBuffer failed"); 320 321 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, 322 NULL, NULL); 323 test_error(error, "clEnqueueNDRangeKernel failed"); 324 325 error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, 326 NULL); 327 test_error(error, "clEnqueueReadBuffer failed"); 328 329 error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, 330 NULL); 331 test_error(error, "clEnqueueReadBuffer failed"); 332 333 error = clFinish(queue); 334 test_error(error, "clFinish failed"); 335 336 return error; 337 } 338 339 // Driver for testing a single built in function 340 template <typename Ty, typename Fns, size_t GSIZE, size_t LSIZE, 341 size_t TSIZE = 0> 342 struct test 343 { 344 static int run(cl_device_id device, cl_context context, 345 cl_command_queue queue, int num_elements, const char *kname, 346 const char *src, int dynscl, bool useCoreSubgroups) 347 { 348 size_t tmp; 349 int error; 350 int subgroup_size, num_subgroups; 351 size_t realSize; 352 size_t global; 353 size_t local; 354 clProgramWrapper program; 355 clKernelWrapper kernel; 356 cl_platform_id platform; 357 cl_int sgmap[2 * GSIZE]; 358 Ty mapin[LSIZE]; 359 Ty mapout[LSIZE]; 360 361 // Make sure a test of type Ty is supported by the device 362 if (!TypeCheck<Ty>::val(device)) return 0; 363 364 error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), 365 (void *)&platform, NULL); 366 test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); 367 std::stringstream kernel_sstr; 368 if (useCoreSubgroups) 369 { 370 kernel_sstr 371 << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"; 372 } 373 kernel_sstr << "#define XY(M,I) M[I].x = get_sub_group_local_id(); " 374 "M[I].y = get_sub_group_id();\n"; 375 kernel_sstr << TypeDef<Ty>::val(); 376 kernel_sstr << src; 377 const std::string &kernel_str = kernel_sstr.str(); 378 const char *kernel_src = kernel_str.c_str(); 379 380 error = create_single_kernel_helper_with_build_options( 381 context, &program, &kernel, 1, &kernel_src, kname, "-cl-std=CL2.0"); 382 if (error != 0) return error; 383 384 // Determine some local dimensions to use for the test. 385 global = GSIZE; 386 error = get_max_common_work_group_size(context, kernel, GSIZE, &local); 387 test_error(error, "get_max_common_work_group_size failed"); 388 389 // Limit it a bit so we have muliple work groups 390 // Ideally this will still be large enough to give us multiple subgroups 391 if (local > LSIZE) local = LSIZE; 392 393 // Get the sub group info 394 subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups); 395 clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr = 396 subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); 397 if (clGetKernelSubGroupInfo_ptr == NULL) 398 { 399 log_error("ERROR: %s function not available", 400 subgroupsApiSet.clGetKernelSubGroupInfo_name); 401 return TEST_FAIL; 402 } 403 error = clGetKernelSubGroupInfo_ptr( 404 kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, 405 sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); 406 if (error != CL_SUCCESS) 407 { 408 log_error("ERROR: %s function error for " 409 "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", 410 subgroupsApiSet.clGetKernelSubGroupInfo_name); 411 return TEST_FAIL; 412 } 413 414 subgroup_size = (int)tmp; 415 416 error = clGetKernelSubGroupInfo_ptr( 417 kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, 418 sizeof(local), (void *)&local, sizeof(tmp), (void *)&tmp, NULL); 419 if (error != CL_SUCCESS) 420 { 421 log_error("ERROR: %s function error for " 422 "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", 423 subgroupsApiSet.clGetKernelSubGroupInfo_name); 424 return TEST_FAIL; 425 } 426 427 num_subgroups = (int)tmp; 428 // Make sure the number of sub groups is what we expect 429 if (num_subgroups != (local + subgroup_size - 1) / subgroup_size) 430 { 431 log_error("ERROR: unexpected number of subgroups (%d) returned\n", 432 num_subgroups); 433 return TEST_FAIL; 434 } 435 436 std::vector<Ty> idata; 437 std::vector<Ty> odata; 438 size_t input_array_size = GSIZE; 439 size_t output_array_size = GSIZE; 440 441 if (dynscl != 0) 442 { 443 input_array_size = 444 (int)global / (int)local * num_subgroups * dynscl; 445 output_array_size = (int)global / (int)local * dynscl; 446 } 447 448 idata.resize(input_array_size); 449 odata.resize(output_array_size); 450 451 // Run the kernel once on zeroes to get the map 452 memset(&idata[0], 0, input_array_size * sizeof(Ty)); 453 error = run_kernel(context, queue, kernel, global, local, &idata[0], 454 input_array_size * sizeof(Ty), sgmap, 455 global * sizeof(cl_int) * 2, &odata[0], 456 output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); 457 if (error) return error; 458 459 // Generate the desired input for the kernel 460 Fns::gen(&idata[0], mapin, sgmap, subgroup_size, (int)local, 461 (int)global / (int)local); 462 463 error = run_kernel(context, queue, kernel, global, local, &idata[0], 464 input_array_size * sizeof(Ty), sgmap, 465 global * sizeof(cl_int) * 2, &odata[0], 466 output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); 467 if (error) return error; 468 469 470 // Check the result 471 return Fns::chk(&idata[0], &odata[0], mapin, mapout, sgmap, 472 subgroup_size, (int)local, (int)global / (int)local); 473 } 474 }; 475 476 #endif 477