1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "crc32.h"
17 #include "kernelHelpers.h"
18 #include "deviceInfo.h"
19 #include "errorHelpers.h"
20 #include "imageHelpers.h"
21 #include "typeWrappers.h"
22 #include "testHarness.h"
23 #include "parseParameters.h"
24
25 #include <cassert>
26 #include <vector>
27 #include <string>
28 #include <fstream>
29 #include <sstream>
30 #include <iomanip>
31 #include <mutex>
32 #include <algorithm>
33
// Path separator used when composing file names in this file:
// a backslash on Windows builds, a forward slash everywhere else.
#if !defined(_WIN32)
std::string slash = "/";
#else
std::string slash = "\\";
#endif
39
// Mutex held (via std::lock_guard) for the whole duration of the internal
// create_single_kernel_helper_create_program() overload in this file.
static std::mutex gCompilerMutex;

// Forward declaration; the definition appears further down in this file.
static cl_int get_first_device_id(const cl_context context,
                                  cl_device_id &device);
44
// Returns the size in bytes of the file named fileName, measured by seeking
// to the end of a binary input stream. Returns 0 when the file cannot be
// opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // The position at the end of the stream is the file length.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
54
// Joins the first numKernelLines C strings of kernelProgram, in order and
// without any separator, into a single std::string.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string joined;
    const char *const *const lastLine = kernelProgram + numKernelLines;
    for (const char *const *line = kernelProgram; line != lastLine; ++line)
    {
        joined.append(*line);
    }
    return joined;
}
67
get_kernel_name(const std::string & source)68 std::string get_kernel_name(const std::string &source)
69 {
70 // Create list of kernel names
71 std::string kernelsList;
72 size_t kPos = source.find("kernel");
73 while (kPos != std::string::npos)
74 {
75 // check for '__kernel'
76 size_t pos = kPos;
77 if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
78 pos -= 2;
79
80 // check character before 'kernel' (white space expected)
81 size_t wsPos = source.find_last_of(" \t\r\n", pos);
82 if (wsPos == std::string::npos || wsPos + 1 == pos)
83 {
84 // check character after 'kernel' (white space expected)
85 size_t akPos = kPos + sizeof("kernel") - 1;
86 wsPos = source.find_first_of(" \t\r\n", akPos);
87 if (!(wsPos == akPos))
88 {
89 kPos = source.find("kernel", kPos + 1);
90 continue;
91 }
92
93 bool attributeFound;
94 do
95 {
96 attributeFound = false;
97 // find '(' after kernel name name
98 size_t pPos = source.find("(", akPos);
99 if (!(pPos != std::string::npos)) continue;
100
101 // check for not empty kernel name before '('
102 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
103 if (!(pos != std::string::npos && pos > akPos)) continue;
104
105 // find character before kernel name
106 wsPos = source.find_last_of(" \t\r\n", pos);
107 if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
108
109 std::string name =
110 source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
111 // check for kernel attribute
112 if (name == "__attribute__")
113 {
114 attributeFound = true;
115 int pCount = 1;
116 akPos = pPos + 1;
117 while (pCount > 0 && akPos != std::string::npos)
118 {
119 akPos = source.find_first_of("()", akPos + 1);
120 if (akPos != std::string::npos)
121 {
122 if (source[akPos] == '(')
123 pCount++;
124 else
125 pCount--;
126 }
127 }
128 }
129 else
130 {
131 kernelsList += name + ".";
132 }
133 } while (attributeFound);
134 }
135 kPos = source.find("kernel", kPos + 1);
136 }
137 std::ostringstream oss;
138 if (MAX_LEN_FOR_KERNEL_LIST > 0)
139 {
140 if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
141 {
142 kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
143 kernelsList[kernelsList.size() - 1] = '.';
144 kernelsList[kernelsList.size() - 1] = '.';
145 }
146 oss << kernelsList;
147 }
148 return oss.str();
149 }
150
151 static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)152 get_offline_compilation_file_type_str(const CompilationMode compilationMode)
153 {
154 switch (compilationMode)
155 {
156 default: assert(0 && "Invalid compilation mode"); abort();
157 case kOnline:
158 assert(0 && "Invalid compilation mode for offline compilation");
159 abort();
160 case kBinary: return "binary";
161 case kSpir_v: return "SPIR-V";
162 }
163 }
164
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165 static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166 const char *const *kernelProgram,
167 const char *buildOptions)
168 {
169 std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170 std::string kernelName = get_kernel_name(kernel);
171 cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172 std::ostringstream oss;
173 oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
174 << kernelCrc;
175 if (buildOptions)
176 {
177 cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
178 oss << '.' << std::hex << std::setfill('0') << std::setw(8)
179 << bOptionsCrc;
180 }
181 return oss.str();
182 }
183
184
185 static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)186 get_cl_build_options_filename_with_path(const std::string &filePath,
187 const std::string &fileNamePrefix)
188 {
189 return filePath + slash + fileNamePrefix + ".options";
190 }
191
192 static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)193 get_cl_source_filename_with_path(const std::string &filePath,
194 const std::string &fileNamePrefix)
195 {
196 return filePath + slash + fileNamePrefix + ".cl";
197 }
198
199 static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)200 get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
201 const std::string &filePath,
202 const std::string &fileNamePrefix)
203 {
204 std::string binaryFilename = filePath + slash + fileNamePrefix;
205 if (kSpir_v == mode)
206 {
207 std::ostringstream extension;
208 extension << ".spv" << deviceAddrSpaceSize;
209 binaryFilename += extension.str();
210 }
211 return binaryFilename;
212 }
213
file_exist_on_disk(const std::string & filePath,const std::string & fileName)214 static bool file_exist_on_disk(const std::string &filePath,
215 const std::string &fileName)
216 {
217 std::string fileNameWithPath = filePath + slash + fileName;
218 bool exist = false;
219 std::ifstream ifs;
220
221 ifs.open(fileNameWithPath.c_str(), std::ios::binary);
222 if (ifs.good()) exist = true;
223 ifs.close();
224 return exist;
225 }
226
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)227 static bool should_save_kernel_source_to_disk(CompilationMode mode,
228 CompilationCacheMode cacheMode,
229 const std::string &binaryPath,
230 const std::string &binaryName)
231 {
232 bool saveToDisk = false;
233 if (cacheMode == kCacheModeDumpCl
234 || (cacheMode == kCacheModeOverwrite && mode != kOnline))
235 {
236 saveToDisk = true;
237 }
238 if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
239 {
240 saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
241 }
242 return saveToDisk;
243 }
244
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)245 static int save_kernel_build_options_to_disk(const std::string &path,
246 const std::string &prefix,
247 const char *buildOptions)
248 {
249 std::string filename =
250 get_cl_build_options_filename_with_path(path, prefix);
251 std::ofstream ofs(filename.c_str(), std::ios::binary);
252 if (!ofs.good())
253 {
254 log_info("Can't save kernel build options: %s\n", filename.c_str());
255 return -1;
256 }
257 ofs.write(buildOptions, strlen(buildOptions));
258 ofs.close();
259 log_info("Saved kernel build options to file: %s\n", filename.c_str());
260 return CL_SUCCESS;
261 }
262
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)263 static int save_kernel_source_to_disk(const std::string &path,
264 const std::string &prefix,
265 const std::string &source)
266 {
267 std::string filename = get_cl_source_filename_with_path(path, prefix);
268 std::ofstream ofs(filename.c_str(), std::ios::binary);
269 if (!ofs.good())
270 {
271 log_info("Can't save kernel source: %s\n", filename.c_str());
272 return -1;
273 }
274 ofs.write(source.c_str(), source.size());
275 ofs.close();
276 log_info("Saved kernel source to file: %s\n", filename.c_str());
277 return CL_SUCCESS;
278 }
279
280 static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)281 save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
282 const char *const *kernelProgram,
283 const char *buildOptions)
284 {
285 int error;
286
287 std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
288 std::string kernelNamePrefix =
289 get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
290
291 // save kernel source to disk
292 error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
293 kernel);
294
295 // save kernel build options to disk if exists
296 if (buildOptions != NULL)
297 error |= save_kernel_build_options_to_disk(
298 gCompilationCachePath, kernelNamePrefix, buildOptions);
299
300 return error;
301 }
302
303 static std::string
get_compilation_mode_str(const CompilationMode compilationMode)304 get_compilation_mode_str(const CompilationMode compilationMode)
305 {
306 switch (compilationMode)
307 {
308 default: assert(0 && "Invalid compilation mode"); abort();
309 case kOnline: return "online";
310 case kBinary: return "binary";
311 case kSpir_v: return "spir-v";
312 }
313 }
314
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)315 static cl_int get_cl_device_info_str(const cl_device_id device,
316 const cl_uint device_address_space_size,
317 const CompilationMode compilationMode,
318 std::string &clDeviceInfo)
319 {
320 std::string extensionsString = get_device_extensions_string(device);
321 std::string versionString = get_device_version_string(device);
322
323 std::ostringstream clDeviceInfoStream;
324 std::string file_type =
325 get_offline_compilation_file_type_str(compilationMode);
326 clDeviceInfoStream << "# OpenCL device info affecting " << file_type
327 << " offline compilation:" << std::endl
328 << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
329 << std::endl
330 << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
331 << std::endl;
332 /* We only need the device's supported IL version(s) when compiling IL
333 * that will be loaded with clCreateProgramWithIL() */
334 if (compilationMode == kSpir_v)
335 {
336 std::string ilVersionString = get_device_il_version_string(device);
337 clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
338 << "\"" << std::endl;
339 }
340 clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
341 << std::endl;
342 clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
343 << (0 == checkForImageSupport(device)) << std::endl;
344 clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
345 << "\"" << std::endl;
346
347 clDeviceInfo = clDeviceInfoStream.str();
348
349 return CL_SUCCESS;
350 }
351
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)352 static int write_cl_device_info(const cl_device_id device,
353 const cl_uint device_address_space_size,
354 const CompilationMode compilationMode,
355 std::string &clDeviceInfoFilename)
356 {
357 std::string clDeviceInfo;
358 int error = get_cl_device_info_str(device, device_address_space_size,
359 compilationMode, clDeviceInfo);
360 if (error != CL_SUCCESS)
361 {
362 return error;
363 }
364
365 cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
366
367 /* Get the filename for the clDeviceInfo file.
368 * Note: the file includes the hash on its content, so it is usually
369 * unnecessary to delete it. */
370 std::ostringstream clDeviceInfoFilenameStream;
371 clDeviceInfoFilenameStream << gCompilationCachePath << slash
372 << "clDeviceInfo-";
373 clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
374 << crc << ".txt";
375
376 clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
377
378 if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
379 {
380 /* The CL device info file has already been created.
381 * Nothing to do. */
382 return 0;
383 }
384
385 /* The file does not exist or its length is not as expected.
386 * Create/overwrite it. */
387 std::ofstream ofs(clDeviceInfoFilename);
388 if (!ofs.good())
389 {
390 log_info("OfflineCompiler: can't create CL device info file: %s\n",
391 clDeviceInfoFilename.c_str());
392 return -1;
393 }
394 ofs << clDeviceInfo;
395 ofs.close();
396
397 return CL_SUCCESS;
398 }
399
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)400 static std::string get_offline_compilation_command(
401 const cl_uint device_address_space_size,
402 const CompilationMode compilationMode, const std::string &bOptions,
403 const std::string &sourceFilename, const std::string &outputFilename,
404 const std::string &clDeviceInfoFilename)
405 {
406 std::ostringstream wrapperOptions;
407
408 wrapperOptions << gCompilationProgram
409 << " --mode=" << get_compilation_mode_str(compilationMode)
410 << " --source=" << sourceFilename
411 << " --output=" << outputFilename
412 << " --cl-device-info=" << clDeviceInfoFilename;
413
414 if (bOptions != "")
415 {
416 // Add build options passed to this function
417 wrapperOptions << " -- " << bOptions;
418 }
419
420 return wrapperOptions.str();
421 }
422
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename)423 static int invoke_offline_compiler(const cl_device_id device,
424 const cl_uint device_address_space_size,
425 const CompilationMode compilationMode,
426 const std::string &bOptions,
427 const std::string &sourceFilename,
428 const std::string &outputFilename)
429 {
430 std::string runString;
431 std::string clDeviceInfoFilename;
432
433 // See cl_offline_compiler-interface.txt for a description of the
434 // format of the CL device information file generated below, and
435 // the internal command line interface for invoking the offline
436 // compiler.
437
438 cl_int err = write_cl_device_info(device, device_address_space_size,
439 compilationMode, clDeviceInfoFilename);
440 if (err != CL_SUCCESS)
441 {
442 log_error("Failed writing CL device info file\n");
443 return err;
444 }
445
446 runString = get_offline_compilation_command(
447 device_address_space_size, compilationMode, bOptions, sourceFilename,
448 outputFilename, clDeviceInfoFilename);
449
450 // execute script
451 log_info("Executing command: %s\n", runString.c_str());
452 fflush(stdout);
453 int returnCode = system(runString.c_str());
454 if (returnCode != 0)
455 {
456 log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
457 return CL_COMPILE_PROGRAM_FAILURE;
458 }
459
460 return CL_SUCCESS;
461 }
462
get_first_device_id(const cl_context context,cl_device_id & device)463 static cl_int get_first_device_id(const cl_context context,
464 cl_device_id &device)
465 {
466 cl_uint numDevices = 0;
467 cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
468 sizeof(cl_uint), &numDevices, NULL);
469 test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
470
471 if (numDevices == 0)
472 {
473 log_error("ERROR: No CL devices found\n");
474 return -1;
475 }
476
477 std::vector<cl_device_id> devices(numDevices, 0);
478 error =
479 clGetContextInfo(context, CL_CONTEXT_DEVICES,
480 numDevices * sizeof(cl_device_id), &devices[0], NULL);
481 test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
482
483 device = devices[0];
484 return CL_SUCCESS;
485 }
486
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)487 static cl_int get_device_address_bits(const cl_device_id device,
488 cl_uint &device_address_space_size)
489 {
490 cl_int error =
491 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
492 &device_address_space_size, NULL);
493 test_error(error, "Unable to obtain device address bits");
494
495 if (device_address_space_size != 32 && device_address_space_size != 64)
496 {
497 log_error("ERROR: Unexpected number of device address bits: %u\n",
498 device_address_space_size);
499 return -1;
500 }
501
502 return CL_SUCCESS;
503 }
504
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)505 static int get_offline_compiler_output(
506 std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
507 const CompilationMode compilationMode, const std::string &bOptions,
508 const std::string &kernelPath, const std::string &kernelNamePrefix)
509 {
510 std::string sourceFilename =
511 get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
512 std::string outputFilename = get_binary_filename_with_path(
513 compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
514
515 ifs.open(outputFilename.c_str(), std::ios::binary);
516 if (!ifs.good())
517 {
518 std::string file_type =
519 get_offline_compilation_file_type_str(compilationMode);
520 if (gCompilationCacheMode == kCacheModeForceRead)
521 {
522 log_info("OfflineCompiler: can't open cached %s file: %s\n",
523 file_type.c_str(), outputFilename.c_str());
524 return -1;
525 }
526 else
527 {
528 int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
529 compilationMode, bOptions,
530 sourceFilename, outputFilename);
531 if (error != CL_SUCCESS) return error;
532
533 // read output file
534 ifs.open(outputFilename.c_str(), std::ios::binary);
535 if (!ifs.good())
536 {
537 log_info("OfflineCompiler: can't read generated %s file: %s\n",
538 file_type.c_str(), outputFilename.c_str());
539 return -1;
540 }
541 }
542 }
543 return CL_SUCCESS;
544 }
545
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,CompilationMode compilationMode)546 static int create_single_kernel_helper_create_program_offline(
547 cl_context context, cl_device_id device, cl_program *outProgram,
548 unsigned int numKernelLines, const char *const *kernelProgram,
549 const char *buildOptions, CompilationMode compilationMode)
550 {
551 if (kCacheModeDumpCl == gCompilationCacheMode)
552 {
553 return -1;
554 }
555
556 // Get device CL_DEVICE_ADDRESS_BITS
557 int error;
558 cl_uint device_address_space_size = 0;
559 if (device == NULL)
560 {
561 error = get_first_device_id(context, device);
562 test_error(error, "Failed to get device ID for first device");
563 }
564 error = get_device_address_bits(device, device_address_space_size);
565 if (error != CL_SUCCESS) return error;
566
567 // set build options
568 std::string bOptions;
569 bOptions += buildOptions ? std::string(buildOptions) : "";
570
571 std::string kernelName =
572 get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
573
574
575 std::ifstream ifs;
576 error = get_offline_compiler_output(ifs, device, device_address_space_size,
577 compilationMode, bOptions,
578 gCompilationCachePath, kernelName);
579 if (error != CL_SUCCESS) return error;
580
581 ifs.seekg(0, ifs.end);
582 int length = ifs.tellg();
583 ifs.seekg(0, ifs.beg);
584
585 // treat modifiedProgram as input for clCreateProgramWithBinary
586 if (compilationMode == kBinary)
587 {
588 // read binary from file:
589 std::vector<unsigned char> modifiedKernelBuf(length);
590
591 ifs.read((char *)&modifiedKernelBuf[0], length);
592 ifs.close();
593
594 size_t lengths = modifiedKernelBuf.size();
595 const unsigned char *binaries = { &modifiedKernelBuf[0] };
596 log_info("offlineCompiler: clCreateProgramWithSource replaced with "
597 "clCreateProgramWithBinary\n");
598 *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
599 &binaries, NULL, &error);
600 if (*outProgram == NULL || error != CL_SUCCESS)
601 {
602 print_error(error, "clCreateProgramWithBinary failed");
603 return error;
604 }
605 }
606 // treat modifiedProgram as input for clCreateProgramWithIL
607 else if (compilationMode == kSpir_v)
608 {
609 // read spir-v from file:
610 std::vector<unsigned char> modifiedKernelBuf(length);
611
612 ifs.read((char *)&modifiedKernelBuf[0], length);
613 ifs.close();
614
615 size_t length = modifiedKernelBuf.size();
616 log_info("offlineCompiler: clCreateProgramWithSource replaced with "
617 "clCreateProgramWithIL\n");
618 if (gCoreILProgram)
619 {
620 *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
621 length, &error);
622 }
623 else
624 {
625 cl_platform_id platform;
626 error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
627 sizeof(cl_platform_id), &platform, NULL);
628 test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
629
630 clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
631 clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
632 clGetExtensionFunctionAddressForPlatform(
633 platform, "clCreateProgramWithILKHR");
634 if (clCreateProgramWithILKHR == NULL)
635 {
636 log_error(
637 "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
638 return -1;
639 }
640 *outProgram = clCreateProgramWithILKHR(
641 context, &modifiedKernelBuf[0], length, &error);
642 }
643
644 if (*outProgram == NULL || error != CL_SUCCESS)
645 {
646 if (gCoreILProgram)
647 {
648 print_error(error, "clCreateProgramWithIL failed");
649 }
650 else
651 {
652 print_error(error, "clCreateProgramWithILKHR failed");
653 }
654 return error;
655 }
656 }
657
658 return CL_SUCCESS;
659 }
660
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,CompilationMode compilationMode)661 static int create_single_kernel_helper_create_program(
662 cl_context context, cl_device_id device, cl_program *outProgram,
663 unsigned int numKernelLines, const char **kernelProgram,
664 const char *buildOptions, CompilationMode compilationMode)
665 {
666 std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
667
668 std::string filePrefix =
669 get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
670 bool shouldSaveToDisk = should_save_kernel_source_to_disk(
671 compilationMode, gCompilationCacheMode, gCompilationCachePath,
672 filePrefix);
673
674 if (shouldSaveToDisk)
675 {
676 if (CL_SUCCESS
677 != save_kernel_source_and_options_to_disk(
678 numKernelLines, kernelProgram, buildOptions))
679 {
680 log_error("Unable to dump kernel source to disk");
681 return -1;
682 }
683 }
684 if (compilationMode == kOnline)
685 {
686 int error = CL_SUCCESS;
687
688 /* Create the program object from source */
689 *outProgram = clCreateProgramWithSource(context, numKernelLines,
690 kernelProgram, NULL, &error);
691 if (*outProgram == NULL || error != CL_SUCCESS)
692 {
693 print_error(error, "clCreateProgramWithSource failed");
694 return error;
695 }
696 return CL_SUCCESS;
697 }
698 else
699 {
700 return create_single_kernel_helper_create_program_offline(
701 context, device, outProgram, numKernelLines, kernelProgram,
702 buildOptions, compilationMode);
703 }
704 }
705
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)706 int create_single_kernel_helper_create_program(cl_context context,
707 cl_program *outProgram,
708 unsigned int numKernelLines,
709 const char **kernelProgram,
710 const char *buildOptions)
711 {
712 return create_single_kernel_helper_create_program(
713 context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
714 gCompilationMode);
715 }
716
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)717 int create_single_kernel_helper_create_program_for_device(
718 cl_context context, cl_device_id device, cl_program *outProgram,
719 unsigned int numKernelLines, const char **kernelProgram,
720 const char *buildOptions)
721 {
722 return create_single_kernel_helper_create_program(
723 context, device, outProgram, numKernelLines, kernelProgram,
724 buildOptions, gCompilationMode);
725 }
726
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)727 int create_single_kernel_helper_with_build_options(
728 cl_context context, cl_program *outProgram, cl_kernel *outKernel,
729 unsigned int numKernelLines, const char **kernelProgram,
730 const char *kernelName, const char *buildOptions)
731 {
732 return create_single_kernel_helper(context, outProgram, outKernel,
733 numKernelLines, kernelProgram,
734 kernelName, buildOptions);
735 }
736
737 // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)738 int create_single_kernel_helper(cl_context context, cl_program *outProgram,
739 cl_kernel *outKernel,
740 unsigned int numKernelLines,
741 const char **kernelProgram,
742 const char *kernelName,
743 const char *buildOptions)
744 {
745 // For the logic that automatically adds -cl-std it is much cleaner if the
746 // build options have RAII. This buffer will store the potentially updated
747 // build options, in which case buildOptions will point at the string owned
748 // by this buffer.
749 std::string build_options_internal{ buildOptions ? buildOptions : "" };
750
751 // Check the build options for the -cl-std option.
752 if (!buildOptions || !strstr(buildOptions, "-cl-std"))
753 {
754 // If the build option isn't present add it using the latest OpenCL-C
755 // version supported by the device. This allows calling code to force a
756 // particular CL C version if it is required, but also means that
757 // callers need not specify a version if they want to assume the most
758 // recent CL C.
759
760 auto version = get_max_OpenCL_C_for_context(context);
761
762 std::string cl_std{};
763 if (version >= Version(3, 0))
764 {
765 cl_std = "-cl-std=CL3.0";
766 }
767 else if (version >= Version(2, 0) && version < Version(3, 0))
768 {
769 cl_std = "-cl-std=CL2.0";
770 }
771 else
772 {
773 // If the -cl-std build option is not specified, the highest OpenCL
774 // C 1.x language version supported by each device is used when
775 // compiling the program for each device.
776 cl_std = "";
777 }
778 build_options_internal += ' ';
779 build_options_internal += cl_std;
780 buildOptions = build_options_internal.c_str();
781 }
782 int error = create_single_kernel_helper_create_program(
783 context, outProgram, numKernelLines, kernelProgram, buildOptions);
784 if (error != CL_SUCCESS)
785 {
786 log_error("Create program failed: %d, line: %d\n", error, __LINE__);
787 return error;
788 }
789
790 // Remove offline-compiler-only build options
791 std::string newBuildOptions;
792 if (buildOptions != NULL)
793 {
794 newBuildOptions = buildOptions;
795 std::string offlineCompierOptions[] = {
796 "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
797 };
798 for (auto &s : offlineCompierOptions)
799 {
800 std::string::size_type i = newBuildOptions.find(s);
801 if (i != std::string::npos) newBuildOptions.erase(i, s.length());
802 }
803 }
804 // Build program and create kernel
805 return build_program_create_kernel_helper(
806 context, outProgram, outKernel, numKernelLines, kernelProgram,
807 kernelName, newBuildOptions.c_str());
808 }
809
// Builds an OpenCL C/C++ program and creates a kernel from it
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)811 int build_program_create_kernel_helper(
812 cl_context context, cl_program *outProgram, cl_kernel *outKernel,
813 unsigned int numKernelLines, const char **kernelProgram,
814 const char *kernelName, const char *buildOptions)
815 {
816 int error;
817 /* Compile the program */
818 int buildProgramFailed = 0;
819 int printedSource = 0;
820 error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
821 if (error != CL_SUCCESS)
822 {
823 unsigned int i;
824 print_error(error, "clBuildProgram failed");
825 buildProgramFailed = 1;
826 printedSource = 1;
827 log_error("Build options: %s\n", buildOptions);
828 log_error("Original source is: ------------\n");
829 for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
830 }
831
832 // Verify the build status on all devices
833 cl_uint deviceCount = 0;
834 error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
835 sizeof(deviceCount), &deviceCount, NULL);
836 if (error != CL_SUCCESS)
837 {
838 print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
839 return error;
840 }
841
842 if (deviceCount == 0)
843 {
844 log_error("No devices found for program.\n");
845 return -1;
846 }
847
848 cl_device_id *devices =
849 (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
850 if (NULL == devices) return -1;
851 BufferOwningPtr<cl_device_id> devicesBuf(devices);
852
853 memset(devices, 0, deviceCount * sizeof(cl_device_id));
854 error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
855 sizeof(cl_device_id) * deviceCount, devices, NULL);
856 if (error != CL_SUCCESS)
857 {
858 print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
859 return error;
860 }
861
862 cl_uint z;
863 bool buildFailed = false;
864 for (z = 0; z < deviceCount; z++)
865 {
866 char deviceName[4096] = "";
867 error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
868 deviceName, NULL);
869 if (error != CL_SUCCESS || deviceName[0] == '\0')
870 {
871 log_error("Device \"%d\" failed to return a name\n", z);
872 print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
873 }
874
875 cl_build_status buildStatus;
876 error = clGetProgramBuildInfo(*outProgram, devices[z],
877 CL_PROGRAM_BUILD_STATUS,
878 sizeof(buildStatus), &buildStatus, NULL);
879 if (error != CL_SUCCESS)
880 {
881 print_error(error,
882 "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
883 return error;
884 }
885
886 if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
887 && deviceCount == 1)
888 {
889 buildFailed = true;
890 log_error("clBuildProgram returned an error, but buildStatus is "
891 "marked as CL_BUILD_SUCCESS.\n");
892 }
893
894 if (buildStatus != CL_BUILD_SUCCESS)
895 {
896
897 char statusString[64] = "";
898 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
899 sprintf(statusString, "CL_BUILD_SUCCESS");
900 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
901 sprintf(statusString, "CL_BUILD_NONE");
902 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
903 sprintf(statusString, "CL_BUILD_ERROR");
904 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
905 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
906 else
907 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
908
909 if (buildStatus != CL_BUILD_SUCCESS)
910 log_error(
911 "Build not successful for device \"%s\", status: %s\n",
912 deviceName, statusString);
913 size_t paramSize = 0;
914 error = clGetProgramBuildInfo(*outProgram, devices[z],
915 CL_PROGRAM_BUILD_LOG, 0, NULL,
916 ¶mSize);
917 if (error != CL_SUCCESS)
918 {
919
920 print_error(
921 error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
922 return error;
923 }
924
925 std::string log;
926 log.resize(paramSize / sizeof(char));
927 error = clGetProgramBuildInfo(*outProgram, devices[z],
928 CL_PROGRAM_BUILD_LOG, paramSize,
929 &log[0], NULL);
930 if (error != CL_SUCCESS || log[0] == '\0')
931 {
932 log_error("Device %d (%s) failed to return a build log\n", z,
933 deviceName);
934 if (error)
935 {
936 print_error(
937 error,
938 "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
939 return error;
940 }
941 else
942 {
943 log_error("clGetProgramBuildInfo returned an empty log.\n");
944 return -1;
945 }
946 }
947 // In this case we've already printed out the code above.
948 if (!printedSource)
949 {
950 unsigned int i;
951 log_error("Original source is: ------------\n");
952 for (i = 0; i < numKernelLines; i++)
953 log_error("%s", kernelProgram[i]);
954 printedSource = 1;
955 }
956 log_error("Build log for device \"%s\" is: ------------\n",
957 deviceName);
958 log_error("%s\n", log.c_str());
959 log_error("\n----------\n");
960 return -1;
961 }
962 }
963
964 if (buildFailed)
965 {
966 return -1;
967 }
968
969 /* And create a kernel from it */
970 if (kernelName != NULL)
971 {
972 *outKernel = clCreateKernel(*outProgram, kernelName, &error);
973 if (*outKernel == NULL || error != CL_SUCCESS)
974 {
975 print_error(error, "Unable to create kernel");
976 return error;
977 }
978 }
979
980 return 0;
981 }
982
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)983 int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
984 size_t *outMaxSize, size_t *outLimits)
985 {
986 cl_device_id *devices;
987 size_t size, maxCommonSize = 0;
988 int numDevices, i, j, error;
989 cl_uint numDims;
990 size_t outSize;
991 size_t sizeLimit[] = { 1, 1, 1 };
992
993
994 /* Assume fewer than 16 devices will be returned */
995 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
996 test_error(error, "Unable to obtain list of devices size for context");
997 devices = (cl_device_id *)malloc(outSize);
998 BufferOwningPtr<cl_device_id> devicesBuf(devices);
999
1000 error =
1001 clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
1002 test_error(error, "Unable to obtain list of devices for context");
1003
1004 numDevices = (int)(outSize / sizeof(cl_device_id));
1005
1006 for (i = 0; i < numDevices; i++)
1007 {
1008 error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
1009 sizeof(size), &size, NULL);
1010 test_error(error, "Unable to obtain max work group size for device");
1011 if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1012
1013 error = clGetKernelWorkGroupInfo(kernel, devices[i],
1014 CL_KERNEL_WORK_GROUP_SIZE,
1015 sizeof(size), &size, NULL);
1016 test_error(
1017 error,
1018 "Unable to obtain max work group size for device and kernel combo");
1019 if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1020
1021 error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1022 sizeof(numDims), &numDims, NULL);
1023 test_error(
1024 error,
1025 "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1026 sizeLimit[0] = 1;
1027 error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
1028 numDims * sizeof(size_t), sizeLimit, NULL);
1029 test_error(error,
1030 "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1031
1032 if (outLimits != NULL)
1033 {
1034 if (i == 0)
1035 {
1036 for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
1037 }
1038 else
1039 {
1040 for (j = 0; j < (int)numDims; j++)
1041 {
1042 if (sizeLimit[j] < outLimits[j])
1043 outLimits[j] = sizeLimit[j];
1044 }
1045 }
1046 }
1047 }
1048
1049 *outMaxSize = (unsigned int)maxCommonSize;
1050 return 0;
1051 }
1052
1053
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1054 extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
1055 cl_kernel kernel,
1056 size_t *outSize)
1057 {
1058 cl_uint maxDim;
1059 size_t maxWgSize;
1060 size_t *maxWgSizePerDim;
1061 int error;
1062
1063 error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
1064 sizeof(size_t), &maxWgSize, NULL);
1065 test_error(error,
1066 "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
1067
1068 error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1069 sizeof(cl_uint), &maxDim, NULL);
1070 test_error(error,
1071 "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
1072 maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
1073 if (!maxWgSizePerDim)
1074 {
1075 log_error("Unable to allocate maxWgSizePerDim\n");
1076 return -1;
1077 }
1078
1079 error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1080 maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
1081 if (error != CL_SUCCESS)
1082 {
1083 log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
1084 free(maxWgSizePerDim);
1085 return error;
1086 }
1087
1088 // "maxWgSize" is limited to that of the first dimension.
1089 if (maxWgSize > maxWgSizePerDim[0])
1090 {
1091 maxWgSize = maxWgSizePerDim[0];
1092 }
1093
1094 free(maxWgSizePerDim);
1095
1096 *outSize = maxWgSize;
1097 return 0;
1098 }
1099
1100
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1101 int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
1102 size_t globalThreadSize, size_t *outMaxSize)
1103 {
1104 size_t sizeLimit[3];
1105 int error =
1106 get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
1107 if (error != 0) return error;
1108
1109 /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
1110 */
1111 /* Note for speed, we don't need to check the range of maxCommonSize, b/c
1112 once it gets to 1, the modulo test will succeed and break the loop anyway
1113 */
1114 for (;
1115 (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
1116 (*outMaxSize)--)
1117 ;
1118 return 0;
1119 }
1120
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1121 int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
1122 size_t *globalThreadSizes,
1123 size_t *outMaxSizes)
1124 {
1125 size_t sizeLimit[3];
1126 size_t maxSize;
1127 int error =
1128 get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1129 if (error != 0) return error;
1130
1131 /* Now find a set of factors, multiplied together less than maxSize, but
1132 each a factor of the global sizes */
1133
1134 /* Simple case */
1135 if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
1136 {
1137 if (globalThreadSizes[0] <= sizeLimit[0]
1138 && globalThreadSizes[1] <= sizeLimit[1])
1139 {
1140 outMaxSizes[0] = globalThreadSizes[0];
1141 outMaxSizes[1] = globalThreadSizes[1];
1142 return 0;
1143 }
1144 }
1145
1146 size_t remainingSize, sizeForThisOne;
1147 remainingSize = maxSize;
1148 int i, j;
1149 for (i = 0; i < 2; i++)
1150 {
1151 if (globalThreadSizes[i] > remainingSize)
1152 sizeForThisOne = remainingSize;
1153 else
1154 sizeForThisOne = globalThreadSizes[i];
1155 for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1156 || (sizeForThisOne > sizeLimit[i]);
1157 sizeForThisOne--)
1158 ;
1159 outMaxSizes[i] = sizeForThisOne;
1160 remainingSize = maxSize;
1161 for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1162 }
1163
1164 return 0;
1165 }
1166
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1167 int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
1168 size_t *globalThreadSizes,
1169 size_t *outMaxSizes)
1170 {
1171 size_t sizeLimit[3];
1172 size_t maxSize;
1173 int error =
1174 get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1175 if (error != 0) return error;
1176 /* Now find a set of factors, multiplied together less than maxSize, but
1177 each a factor of the global sizes */
1178
1179 /* Simple case */
1180 if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
1181 <= maxSize)
1182 {
1183 if (globalThreadSizes[0] <= sizeLimit[0]
1184 && globalThreadSizes[1] <= sizeLimit[1]
1185 && globalThreadSizes[2] <= sizeLimit[2])
1186 {
1187 outMaxSizes[0] = globalThreadSizes[0];
1188 outMaxSizes[1] = globalThreadSizes[1];
1189 outMaxSizes[2] = globalThreadSizes[2];
1190 return 0;
1191 }
1192 }
1193
1194 size_t remainingSize, sizeForThisOne;
1195 remainingSize = maxSize;
1196 int i, j;
1197 for (i = 0; i < 3; i++)
1198 {
1199 if (globalThreadSizes[i] > remainingSize)
1200 sizeForThisOne = remainingSize;
1201 else
1202 sizeForThisOne = globalThreadSizes[i];
1203 for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1204 || (sizeForThisOne > sizeLimit[i]);
1205 sizeForThisOne--)
1206 ;
1207 outMaxSizes[i] = sizeForThisOne;
1208 remainingSize = maxSize;
1209 for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1210 }
1211
1212 return 0;
1213 }
1214
1215 /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1216 int is_image_format_supported(cl_context context, cl_mem_flags flags,
1217 cl_mem_object_type image_type,
1218 const cl_image_format *fmt)
1219 {
1220 cl_image_format *list;
1221 cl_uint count = 0;
1222 cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
1223 NULL, &count);
1224 if (count == 0) return 0;
1225
1226 list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
1227 if (NULL == list)
1228 {
1229 log_error("Error: unable to allocate %ld byte buffer for image format "
1230 "list at %s:%d (err = %d)\n",
1231 count * sizeof(cl_image_format), __FILE__, __LINE__, err);
1232 return 0;
1233 }
1234 BufferOwningPtr<cl_image_format> listBuf(list);
1235
1236
1237 cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
1238 list, NULL);
1239 if (error)
1240 {
1241 log_error("Error: failed to obtain supported image type list at %s:%d "
1242 "(err = %d)\n",
1243 __FILE__, __LINE__, err);
1244 return 0;
1245 }
1246
1247 // iterate looking for a match.
1248 cl_uint i;
1249 for (i = 0; i < count; i++)
1250 {
1251 if (fmt->image_channel_data_type == list[i].image_channel_data_type
1252 && fmt->image_channel_order == list[i].image_channel_order)
1253 break;
1254 }
1255
1256 return (i < count) ? 1 : 0;
1257 }
1258
1259 size_t get_pixel_bytes(const cl_image_format *fmt);
get_pixel_bytes(const cl_image_format * fmt)1260 size_t get_pixel_bytes(const cl_image_format *fmt)
1261 {
1262 size_t chanCount;
1263 switch (fmt->image_channel_order)
1264 {
1265 case CL_R:
1266 case CL_A:
1267 case CL_Rx:
1268 case CL_INTENSITY:
1269 case CL_LUMINANCE:
1270 case CL_DEPTH: chanCount = 1; break;
1271 case CL_RG:
1272 case CL_RA:
1273 case CL_RGx: chanCount = 2; break;
1274 case CL_RGB:
1275 case CL_RGBx:
1276 case CL_sRGB:
1277 case CL_sRGBx: chanCount = 3; break;
1278 case CL_RGBA:
1279 case CL_ARGB:
1280 case CL_BGRA:
1281 case CL_sBGRA:
1282 case CL_sRGBA:
1283 #ifdef CL_1RGB_APPLE
1284 case CL_1RGB_APPLE:
1285 #endif
1286 #ifdef CL_BGR1_APPLE
1287 case CL_BGR1_APPLE:
1288 #endif
1289 chanCount = 4;
1290 break;
1291 default:
1292 log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
1293 abort();
1294 break;
1295 }
1296
1297 switch (fmt->image_channel_data_type)
1298 {
1299 case CL_UNORM_SHORT_565:
1300 case CL_UNORM_SHORT_555: return 2;
1301
1302 case CL_UNORM_INT_101010: return 4;
1303
1304 case CL_SNORM_INT8:
1305 case CL_UNORM_INT8:
1306 case CL_SIGNED_INT8:
1307 case CL_UNSIGNED_INT8: return chanCount;
1308
1309 case CL_SNORM_INT16:
1310 case CL_UNORM_INT16:
1311 case CL_HALF_FLOAT:
1312 case CL_SIGNED_INT16:
1313 case CL_UNSIGNED_INT16:
1314 #ifdef CL_SFIXED14_APPLE
1315 case CL_SFIXED14_APPLE:
1316 #endif
1317 return chanCount * 2;
1318
1319 case CL_SIGNED_INT32:
1320 case CL_UNSIGNED_INT32:
1321 case CL_FLOAT: return chanCount * 4;
1322
1323 default:
1324 log_error("Unknown channel data type at %s:%d!\n", __FILE__,
1325 __LINE__);
1326 abort();
1327 }
1328
1329 return 0;
1330 }
1331
verifyImageSupport(cl_device_id device)1332 test_status verifyImageSupport(cl_device_id device)
1333 {
1334 int result = checkForImageSupport(device);
1335 if (result == 0)
1336 {
1337 return TEST_PASS;
1338 }
1339 if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
1340 {
1341 log_error("SKIPPED: Device does not supported images as required by "
1342 "this test!\n");
1343 return TEST_SKIP;
1344 }
1345 return TEST_FAIL;
1346 }
1347
checkForImageSupport(cl_device_id device)1348 int checkForImageSupport(cl_device_id device)
1349 {
1350 cl_uint i;
1351 int error;
1352
1353
1354 /* Check the device props to see if images are supported at all first */
1355 error =
1356 clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1357 test_error(error, "Unable to query device for image support");
1358 if (i == 0)
1359 {
1360 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1361 }
1362
1363 /* So our support is good */
1364 return 0;
1365 }
1366
checkFor3DImageSupport(cl_device_id device)1367 int checkFor3DImageSupport(cl_device_id device)
1368 {
1369 cl_uint i;
1370 int error;
1371
1372 /* Check the device props to see if images are supported at all first */
1373 error =
1374 clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1375 test_error(error, "Unable to query device for image support");
1376 if (i == 0)
1377 {
1378 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1379 }
1380
1381 char profile[128];
1382 error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
1383 NULL);
1384 test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
1385 if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
1386 {
1387 size_t width = -1L;
1388 size_t height = -1L;
1389 size_t depth = -1L;
1390 error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
1391 sizeof(width), &width, NULL);
1392 test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
1393 error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
1394 sizeof(height), &height, NULL);
1395 test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
1396 error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
1397 sizeof(depth), &depth, NULL);
1398 test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
1399
1400 if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1401 }
1402
1403 /* So our support is good */
1404 return 0;
1405 }
1406
checkForReadWriteImageSupport(cl_device_id device)1407 int checkForReadWriteImageSupport(cl_device_id device)
1408 {
1409 if (checkForImageSupport(device))
1410 {
1411 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1412 }
1413
1414 auto device_cl_version = get_device_cl_version(device);
1415 if (device_cl_version >= Version(3, 0))
1416 {
1417 // In OpenCL 3.0, Read-Write images are optional.
1418 // Check if they are supported.
1419 cl_uint are_rw_images_supported{};
1420 test_error(
1421 clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
1422 sizeof(are_rw_images_supported),
1423 &are_rw_images_supported, nullptr),
1424 "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
1425 if (0 == are_rw_images_supported)
1426 {
1427 log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
1428 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1429 }
1430 }
1431 // READ_WRITE images are not supported on 1.X devices.
1432 else if (device_cl_version < Version(2, 0))
1433 {
1434 log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
1435 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1436 }
1437 // Support for read-write image arguments is required
1438 // for an 2.X device if the device supports images.
1439
1440 /* So our support is good */
1441 return 0;
1442 }
1443
get_min_alignment(cl_context context)1444 size_t get_min_alignment(cl_context context)
1445 {
1446 static cl_uint align_size = 0;
1447
1448 if (0 == align_size)
1449 {
1450 cl_device_id *devices;
1451 size_t devices_size = 0;
1452 cl_uint result = 0;
1453 cl_int error;
1454 int i;
1455
1456 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1457 &devices_size);
1458 test_error_ret(error, "clGetContextInfo failed", 0);
1459
1460 devices = (cl_device_id *)malloc(devices_size);
1461 if (devices == NULL)
1462 {
1463 print_error(error, "malloc failed");
1464 return 0;
1465 }
1466
1467 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
1468 (void *)devices, NULL);
1469 test_error_ret(error, "clGetContextInfo failed", 0);
1470
1471 for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
1472 {
1473 cl_uint alignment = 0;
1474
1475 error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1476 sizeof(cl_uint), (void *)&alignment, NULL);
1477
1478 if (error == CL_SUCCESS)
1479 {
1480 alignment >>= 3; // convert bits to bytes
1481 result = (alignment > result) ? alignment : result;
1482 }
1483 else
1484 print_error(error, "clGetDeviceInfo failed");
1485 }
1486
1487 align_size = result;
1488 free(devices);
1489 }
1490
1491 return align_size;
1492 }
1493
get_default_rounding_mode(cl_device_id device)1494 cl_device_fp_config get_default_rounding_mode(cl_device_id device)
1495 {
1496 char profileStr[128] = "";
1497 cl_device_fp_config single = 0;
1498 int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
1499 sizeof(single), &single, NULL);
1500 if (error)
1501 test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
1502 0);
1503
1504 if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
1505
1506 if (0 == (single & CL_FP_ROUND_TO_ZERO))
1507 test_error_ret(-1,
1508 "FAILURE: device must support either "
1509 "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
1510 0);
1511
1512 // Make sure we are an embedded device before allowing a pass
1513 if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
1514 &profileStr, NULL)))
1515 test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
1516
1517 if (strcmp(profileStr, "EMBEDDED_PROFILE"))
1518 test_error_ret(error,
1519 "FAILURE: non-EMBEDDED_PROFILE devices must support "
1520 "CL_FP_ROUND_TO_NEAREST",
1521 0);
1522
1523 return CL_FP_ROUND_TO_ZERO;
1524 }
1525
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1526 int checkDeviceForQueueSupport(cl_device_id device,
1527 cl_command_queue_properties prop)
1528 {
1529 cl_command_queue_properties realProps;
1530 cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
1531 sizeof(realProps), &realProps, NULL);
1532 test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
1533
1534 return (realProps & prop) ? 1 : 0;
1535 }
1536
printDeviceHeader(cl_device_id device)1537 int printDeviceHeader(cl_device_id device)
1538 {
1539 char deviceName[512], deviceVendor[512], deviceVersion[512],
1540 cLangVersion[512];
1541 int error;
1542
1543 error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
1544 deviceName, NULL);
1545 test_error(error, "Unable to get CL_DEVICE_NAME for device");
1546
1547 error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
1548 deviceVendor, NULL);
1549 test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
1550
1551 error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
1552 deviceVersion, NULL);
1553 test_error(error, "Unable to get CL_DEVICE_VERSION for device");
1554
1555 error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1556 sizeof(cLangVersion), cLangVersion, NULL);
1557 test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
1558
1559 log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
1560 "Device Version = %s%s%s\n",
1561 deviceName, deviceVendor, deviceVersion,
1562 (error == CL_SUCCESS) ? ", CL C Version = " : "",
1563 (error == CL_SUCCESS) ? cLangVersion : "");
1564
1565 auto version = get_device_cl_version(device);
1566 if (version >= Version(3, 0))
1567 {
1568 auto ctsVersion = get_device_info_string(
1569 device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
1570 log_info("Device latest conformance version passed: %s\n",
1571 ctsVersion.c_str());
1572 }
1573
1574 return CL_SUCCESS;
1575 }
1576
get_device_cl_c_version(cl_device_id device)1577 Version get_device_cl_c_version(cl_device_id device)
1578 {
1579 auto device_cl_version = get_device_cl_version(device);
1580
1581 // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
1582 // did not exist, but since this is just the first version we can
1583 // return 1.0.
1584 if (device_cl_version == Version{ 1, 0 })
1585 {
1586 return Version{ 1, 0 };
1587 }
1588
1589 // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
1590 // versions are backwards compatible, hence querying with the
1591 // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
1592 // OpenCL C version.
1593 size_t opencl_c_version_size_in_bytes{};
1594 auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
1595 &opencl_c_version_size_in_bytes);
1596 test_error_ret(error,
1597 "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1598 (Version{ -1, 0 }));
1599
1600 std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
1601 error =
1602 clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1603 opencl_c_version.size(), &opencl_c_version[0], nullptr);
1604
1605 test_error_ret(error,
1606 "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1607 (Version{ -1, 0 }));
1608
1609 // Scrape out the major, minor pair from the string.
1610 auto major = opencl_c_version[opencl_c_version.find('.') - 1];
1611 auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
1612
1613 return Version{ major - '0', minor - '0' };
1614 }
1615
get_device_latest_cl_c_version(cl_device_id device)1616 Version get_device_latest_cl_c_version(cl_device_id device)
1617 {
1618 auto device_cl_version = get_device_cl_version(device);
1619
1620 // If the device version >= 3.0 it must support the
1621 // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
1622 // recent CL C version supported by the device.
1623 if (device_cl_version >= Version{ 3, 0 })
1624 {
1625 size_t opencl_c_all_versions_size_in_bytes{};
1626 auto error =
1627 clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1628 &opencl_c_all_versions_size_in_bytes);
1629 test_error_ret(
1630 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1631 (Version{ -1, 0 }));
1632 std::vector<cl_name_version> name_versions(
1633 opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1634 error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1635 opencl_c_all_versions_size_in_bytes,
1636 name_versions.data(), nullptr);
1637 test_error_ret(
1638 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1639 (Version{ -1, 0 }));
1640
1641 Version max_supported_cl_c_version{};
1642 for (const auto &name_version : name_versions)
1643 {
1644 Version current_version{ CL_VERSION_MAJOR(name_version.version),
1645 CL_VERSION_MINOR(name_version.version) };
1646 max_supported_cl_c_version =
1647 (current_version > max_supported_cl_c_version)
1648 ? current_version
1649 : max_supported_cl_c_version;
1650 }
1651 return max_supported_cl_c_version;
1652 }
1653
1654 return get_device_cl_c_version(device);
1655 }
1656
get_max_OpenCL_C_for_context(cl_context context)1657 Version get_max_OpenCL_C_for_context(cl_context context)
1658 {
1659 // Get all the devices in the context and find the maximum
1660 // universally supported OpenCL C version.
1661 size_t devices_size_in_bytes{};
1662 auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
1663 &devices_size_in_bytes);
1664 test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
1665 (Version{ -1, 0 }));
1666 std::vector<cl_device_id> devices(devices_size_in_bytes
1667 / sizeof(cl_device_id));
1668 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
1669 devices.data(), nullptr);
1670 auto current_version = get_device_latest_cl_c_version(devices[0]);
1671 std::for_each(std::next(devices.begin()), devices.end(),
1672 [¤t_version](cl_device_id device) {
1673 auto device_version =
1674 get_device_latest_cl_c_version(device);
1675 // OpenCL 3.0 is not backwards compatible with 2.0.
1676 // If we have 3.0 and 2.0 in the same driver we
1677 // use 1.2.
1678 if (((device_version >= Version(2, 0)
1679 && device_version < Version(3, 0))
1680 && current_version >= Version(3, 0))
1681 || (device_version >= Version(3, 0)
1682 && (current_version >= Version(2, 0)
1683 && current_version < Version(3, 0))))
1684 {
1685 current_version = Version(1, 2);
1686 }
1687 else
1688 {
1689 current_version =
1690 (std::min)(device_version, current_version);
1691 }
1692 });
1693 return current_version;
1694 }
1695
device_supports_cl_c_version(cl_device_id device,Version version)1696 bool device_supports_cl_c_version(cl_device_id device, Version version)
1697 {
1698 auto device_cl_version = get_device_cl_version(device);
1699
1700 // In general, a device does not support an OpenCL C version if it is <=
1701 // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
1702 // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
1703
1704 // If the device version >= 3.0 it must support the
1705 // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
1706 // used must appear in the query result if it's <=
1707 // CL_DEVICE_OPENCL_C_VERSION.
1708 if (device_cl_version >= Version{ 3, 0 })
1709 {
1710 size_t opencl_c_all_versions_size_in_bytes{};
1711 auto error =
1712 clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1713 &opencl_c_all_versions_size_in_bytes);
1714 test_error_ret(
1715 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1716 (false));
1717 std::vector<cl_name_version> name_versions(
1718 opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1719 error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1720 opencl_c_all_versions_size_in_bytes,
1721 name_versions.data(), nullptr);
1722 test_error_ret(
1723 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1724 (false));
1725
1726 for (const auto &name_version : name_versions)
1727 {
1728 Version current_version{ CL_VERSION_MAJOR(name_version.version),
1729 CL_VERSION_MINOR(name_version.version) };
1730 if (current_version == version)
1731 {
1732 return true;
1733 }
1734 }
1735 }
1736
1737 return version <= get_device_cl_c_version(device);
1738 }
1739
/*
 * Calls `fn` repeatedly until it returns true or the accumulated sleep time
 * reaches `timeout_ms`.
 *
 * After each unsuccessful call the function sleeps for `interval_ms`
 * milliseconds and adds that interval to the elapsed time. `fn` is not called
 * at all when `timeout_ms` is 0.
 *
 * Returns true as soon as `fn` returns true, false if the timeout is reached
 * first.
 */
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    for (unsigned elapsed_ms = 0; elapsed_ms < timeout_ms;
         elapsed_ms += interval_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(interval_ms * 1000);
    }

    return false;
}
1759
device_supports_double(cl_device_id device)1760 bool device_supports_double(cl_device_id device)
1761 {
1762 if (is_extension_available(device, "cl_khr_fp64"))
1763 {
1764 return true;
1765 }
1766 else
1767 {
1768 cl_device_fp_config double_fp_config;
1769 cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
1770 sizeof(double_fp_config),
1771 &double_fp_config, nullptr);
1772 test_error(err,
1773 "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
1774 return double_fp_config != 0;
1775 }
1776 }
1777
device_supports_half(cl_device_id device)1778 bool device_supports_half(cl_device_id device)
1779 {
1780 return is_extension_available(device, "cl_khr_fp16");
1781 }
1782