1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "crc32.h"
17 #include "kernelHelpers.h"
18 #include "deviceInfo.h"
19 #include "errorHelpers.h"
20 #include "imageHelpers.h"
21 #include "typeWrappers.h"
22 #include "testHarness.h"
23 #include "parseParameters.h"
24
25 #include <cassert>
26 #include <vector>
27 #include <string>
28 #include <fstream>
29 #include <sstream>
30 #include <iomanip>
31 #include <mutex>
32 #include <algorithm>
33
// Directory separator used when composing cache file paths; a backslash when
// _WIN32 is defined, a forward slash otherwise.
#ifdef _WIN32
std::string slash("\\");
#else
std::string slash("/");
#endif
39
40 static std::mutex gCompilerMutex;
41
42 static cl_int get_first_device_id(const cl_context context,
43 cl_device_id &device);
44
// Returns the end-of-file position of `fileName` (opened in binary mode) as a
// long, or 0 when the file cannot be opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // Jump to the end; the resulting stream position is the file length.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
54
// Concatenates the first `numKernelLines` C strings of `kernelProgram`, in
// order, into a single string. No separator is inserted between entries.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string source;
    for (unsigned int line = 0; line < numKernelLines; ++line)
    {
        source.append(kernelProgram[line]);
    }
    return source;
}
67
get_kernel_name(const std::string & source)68 std::string get_kernel_name(const std::string &source)
69 {
70 // Create list of kernel names
71 std::string kernelsList;
72 size_t kPos = source.find("kernel");
73 while (kPos != std::string::npos)
74 {
75 // check for '__kernel'
76 size_t pos = kPos;
77 if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
78 pos -= 2;
79
80 // check character before 'kernel' (white space expected)
81 size_t wsPos = source.find_last_of(" \t\r\n", pos);
82 if (wsPos == std::string::npos || wsPos + 1 == pos)
83 {
84 // check character after 'kernel' (white space expected)
85 size_t akPos = kPos + sizeof("kernel") - 1;
86 wsPos = source.find_first_of(" \t\r\n", akPos);
87 if (!(wsPos == akPos))
88 {
89 kPos = source.find("kernel", kPos + 1);
90 continue;
91 }
92
93 bool attributeFound;
94 do
95 {
96 attributeFound = false;
97 // find '(' after kernel name name
98 size_t pPos = source.find("(", akPos);
99 if (!(pPos != std::string::npos)) continue;
100
101 // check for not empty kernel name before '('
102 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
103 if (!(pos != std::string::npos && pos > akPos)) continue;
104
105 // find character before kernel name
106 wsPos = source.find_last_of(" \t\r\n", pos);
107 if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
108
109 std::string name =
110 source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
111 // check for kernel attribute
112 if (name == "__attribute__")
113 {
114 attributeFound = true;
115 int pCount = 1;
116 akPos = pPos + 1;
117 while (pCount > 0 && akPos != std::string::npos)
118 {
119 akPos = source.find_first_of("()", akPos + 1);
120 if (akPos != std::string::npos)
121 {
122 if (source[akPos] == '(')
123 pCount++;
124 else
125 pCount--;
126 }
127 }
128 }
129 else
130 {
131 kernelsList += name + ".";
132 }
133 } while (attributeFound);
134 }
135 kPos = source.find("kernel", kPos + 1);
136 }
137 std::ostringstream oss;
138 if (MAX_LEN_FOR_KERNEL_LIST > 0)
139 {
140 if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
141 {
142 kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
143 kernelsList[kernelsList.size() - 1] = '.';
144 kernelsList[kernelsList.size() - 1] = '.';
145 }
146 oss << kernelsList;
147 }
148 return oss.str();
149 }
150
151 static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)152 get_offline_compilation_file_type_str(const CompilationMode compilationMode)
153 {
154 switch (compilationMode)
155 {
156 default: assert(0 && "Invalid compilation mode"); abort();
157 case kOnline:
158 assert(0 && "Invalid compilation mode for offline compilation");
159 abort();
160 case kBinary: return "binary";
161 case kSpir_v: return "SPIR-V";
162 }
163 }
164
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165 static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166 const char *const *kernelProgram,
167 const char *buildOptions)
168 {
169 std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170 std::string kernelName = get_kernel_name(kernel);
171 cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172 std::ostringstream oss;
173 oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
174 << kernelCrc;
175 if (buildOptions)
176 {
177 cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
178 oss << '.' << std::hex << std::setfill('0') << std::setw(8)
179 << bOptionsCrc;
180 }
181 return oss.str();
182 }
183
184
185 static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)186 get_cl_build_options_filename_with_path(const std::string &filePath,
187 const std::string &fileNamePrefix)
188 {
189 return filePath + slash + fileNamePrefix + ".options";
190 }
191
192 static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)193 get_cl_source_filename_with_path(const std::string &filePath,
194 const std::string &fileNamePrefix)
195 {
196 return filePath + slash + fileNamePrefix + ".cl";
197 }
198
199 static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)200 get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
201 const std::string &filePath,
202 const std::string &fileNamePrefix)
203 {
204 std::string binaryFilename = filePath + slash + fileNamePrefix;
205 if (kSpir_v == mode)
206 {
207 std::ostringstream extension;
208 extension << ".spv" << deviceAddrSpaceSize;
209 binaryFilename += extension.str();
210 }
211 return binaryFilename;
212 }
213
file_exist_on_disk(const std::string & filePath,const std::string & fileName)214 static bool file_exist_on_disk(const std::string &filePath,
215 const std::string &fileName)
216 {
217 std::string fileNameWithPath = filePath + slash + fileName;
218 bool exist = false;
219 std::ifstream ifs;
220
221 ifs.open(fileNameWithPath.c_str(), std::ios::binary);
222 if (ifs.good()) exist = true;
223 ifs.close();
224 return exist;
225 }
226
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)227 static bool should_save_kernel_source_to_disk(CompilationMode mode,
228 CompilationCacheMode cacheMode,
229 const std::string &binaryPath,
230 const std::string &binaryName)
231 {
232 bool saveToDisk = false;
233 if (cacheMode == kCacheModeDumpCl
234 || (cacheMode == kCacheModeOverwrite && mode != kOnline))
235 {
236 saveToDisk = true;
237 }
238 if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
239 {
240 saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
241 }
242 return saveToDisk;
243 }
244
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)245 static int save_kernel_build_options_to_disk(const std::string &path,
246 const std::string &prefix,
247 const char *buildOptions)
248 {
249 std::string filename =
250 get_cl_build_options_filename_with_path(path, prefix);
251 std::ofstream ofs(filename.c_str(), std::ios::binary);
252 if (!ofs.good())
253 {
254 log_info("Can't save kernel build options: %s\n", filename.c_str());
255 return -1;
256 }
257 ofs.write(buildOptions, strlen(buildOptions));
258 ofs.close();
259 log_info("Saved kernel build options to file: %s\n", filename.c_str());
260 return CL_SUCCESS;
261 }
262
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)263 static int save_kernel_source_to_disk(const std::string &path,
264 const std::string &prefix,
265 const std::string &source)
266 {
267 std::string filename = get_cl_source_filename_with_path(path, prefix);
268 std::ofstream ofs(filename.c_str(), std::ios::binary);
269 if (!ofs.good())
270 {
271 log_info("Can't save kernel source: %s\n", filename.c_str());
272 return -1;
273 }
274 ofs.write(source.c_str(), source.size());
275 ofs.close();
276 log_info("Saved kernel source to file: %s\n", filename.c_str());
277 return CL_SUCCESS;
278 }
279
280 static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)281 save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
282 const char *const *kernelProgram,
283 const char *buildOptions)
284 {
285 int error;
286
287 std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
288 std::string kernelNamePrefix =
289 get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
290
291 // save kernel source to disk
292 error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
293 kernel);
294
295 // save kernel build options to disk if exists
296 if (buildOptions != NULL)
297 error |= save_kernel_build_options_to_disk(
298 gCompilationCachePath, kernelNamePrefix, buildOptions);
299
300 return error;
301 }
302
303 static std::string
get_compilation_mode_str(const CompilationMode compilationMode)304 get_compilation_mode_str(const CompilationMode compilationMode)
305 {
306 switch (compilationMode)
307 {
308 default: assert(0 && "Invalid compilation mode"); abort();
309 case kOnline: return "online";
310 case kBinary: return "binary";
311 case kSpir_v: return "spir-v";
312 }
313 }
314
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)315 static cl_int get_cl_device_info_str(const cl_device_id device,
316 const cl_uint device_address_space_size,
317 const CompilationMode compilationMode,
318 std::string &clDeviceInfo)
319 {
320 std::string extensionsString = get_device_extensions_string(device);
321 std::string versionString = get_device_version_string(device);
322
323 std::ostringstream clDeviceInfoStream;
324 std::string file_type =
325 get_offline_compilation_file_type_str(compilationMode);
326 clDeviceInfoStream << "# OpenCL device info affecting " << file_type
327 << " offline compilation:" << std::endl
328 << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
329 << std::endl
330 << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
331 << std::endl;
332 /* We only need the device's supported IL version(s) when compiling IL
333 * that will be loaded with clCreateProgramWithIL() */
334 if (compilationMode == kSpir_v)
335 {
336 std::string ilVersionString = get_device_il_version_string(device);
337 clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
338 << "\"" << std::endl;
339 }
340 clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
341 << std::endl;
342 clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
343 << (0 == checkForImageSupport(device)) << std::endl;
344 clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
345 << "\"" << std::endl;
346
347 clDeviceInfo = clDeviceInfoStream.str();
348
349 return CL_SUCCESS;
350 }
351
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)352 static int write_cl_device_info(const cl_device_id device,
353 const cl_uint device_address_space_size,
354 const CompilationMode compilationMode,
355 std::string &clDeviceInfoFilename)
356 {
357 std::string clDeviceInfo;
358 int error = get_cl_device_info_str(device, device_address_space_size,
359 compilationMode, clDeviceInfo);
360 if (error != CL_SUCCESS)
361 {
362 return error;
363 }
364
365 cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
366
367 /* Get the filename for the clDeviceInfo file.
368 * Note: the file includes the hash on its content, so it is usually
369 * unnecessary to delete it. */
370 std::ostringstream clDeviceInfoFilenameStream;
371 clDeviceInfoFilenameStream << gCompilationCachePath << slash
372 << "clDeviceInfo-";
373 clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
374 << crc << ".txt";
375
376 clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
377
378 if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
379 {
380 /* The CL device info file has already been created.
381 * Nothing to do. */
382 return 0;
383 }
384
385 /* The file does not exist or its length is not as expected.
386 * Create/overwrite it. */
387 std::ofstream ofs(clDeviceInfoFilename);
388 if (!ofs.good())
389 {
390 log_info("OfflineCompiler: can't create CL device info file: %s\n",
391 clDeviceInfoFilename.c_str());
392 return -1;
393 }
394 ofs << clDeviceInfo;
395 ofs.close();
396
397 return CL_SUCCESS;
398 }
399
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)400 static std::string get_offline_compilation_command(
401 const cl_uint device_address_space_size,
402 const CompilationMode compilationMode, const std::string &bOptions,
403 const std::string &sourceFilename, const std::string &outputFilename,
404 const std::string &clDeviceInfoFilename)
405 {
406 std::ostringstream wrapperOptions;
407
408 wrapperOptions << gCompilationProgram
409 << " --mode=" << get_compilation_mode_str(compilationMode)
410 << " --source=" << sourceFilename
411 << " --output=" << outputFilename
412 << " --cl-device-info=" << clDeviceInfoFilename;
413
414 if (bOptions != "")
415 {
416 // Add build options passed to this function
417 wrapperOptions << " -- " << bOptions;
418 }
419
420 return wrapperOptions.str();
421 }
422
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename)423 static int invoke_offline_compiler(const cl_device_id device,
424 const cl_uint device_address_space_size,
425 const CompilationMode compilationMode,
426 const std::string &bOptions,
427 const std::string &sourceFilename,
428 const std::string &outputFilename)
429 {
430 std::string runString;
431 std::string clDeviceInfoFilename;
432
433 // See cl_offline_compiler-interface.txt for a description of the
434 // format of the CL device information file generated below, and
435 // the internal command line interface for invoking the offline
436 // compiler.
437
438 cl_int err = write_cl_device_info(device, device_address_space_size,
439 compilationMode, clDeviceInfoFilename);
440 if (err != CL_SUCCESS)
441 {
442 log_error("Failed writing CL device info file\n");
443 return err;
444 }
445
446 runString = get_offline_compilation_command(
447 device_address_space_size, compilationMode, bOptions, sourceFilename,
448 outputFilename, clDeviceInfoFilename);
449
450 // execute script
451 log_info("Executing command: %s\n", runString.c_str());
452 fflush(stdout);
453 int returnCode = system(runString.c_str());
454 if (returnCode != 0)
455 {
456 log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
457 return CL_COMPILE_PROGRAM_FAILURE;
458 }
459
460 return CL_SUCCESS;
461 }
462
get_first_device_id(const cl_context context,cl_device_id & device)463 static cl_int get_first_device_id(const cl_context context,
464 cl_device_id &device)
465 {
466 cl_uint numDevices = 0;
467 cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
468 sizeof(cl_uint), &numDevices, NULL);
469 test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
470
471 if (numDevices == 0)
472 {
473 log_error("ERROR: No CL devices found\n");
474 return -1;
475 }
476
477 std::vector<cl_device_id> devices(numDevices, 0);
478 error =
479 clGetContextInfo(context, CL_CONTEXT_DEVICES,
480 numDevices * sizeof(cl_device_id), &devices[0], NULL);
481 test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
482
483 device = devices[0];
484 return CL_SUCCESS;
485 }
486
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)487 static cl_int get_device_address_bits(const cl_device_id device,
488 cl_uint &device_address_space_size)
489 {
490 cl_int error =
491 clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
492 &device_address_space_size, NULL);
493 test_error(error, "Unable to obtain device address bits");
494
495 if (device_address_space_size != 32 && device_address_space_size != 64)
496 {
497 log_error("ERROR: Unexpected number of device address bits: %u\n",
498 device_address_space_size);
499 return -1;
500 }
501
502 return CL_SUCCESS;
503 }
504
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)505 static int get_offline_compiler_output(
506 std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
507 const CompilationMode compilationMode, const std::string &bOptions,
508 const std::string &kernelPath, const std::string &kernelNamePrefix)
509 {
510 std::string sourceFilename =
511 get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
512 std::string outputFilename = get_binary_filename_with_path(
513 compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
514
515 ifs.open(outputFilename.c_str(), std::ios::binary);
516 if (!ifs.good())
517 {
518 std::string file_type =
519 get_offline_compilation_file_type_str(compilationMode);
520 if (gCompilationCacheMode == kCacheModeForceRead)
521 {
522 log_info("OfflineCompiler: can't open cached %s file: %s\n",
523 file_type.c_str(), outputFilename.c_str());
524 return -1;
525 }
526 else
527 {
528 int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
529 compilationMode, bOptions,
530 sourceFilename, outputFilename);
531 if (error != CL_SUCCESS) return error;
532
533 // open output file for reading
534 ifs.open(outputFilename.c_str(), std::ios::binary);
535 if (!ifs.good())
536 {
537 log_info("OfflineCompiler: can't read generated %s file: %s\n",
538 file_type.c_str(), outputFilename.c_str());
539 return -1;
540 }
541 }
542 }
543
544 if (compilationMode == kSpir_v && !gDisableSPIRVValidation)
545 {
546 std::string runString = gSPIRVValidator + " " + outputFilename;
547
548 int returnCode = system(runString.c_str());
549 if (returnCode == -1)
550 {
551 log_error("Error: failed to invoke SPIR-V validator\n");
552 return CL_COMPILE_PROGRAM_FAILURE;
553 }
554 else if (returnCode != 0)
555 {
556 log_error(
557 "Failed to validate SPIR-V file %s: system() returned 0x%x\n",
558 outputFilename.c_str(), returnCode);
559 return CL_COMPILE_PROGRAM_FAILURE;
560 }
561 }
562
563 return CL_SUCCESS;
564 }
565
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,CompilationMode compilationMode)566 static int create_single_kernel_helper_create_program_offline(
567 cl_context context, cl_device_id device, cl_program *outProgram,
568 unsigned int numKernelLines, const char *const *kernelProgram,
569 const char *buildOptions, CompilationMode compilationMode)
570 {
571 if (kCacheModeDumpCl == gCompilationCacheMode)
572 {
573 return -1;
574 }
575
576 // Get device CL_DEVICE_ADDRESS_BITS
577 int error;
578 cl_uint device_address_space_size = 0;
579 if (device == NULL)
580 {
581 error = get_first_device_id(context, device);
582 test_error(error, "Failed to get device ID for first device");
583 }
584 error = get_device_address_bits(device, device_address_space_size);
585 if (error != CL_SUCCESS) return error;
586
587 // set build options
588 std::string bOptions;
589 bOptions += buildOptions ? std::string(buildOptions) : "";
590
591 std::string kernelName =
592 get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
593
594
595 std::ifstream ifs;
596 error = get_offline_compiler_output(ifs, device, device_address_space_size,
597 compilationMode, bOptions,
598 gCompilationCachePath, kernelName);
599 if (error != CL_SUCCESS) return error;
600
601 ifs.seekg(0, ifs.end);
602 size_t length = static_cast<size_t>(ifs.tellg());
603 ifs.seekg(0, ifs.beg);
604
605 // treat modifiedProgram as input for clCreateProgramWithBinary
606 if (compilationMode == kBinary)
607 {
608 // read binary from file:
609 std::vector<unsigned char> modifiedKernelBuf(length);
610
611 ifs.read((char *)&modifiedKernelBuf[0], length);
612 ifs.close();
613
614 size_t lengths = modifiedKernelBuf.size();
615 const unsigned char *binaries = { &modifiedKernelBuf[0] };
616 log_info("offlineCompiler: clCreateProgramWithSource replaced with "
617 "clCreateProgramWithBinary\n");
618 *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
619 &binaries, NULL, &error);
620 if (*outProgram == NULL || error != CL_SUCCESS)
621 {
622 print_error(error, "clCreateProgramWithBinary failed");
623 return error;
624 }
625 }
626 // treat modifiedProgram as input for clCreateProgramWithIL
627 else if (compilationMode == kSpir_v)
628 {
629 // read spir-v from file:
630 std::vector<unsigned char> modifiedKernelBuf(length);
631
632 ifs.read((char *)&modifiedKernelBuf[0], length);
633 ifs.close();
634
635 size_t length = modifiedKernelBuf.size();
636 log_info("offlineCompiler: clCreateProgramWithSource replaced with "
637 "clCreateProgramWithIL\n");
638 if (gCoreILProgram)
639 {
640 *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
641 length, &error);
642 }
643 else
644 {
645 cl_platform_id platform;
646 error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
647 sizeof(cl_platform_id), &platform, NULL);
648 test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
649
650 clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
651 clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
652 clGetExtensionFunctionAddressForPlatform(
653 platform, "clCreateProgramWithILKHR");
654 if (clCreateProgramWithILKHR == NULL)
655 {
656 log_error(
657 "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
658 return -1;
659 }
660 *outProgram = clCreateProgramWithILKHR(
661 context, &modifiedKernelBuf[0], length, &error);
662 }
663
664 if (*outProgram == NULL || error != CL_SUCCESS)
665 {
666 if (gCoreILProgram)
667 {
668 print_error(error, "clCreateProgramWithIL failed");
669 }
670 else
671 {
672 print_error(error, "clCreateProgramWithILKHR failed");
673 }
674 return error;
675 }
676 }
677
678 return CL_SUCCESS;
679 }
680
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,CompilationMode compilationMode)681 static int create_single_kernel_helper_create_program(
682 cl_context context, cl_device_id device, cl_program *outProgram,
683 unsigned int numKernelLines, const char **kernelProgram,
684 const char *buildOptions, CompilationMode compilationMode)
685 {
686 std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
687
688 std::string filePrefix =
689 get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
690 bool shouldSaveToDisk = should_save_kernel_source_to_disk(
691 compilationMode, gCompilationCacheMode, gCompilationCachePath,
692 filePrefix);
693
694 if (shouldSaveToDisk)
695 {
696 if (CL_SUCCESS
697 != save_kernel_source_and_options_to_disk(
698 numKernelLines, kernelProgram, buildOptions))
699 {
700 log_error("Unable to dump kernel source to disk");
701 return -1;
702 }
703 }
704 if (compilationMode == kOnline)
705 {
706 int error = CL_SUCCESS;
707
708 /* Create the program object from source */
709 *outProgram = clCreateProgramWithSource(context, numKernelLines,
710 kernelProgram, NULL, &error);
711 if (*outProgram == NULL || error != CL_SUCCESS)
712 {
713 print_error(error, "clCreateProgramWithSource failed");
714 return error;
715 }
716 return CL_SUCCESS;
717 }
718 else
719 {
720 return create_single_kernel_helper_create_program_offline(
721 context, device, outProgram, numKernelLines, kernelProgram,
722 buildOptions, compilationMode);
723 }
724 }
725
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)726 int create_single_kernel_helper_create_program(cl_context context,
727 cl_program *outProgram,
728 unsigned int numKernelLines,
729 const char **kernelProgram,
730 const char *buildOptions)
731 {
732 return create_single_kernel_helper_create_program(
733 context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
734 gCompilationMode);
735 }
736
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)737 int create_single_kernel_helper_create_program_for_device(
738 cl_context context, cl_device_id device, cl_program *outProgram,
739 unsigned int numKernelLines, const char **kernelProgram,
740 const char *buildOptions)
741 {
742 return create_single_kernel_helper_create_program(
743 context, device, outProgram, numKernelLines, kernelProgram,
744 buildOptions, gCompilationMode);
745 }
746
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)747 int create_single_kernel_helper_with_build_options(
748 cl_context context, cl_program *outProgram, cl_kernel *outKernel,
749 unsigned int numKernelLines, const char **kernelProgram,
750 const char *kernelName, const char *buildOptions)
751 {
752 return create_single_kernel_helper(context, outProgram, outKernel,
753 numKernelLines, kernelProgram,
754 kernelName, buildOptions);
755 }
756
757 // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)758 int create_single_kernel_helper(cl_context context, cl_program *outProgram,
759 cl_kernel *outKernel,
760 unsigned int numKernelLines,
761 const char **kernelProgram,
762 const char *kernelName,
763 const char *buildOptions)
764 {
765 // For the logic that automatically adds -cl-std it is much cleaner if the
766 // build options have RAII. This buffer will store the potentially updated
767 // build options, in which case buildOptions will point at the string owned
768 // by this buffer.
769 std::string build_options_internal{ buildOptions ? buildOptions : "" };
770
771 // Check the build options for the -cl-std option.
772 if (!buildOptions || !strstr(buildOptions, "-cl-std"))
773 {
774 // If the build option isn't present add it using the latest OpenCL-C
775 // version supported by the device. This allows calling code to force a
776 // particular CL C version if it is required, but also means that
777 // callers need not specify a version if they want to assume the most
778 // recent CL C.
779
780 auto version = get_max_OpenCL_C_for_context(context);
781
782 std::string cl_std{};
783 if (version >= Version(3, 0))
784 {
785 cl_std = "-cl-std=CL3.0";
786 }
787 else if (version >= Version(2, 0) && version < Version(3, 0))
788 {
789 cl_std = "-cl-std=CL2.0";
790 }
791 else
792 {
793 // If the -cl-std build option is not specified, the highest OpenCL
794 // C 1.x language version supported by each device is used when
795 // compiling the program for each device.
796 cl_std = "";
797 }
798 build_options_internal += ' ';
799 build_options_internal += cl_std;
800 buildOptions = build_options_internal.c_str();
801 }
802 int error = create_single_kernel_helper_create_program(
803 context, outProgram, numKernelLines, kernelProgram, buildOptions);
804 if (error != CL_SUCCESS)
805 {
806 log_error("Create program failed: %d, line: %d\n", error, __LINE__);
807 return error;
808 }
809
810 // Remove offline-compiler-only build options
811 std::string newBuildOptions;
812 if (buildOptions != NULL)
813 {
814 newBuildOptions = buildOptions;
815 std::string offlineCompierOptions[] = {
816 "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
817 };
818 for (auto &s : offlineCompierOptions)
819 {
820 std::string::size_type i = newBuildOptions.find(s);
821 if (i != std::string::npos) newBuildOptions.erase(i, s.length());
822 }
823 }
824 // Build program and create kernel
825 return build_program_create_kernel_helper(
826 context, outProgram, outKernel, numKernelLines, kernelProgram,
827 kernelName, newBuildOptions.c_str());
828 }
829
// Builds an OpenCL C/C++ program and creates a kernel
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)831 int build_program_create_kernel_helper(
832 cl_context context, cl_program *outProgram, cl_kernel *outKernel,
833 unsigned int numKernelLines, const char **kernelProgram,
834 const char *kernelName, const char *buildOptions)
835 {
836 int error;
837 /* Compile the program */
838 int buildProgramFailed = 0;
839 int printedSource = 0;
840 error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
841 if (error != CL_SUCCESS)
842 {
843 unsigned int i;
844 print_error(error, "clBuildProgram failed");
845 buildProgramFailed = 1;
846 printedSource = 1;
847 log_error("Build options: %s\n", buildOptions);
848 log_error("Original source is: ------------\n");
849 for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
850 }
851
852 // Verify the build status on all devices
853 cl_uint deviceCount = 0;
854 error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
855 sizeof(deviceCount), &deviceCount, NULL);
856 if (error != CL_SUCCESS)
857 {
858 print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
859 return error;
860 }
861
862 if (deviceCount == 0)
863 {
864 log_error("No devices found for program.\n");
865 return -1;
866 }
867
868 cl_device_id *devices =
869 (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
870 if (NULL == devices) return -1;
871 BufferOwningPtr<cl_device_id> devicesBuf(devices);
872
873 memset(devices, 0, deviceCount * sizeof(cl_device_id));
874 error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
875 sizeof(cl_device_id) * deviceCount, devices, NULL);
876 if (error != CL_SUCCESS)
877 {
878 print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
879 return error;
880 }
881
882 cl_uint z;
883 bool buildFailed = false;
884 for (z = 0; z < deviceCount; z++)
885 {
886 char deviceName[4096] = "";
887 error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
888 deviceName, NULL);
889 if (error != CL_SUCCESS || deviceName[0] == '\0')
890 {
891 log_error("Device \"%d\" failed to return a name\n", z);
892 print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
893 }
894
895 cl_build_status buildStatus;
896 error = clGetProgramBuildInfo(*outProgram, devices[z],
897 CL_PROGRAM_BUILD_STATUS,
898 sizeof(buildStatus), &buildStatus, NULL);
899 if (error != CL_SUCCESS)
900 {
901 print_error(error,
902 "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
903 return error;
904 }
905
906 if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
907 && deviceCount == 1)
908 {
909 buildFailed = true;
910 log_error("clBuildProgram returned an error, but buildStatus is "
911 "marked as CL_BUILD_SUCCESS.\n");
912 }
913
914 if (buildStatus != CL_BUILD_SUCCESS)
915 {
916
917 char statusString[64] = "";
918 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
919 sprintf(statusString, "CL_BUILD_SUCCESS");
920 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
921 sprintf(statusString, "CL_BUILD_NONE");
922 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
923 sprintf(statusString, "CL_BUILD_ERROR");
924 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
925 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
926 else
927 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
928
929 if (buildStatus != CL_BUILD_SUCCESS)
930 log_error(
931 "Build not successful for device \"%s\", status: %s\n",
932 deviceName, statusString);
933 size_t paramSize = 0;
934 error = clGetProgramBuildInfo(*outProgram, devices[z],
935 CL_PROGRAM_BUILD_LOG, 0, NULL,
936 ¶mSize);
937 if (error != CL_SUCCESS)
938 {
939
940 print_error(
941 error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
942 return error;
943 }
944
945 std::string log;
946 log.resize(paramSize / sizeof(char));
947 error = clGetProgramBuildInfo(*outProgram, devices[z],
948 CL_PROGRAM_BUILD_LOG, paramSize,
949 &log[0], NULL);
950 if (error != CL_SUCCESS || log[0] == '\0')
951 {
952 log_error("Device %d (%s) failed to return a build log\n", z,
953 deviceName);
954 if (error)
955 {
956 print_error(
957 error,
958 "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
959 return error;
960 }
961 else
962 {
963 log_error("clGetProgramBuildInfo returned an empty log.\n");
964 return -1;
965 }
966 }
967 // In this case we've already printed out the code above.
968 if (!printedSource)
969 {
970 unsigned int i;
971 log_error("Original source is: ------------\n");
972 for (i = 0; i < numKernelLines; i++)
973 log_error("%s", kernelProgram[i]);
974 printedSource = 1;
975 }
976 log_error("Build log for device \"%s\" is: ------------\n",
977 deviceName);
978 log_error("%s\n", log.c_str());
979 log_error("\n----------\n");
980 return -1;
981 }
982 }
983
984 if (buildFailed)
985 {
986 return -1;
987 }
988
989 /* And create a kernel from it */
990 if (kernelName != NULL)
991 {
992 *outKernel = clCreateKernel(*outProgram, kernelName, &error);
993 if (*outKernel == NULL || error != CL_SUCCESS)
994 {
995 print_error(error, "Unable to create kernel");
996 return error;
997 }
998 }
999
1000 return 0;
1001 }
1002
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)1003 int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
1004 size_t *outMaxSize, size_t *outLimits)
1005 {
1006 cl_device_id *devices;
1007 size_t size, maxCommonSize = 0;
1008 int numDevices, i, j, error;
1009 cl_uint numDims;
1010 size_t outSize;
1011 size_t sizeLimit[] = { 1, 1, 1 };
1012
1013
1014 /* Assume fewer than 16 devices will be returned */
1015 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
1016 test_error(error, "Unable to obtain list of devices size for context");
1017 devices = (cl_device_id *)malloc(outSize);
1018 BufferOwningPtr<cl_device_id> devicesBuf(devices);
1019
1020 error =
1021 clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
1022 test_error(error, "Unable to obtain list of devices for context");
1023
1024 numDevices = (int)(outSize / sizeof(cl_device_id));
1025
1026 for (i = 0; i < numDevices; i++)
1027 {
1028 error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
1029 sizeof(size), &size, NULL);
1030 test_error(error, "Unable to obtain max work group size for device");
1031 if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1032
1033 error = clGetKernelWorkGroupInfo(kernel, devices[i],
1034 CL_KERNEL_WORK_GROUP_SIZE,
1035 sizeof(size), &size, NULL);
1036 test_error(
1037 error,
1038 "Unable to obtain max work group size for device and kernel combo");
1039 if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1040
1041 error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1042 sizeof(numDims), &numDims, NULL);
1043 test_error(
1044 error,
1045 "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1046 sizeLimit[0] = 1;
1047 error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
1048 numDims * sizeof(size_t), sizeLimit, NULL);
1049 test_error(error,
1050 "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1051
1052 if (outLimits != NULL)
1053 {
1054 if (i == 0)
1055 {
1056 for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
1057 }
1058 else
1059 {
1060 for (j = 0; j < (int)numDims; j++)
1061 {
1062 if (sizeLimit[j] < outLimits[j])
1063 outLimits[j] = sizeLimit[j];
1064 }
1065 }
1066 }
1067 }
1068
1069 *outMaxSize = (unsigned int)maxCommonSize;
1070 return 0;
1071 }
1072
1073
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1074 extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
1075 cl_kernel kernel,
1076 size_t *outSize)
1077 {
1078 cl_uint maxDim;
1079 size_t maxWgSize;
1080 size_t *maxWgSizePerDim;
1081 int error;
1082
1083 error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
1084 sizeof(size_t), &maxWgSize, NULL);
1085 test_error(error,
1086 "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
1087
1088 error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1089 sizeof(cl_uint), &maxDim, NULL);
1090 test_error(error,
1091 "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
1092 maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
1093 if (!maxWgSizePerDim)
1094 {
1095 log_error("Unable to allocate maxWgSizePerDim\n");
1096 return -1;
1097 }
1098
1099 error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1100 maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
1101 if (error != CL_SUCCESS)
1102 {
1103 log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
1104 free(maxWgSizePerDim);
1105 return error;
1106 }
1107
1108 // "maxWgSize" is limited to that of the first dimension.
1109 if (maxWgSize > maxWgSizePerDim[0])
1110 {
1111 maxWgSize = maxWgSizePerDim[0];
1112 }
1113
1114 free(maxWgSizePerDim);
1115
1116 *outSize = maxWgSize;
1117 return 0;
1118 }
1119
1120
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1121 int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
1122 size_t globalThreadSize, size_t *outMaxSize)
1123 {
1124 size_t sizeLimit[3];
1125 int error =
1126 get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
1127 if (error != 0) return error;
1128
1129 /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
1130 */
1131 /* Note for speed, we don't need to check the range of maxCommonSize, b/c
1132 once it gets to 1, the modulo test will succeed and break the loop anyway
1133 */
1134 for (;
1135 (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
1136 (*outMaxSize)--)
1137 ;
1138 return 0;
1139 }
1140
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1141 int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
1142 size_t *globalThreadSizes,
1143 size_t *outMaxSizes)
1144 {
1145 size_t sizeLimit[3];
1146 size_t maxSize;
1147 int error =
1148 get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1149 if (error != 0) return error;
1150
1151 /* Now find a set of factors, multiplied together less than maxSize, but
1152 each a factor of the global sizes */
1153
1154 /* Simple case */
1155 if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
1156 {
1157 if (globalThreadSizes[0] <= sizeLimit[0]
1158 && globalThreadSizes[1] <= sizeLimit[1])
1159 {
1160 outMaxSizes[0] = globalThreadSizes[0];
1161 outMaxSizes[1] = globalThreadSizes[1];
1162 return 0;
1163 }
1164 }
1165
1166 size_t remainingSize, sizeForThisOne;
1167 remainingSize = maxSize;
1168 int i, j;
1169 for (i = 0; i < 2; i++)
1170 {
1171 if (globalThreadSizes[i] > remainingSize)
1172 sizeForThisOne = remainingSize;
1173 else
1174 sizeForThisOne = globalThreadSizes[i];
1175 for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1176 || (sizeForThisOne > sizeLimit[i]);
1177 sizeForThisOne--)
1178 ;
1179 outMaxSizes[i] = sizeForThisOne;
1180 remainingSize = maxSize;
1181 for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1182 }
1183
1184 return 0;
1185 }
1186
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1187 int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
1188 size_t *globalThreadSizes,
1189 size_t *outMaxSizes)
1190 {
1191 size_t sizeLimit[3];
1192 size_t maxSize;
1193 int error =
1194 get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1195 if (error != 0) return error;
1196 /* Now find a set of factors, multiplied together less than maxSize, but
1197 each a factor of the global sizes */
1198
1199 /* Simple case */
1200 if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
1201 <= maxSize)
1202 {
1203 if (globalThreadSizes[0] <= sizeLimit[0]
1204 && globalThreadSizes[1] <= sizeLimit[1]
1205 && globalThreadSizes[2] <= sizeLimit[2])
1206 {
1207 outMaxSizes[0] = globalThreadSizes[0];
1208 outMaxSizes[1] = globalThreadSizes[1];
1209 outMaxSizes[2] = globalThreadSizes[2];
1210 return 0;
1211 }
1212 }
1213
1214 size_t remainingSize, sizeForThisOne;
1215 remainingSize = maxSize;
1216 int i, j;
1217 for (i = 0; i < 3; i++)
1218 {
1219 if (globalThreadSizes[i] > remainingSize)
1220 sizeForThisOne = remainingSize;
1221 else
1222 sizeForThisOne = globalThreadSizes[i];
1223 for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1224 || (sizeForThisOne > sizeLimit[i]);
1225 sizeForThisOne--)
1226 ;
1227 outMaxSizes[i] = sizeForThisOne;
1228 remainingSize = maxSize;
1229 for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1230 }
1231
1232 return 0;
1233 }
1234
1235 /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1236 int is_image_format_supported(cl_context context, cl_mem_flags flags,
1237 cl_mem_object_type image_type,
1238 const cl_image_format *fmt)
1239 {
1240 cl_image_format *list;
1241 cl_uint count = 0;
1242 cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
1243 NULL, &count);
1244 if (count == 0) return 0;
1245
1246 list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
1247 if (NULL == list)
1248 {
1249 log_error("Error: unable to allocate %zu byte buffer for image format "
1250 "list at %s:%d (err = %d)\n",
1251 count * sizeof(cl_image_format), __FILE__, __LINE__, err);
1252 return 0;
1253 }
1254 BufferOwningPtr<cl_image_format> listBuf(list);
1255
1256
1257 cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
1258 list, NULL);
1259 if (error)
1260 {
1261 log_error("Error: failed to obtain supported image type list at %s:%d "
1262 "(err = %d)\n",
1263 __FILE__, __LINE__, err);
1264 return 0;
1265 }
1266
1267 // iterate looking for a match.
1268 cl_uint i;
1269 for (i = 0; i < count; i++)
1270 {
1271 if (fmt->image_channel_data_type == list[i].image_channel_data_type
1272 && fmt->image_channel_order == list[i].image_channel_order)
1273 break;
1274 }
1275
1276 return (i < count) ? 1 : 0;
1277 }
1278
1279 size_t get_pixel_bytes(const cl_image_format *fmt);
get_pixel_bytes(const cl_image_format * fmt)1280 size_t get_pixel_bytes(const cl_image_format *fmt)
1281 {
1282 size_t chanCount;
1283 switch (fmt->image_channel_order)
1284 {
1285 case CL_R:
1286 case CL_A:
1287 case CL_Rx:
1288 case CL_INTENSITY:
1289 case CL_LUMINANCE:
1290 case CL_DEPTH: chanCount = 1; break;
1291 case CL_RG:
1292 case CL_RA:
1293 case CL_RGx: chanCount = 2; break;
1294 case CL_RGB:
1295 case CL_RGBx:
1296 case CL_sRGB:
1297 case CL_sRGBx: chanCount = 3; break;
1298 case CL_RGBA:
1299 case CL_ARGB:
1300 case CL_BGRA:
1301 case CL_sBGRA:
1302 case CL_sRGBA:
1303 #ifdef CL_1RGB_APPLE
1304 case CL_1RGB_APPLE:
1305 #endif
1306 #ifdef CL_BGR1_APPLE
1307 case CL_BGR1_APPLE:
1308 #endif
1309 chanCount = 4;
1310 break;
1311 default:
1312 log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
1313 abort();
1314 break;
1315 }
1316
1317 switch (fmt->image_channel_data_type)
1318 {
1319 case CL_UNORM_SHORT_565:
1320 case CL_UNORM_SHORT_555: return 2;
1321
1322 case CL_UNORM_INT_101010: return 4;
1323
1324 case CL_SNORM_INT8:
1325 case CL_UNORM_INT8:
1326 case CL_SIGNED_INT8:
1327 case CL_UNSIGNED_INT8: return chanCount;
1328
1329 case CL_SNORM_INT16:
1330 case CL_UNORM_INT16:
1331 case CL_HALF_FLOAT:
1332 case CL_SIGNED_INT16:
1333 case CL_UNSIGNED_INT16:
1334 #ifdef CL_SFIXED14_APPLE
1335 case CL_SFIXED14_APPLE:
1336 #endif
1337 return chanCount * 2;
1338
1339 case CL_SIGNED_INT32:
1340 case CL_UNSIGNED_INT32:
1341 case CL_FLOAT: return chanCount * 4;
1342
1343 default:
1344 log_error("Unknown channel data type at %s:%d!\n", __FILE__,
1345 __LINE__);
1346 abort();
1347 }
1348
1349 return 0;
1350 }
1351
verifyImageSupport(cl_device_id device)1352 test_status verifyImageSupport(cl_device_id device)
1353 {
1354 int result = checkForImageSupport(device);
1355 if (result == 0)
1356 {
1357 return TEST_PASS;
1358 }
1359 if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
1360 {
1361 log_error("SKIPPED: Device does not supported images as required by "
1362 "this test!\n");
1363 return TEST_SKIP;
1364 }
1365 return TEST_FAIL;
1366 }
1367
checkForImageSupport(cl_device_id device)1368 int checkForImageSupport(cl_device_id device)
1369 {
1370 cl_uint i;
1371 int error;
1372
1373
1374 /* Check the device props to see if images are supported at all first */
1375 error =
1376 clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1377 test_error(error, "Unable to query device for image support");
1378 if (i == 0)
1379 {
1380 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1381 }
1382
1383 /* So our support is good */
1384 return 0;
1385 }
1386
checkFor3DImageSupport(cl_device_id device)1387 int checkFor3DImageSupport(cl_device_id device)
1388 {
1389 cl_uint i;
1390 int error;
1391
1392 /* Check the device props to see if images are supported at all first */
1393 error =
1394 clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1395 test_error(error, "Unable to query device for image support");
1396 if (i == 0)
1397 {
1398 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1399 }
1400
1401 char profile[128];
1402 error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
1403 NULL);
1404 test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
1405 if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
1406 {
1407 size_t width = -1L;
1408 size_t height = -1L;
1409 size_t depth = -1L;
1410 error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
1411 sizeof(width), &width, NULL);
1412 test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
1413 error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
1414 sizeof(height), &height, NULL);
1415 test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
1416 error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
1417 sizeof(depth), &depth, NULL);
1418 test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
1419
1420 if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1421 }
1422
1423 /* So our support is good */
1424 return 0;
1425 }
1426
checkForReadWriteImageSupport(cl_device_id device)1427 int checkForReadWriteImageSupport(cl_device_id device)
1428 {
1429 if (checkForImageSupport(device))
1430 {
1431 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1432 }
1433
1434 auto device_cl_version = get_device_cl_version(device);
1435 if (device_cl_version >= Version(3, 0))
1436 {
1437 // In OpenCL 3.0, Read-Write images are optional.
1438 // Check if they are supported.
1439 cl_uint are_rw_images_supported{};
1440 test_error(
1441 clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
1442 sizeof(are_rw_images_supported),
1443 &are_rw_images_supported, nullptr),
1444 "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
1445 if (0 == are_rw_images_supported)
1446 {
1447 log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
1448 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1449 }
1450 }
1451 // READ_WRITE images are not supported on 1.X devices.
1452 else if (device_cl_version < Version(2, 0))
1453 {
1454 log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
1455 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1456 }
1457 // Support for read-write image arguments is required
1458 // for an 2.X device if the device supports images.
1459
1460 /* So our support is good */
1461 return 0;
1462 }
1463
get_min_alignment(cl_context context)1464 size_t get_min_alignment(cl_context context)
1465 {
1466 static cl_uint align_size = 0;
1467
1468 if (0 == align_size)
1469 {
1470 cl_device_id *devices;
1471 size_t devices_size = 0;
1472 cl_uint result = 0;
1473 cl_int error;
1474 int i;
1475
1476 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1477 &devices_size);
1478 test_error_ret(error, "clGetContextInfo failed", 0);
1479
1480 devices = (cl_device_id *)malloc(devices_size);
1481 if (devices == NULL)
1482 {
1483 print_error(error, "malloc failed");
1484 return 0;
1485 }
1486
1487 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
1488 (void *)devices, NULL);
1489 test_error_ret(error, "clGetContextInfo failed", 0);
1490
1491 for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
1492 {
1493 cl_uint alignment = 0;
1494
1495 error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1496 sizeof(cl_uint), (void *)&alignment, NULL);
1497
1498 if (error == CL_SUCCESS)
1499 {
1500 alignment >>= 3; // convert bits to bytes
1501 result = (alignment > result) ? alignment : result;
1502 }
1503 else
1504 print_error(error, "clGetDeviceInfo failed");
1505 }
1506
1507 align_size = result;
1508 free(devices);
1509 }
1510
1511 return align_size;
1512 }
1513
get_default_rounding_mode(cl_device_id device)1514 cl_device_fp_config get_default_rounding_mode(cl_device_id device)
1515 {
1516 char profileStr[128] = "";
1517 cl_device_fp_config single = 0;
1518 int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
1519 sizeof(single), &single, NULL);
1520 if (error)
1521 test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
1522 0);
1523
1524 if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
1525
1526 if (0 == (single & CL_FP_ROUND_TO_ZERO))
1527 test_error_ret(-1,
1528 "FAILURE: device must support either "
1529 "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
1530 0);
1531
1532 // Make sure we are an embedded device before allowing a pass
1533 if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
1534 &profileStr, NULL)))
1535 test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
1536
1537 if (strcmp(profileStr, "EMBEDDED_PROFILE"))
1538 test_error_ret(error,
1539 "FAILURE: non-EMBEDDED_PROFILE devices must support "
1540 "CL_FP_ROUND_TO_NEAREST",
1541 0);
1542
1543 return CL_FP_ROUND_TO_ZERO;
1544 }
1545
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1546 int checkDeviceForQueueSupport(cl_device_id device,
1547 cl_command_queue_properties prop)
1548 {
1549 cl_command_queue_properties realProps;
1550 cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
1551 sizeof(realProps), &realProps, NULL);
1552 test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
1553
1554 return (realProps & prop) ? 1 : 0;
1555 }
1556
printDeviceHeader(cl_device_id device)1557 int printDeviceHeader(cl_device_id device)
1558 {
1559 char deviceName[512], deviceVendor[512], deviceVersion[512],
1560 cLangVersion[512];
1561 int error;
1562
1563 error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
1564 deviceName, NULL);
1565 test_error(error, "Unable to get CL_DEVICE_NAME for device");
1566
1567 error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
1568 deviceVendor, NULL);
1569 test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
1570
1571 error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
1572 deviceVersion, NULL);
1573 test_error(error, "Unable to get CL_DEVICE_VERSION for device");
1574
1575 error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1576 sizeof(cLangVersion), cLangVersion, NULL);
1577 test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
1578
1579 log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
1580 "Device Version = %s%s%s\n",
1581 deviceName, deviceVendor, deviceVersion,
1582 (error == CL_SUCCESS) ? ", CL C Version = " : "",
1583 (error == CL_SUCCESS) ? cLangVersion : "");
1584
1585 auto version = get_device_cl_version(device);
1586 if (version >= Version(3, 0))
1587 {
1588 auto ctsVersion = get_device_info_string(
1589 device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
1590 log_info("Device latest conformance version passed: %s\n",
1591 ctsVersion.c_str());
1592 }
1593
1594 return CL_SUCCESS;
1595 }
1596
get_device_cl_c_version(cl_device_id device)1597 Version get_device_cl_c_version(cl_device_id device)
1598 {
1599 auto device_cl_version = get_device_cl_version(device);
1600
1601 // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
1602 // did not exist, but since this is just the first version we can
1603 // return 1.0.
1604 if (device_cl_version == Version{ 1, 0 })
1605 {
1606 return Version{ 1, 0 };
1607 }
1608
1609 // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
1610 // versions are backwards compatible, hence querying with the
1611 // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
1612 // OpenCL C version.
1613 size_t opencl_c_version_size_in_bytes{};
1614 auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
1615 &opencl_c_version_size_in_bytes);
1616 test_error_ret(error,
1617 "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1618 (Version{ -1, 0 }));
1619
1620 std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
1621 error =
1622 clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1623 opencl_c_version.size(), &opencl_c_version[0], nullptr);
1624
1625 test_error_ret(error,
1626 "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1627 (Version{ -1, 0 }));
1628
1629 // Scrape out the major, minor pair from the string.
1630 auto major = opencl_c_version[opencl_c_version.find('.') - 1];
1631 auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
1632
1633 return Version{ major - '0', minor - '0' };
1634 }
1635
get_device_latest_cl_c_version(cl_device_id device)1636 Version get_device_latest_cl_c_version(cl_device_id device)
1637 {
1638 auto device_cl_version = get_device_cl_version(device);
1639
1640 // If the device version >= 3.0 it must support the
1641 // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
1642 // recent CL C version supported by the device.
1643 if (device_cl_version >= Version{ 3, 0 })
1644 {
1645 size_t opencl_c_all_versions_size_in_bytes{};
1646 auto error =
1647 clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1648 &opencl_c_all_versions_size_in_bytes);
1649 test_error_ret(
1650 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1651 (Version{ -1, 0 }));
1652 std::vector<cl_name_version> name_versions(
1653 opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1654 error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1655 opencl_c_all_versions_size_in_bytes,
1656 name_versions.data(), nullptr);
1657 test_error_ret(
1658 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1659 (Version{ -1, 0 }));
1660
1661 Version max_supported_cl_c_version{};
1662 for (const auto &name_version : name_versions)
1663 {
1664 Version current_version{
1665 static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1666 static_cast<int>(CL_VERSION_MINOR(name_version.version))
1667 };
1668 max_supported_cl_c_version =
1669 (current_version > max_supported_cl_c_version)
1670 ? current_version
1671 : max_supported_cl_c_version;
1672 }
1673 return max_supported_cl_c_version;
1674 }
1675
1676 return get_device_cl_c_version(device);
1677 }
1678
get_max_OpenCL_C_for_context(cl_context context)1679 Version get_max_OpenCL_C_for_context(cl_context context)
1680 {
1681 // Get all the devices in the context and find the maximum
1682 // universally supported OpenCL C version.
1683 size_t devices_size_in_bytes{};
1684 auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
1685 &devices_size_in_bytes);
1686 test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
1687 (Version{ -1, 0 }));
1688 std::vector<cl_device_id> devices(devices_size_in_bytes
1689 / sizeof(cl_device_id));
1690 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
1691 devices.data(), nullptr);
1692 auto current_version = get_device_latest_cl_c_version(devices[0]);
1693 std::for_each(std::next(devices.begin()), devices.end(),
1694 [¤t_version](cl_device_id device) {
1695 auto device_version =
1696 get_device_latest_cl_c_version(device);
1697 // OpenCL 3.0 is not backwards compatible with 2.0.
1698 // If we have 3.0 and 2.0 in the same driver we
1699 // use 1.2.
1700 if (((device_version >= Version(2, 0)
1701 && device_version < Version(3, 0))
1702 && current_version >= Version(3, 0))
1703 || (device_version >= Version(3, 0)
1704 && (current_version >= Version(2, 0)
1705 && current_version < Version(3, 0))))
1706 {
1707 current_version = Version(1, 2);
1708 }
1709 else
1710 {
1711 current_version =
1712 std::min(device_version, current_version);
1713 }
1714 });
1715 return current_version;
1716 }
1717
device_supports_cl_c_version(cl_device_id device,Version version)1718 bool device_supports_cl_c_version(cl_device_id device, Version version)
1719 {
1720 auto device_cl_version = get_device_cl_version(device);
1721
1722 // In general, a device does not support an OpenCL C version if it is <=
1723 // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
1724 // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
1725
1726 // If the device version >= 3.0 it must support the
1727 // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
1728 // used must appear in the query result if it's <=
1729 // CL_DEVICE_OPENCL_C_VERSION.
1730 if (device_cl_version >= Version{ 3, 0 })
1731 {
1732 size_t opencl_c_all_versions_size_in_bytes{};
1733 auto error =
1734 clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1735 &opencl_c_all_versions_size_in_bytes);
1736 test_error_ret(
1737 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1738 (false));
1739 std::vector<cl_name_version> name_versions(
1740 opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1741 error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1742 opencl_c_all_versions_size_in_bytes,
1743 name_versions.data(), nullptr);
1744 test_error_ret(
1745 error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1746 (false));
1747
1748 for (const auto &name_version : name_versions)
1749 {
1750 Version current_version{
1751 static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1752 static_cast<int>(CL_VERSION_MINOR(name_version.version))
1753 };
1754 if (current_version == version)
1755 {
1756 return true;
1757 }
1758 }
1759 }
1760
1761 return version <= get_device_cl_c_version(device);
1762 }
1763
// Calls fn repeatedly until it returns true or the accumulated sleep time
// reaches timeout_ms, sleeping interval_ms between attempts.
//
// Returns true as soon as fn returns true, false if the timeout is reached
// first (fn is never called when timeout_ms is 0). Elapsed time is counted
// as the sum of the sleep intervals, not measured with a clock.
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    for (unsigned elapsed_ms = 0; elapsed_ms < timeout_ms;
         elapsed_ms += interval_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(interval_ms * 1000);
    }

    return false;
}
1783
device_supports_double(cl_device_id device)1784 bool device_supports_double(cl_device_id device)
1785 {
1786 if (is_extension_available(device, "cl_khr_fp64"))
1787 {
1788 return true;
1789 }
1790 else
1791 {
1792 cl_device_fp_config double_fp_config;
1793 cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
1794 sizeof(double_fp_config),
1795 &double_fp_config, nullptr);
1796 test_error(err,
1797 "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
1798 return double_fp_config != 0;
1799 }
1800 }
1801
device_supports_half(cl_device_id device)1802 bool device_supports_half(cl_device_id device)
1803 {
1804 return is_extension_available(device, "cl_khr_fp16");
1805 }
1806