• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "crc32.h"
17 #include "kernelHelpers.h"
18 #include "deviceInfo.h"
19 #include "errorHelpers.h"
20 #include "imageHelpers.h"
21 #include "typeWrappers.h"
22 #include "testHarness.h"
23 #include "parseParameters.h"
24 
25 #include <cassert>
26 #include <vector>
27 #include <string>
28 #include <fstream>
29 #include <sstream>
30 #include <iomanip>
31 #include <mutex>
32 #include <algorithm>
33 
// Path separator inserted between a directory and a file name when the helpers
// in this file build cache file paths: a backslash when _WIN32 is defined, a
// forward slash otherwise.
34 #if defined(_WIN32)
35 std::string slash = "\\";
36 #else
37 std::string slash = "/";
38 #endif
39 
// Locked by the static create_single_kernel_helper_create_program() overload
// for the whole duration of program creation.
40 static std::mutex gCompilerMutex;
41 
// Forward declaration; the definition appears further down in this file.
42 static cl_int get_first_device_id(const cl_context context,
43                                   cl_device_id &device);
44 
// Returns the size, in bytes, of the file named `fileName`.
//
// Returns 0 when the file cannot be opened or when its length cannot be
// determined, so callers never receive the -1 that tellg() reports on
// failure (which would turn into a huge value once cast to an unsigned type).
long get_file_size(const std::string &fileName)
{
    std::ifstream ifs(fileName.c_str(), std::ios::binary);
    if (!ifs.good()) return 0;

    // Seek to the end: the resulting stream position is the file length.
    ifs.seekg(0, std::ios::end);
    const std::streamoff length = ifs.tellg();
    if (ifs.fail() || length < 0) return 0;

    return static_cast<long>(length);
}
54 
// Concatenates the first `numKernelLines` C strings of `kernelProgram`, in
// order and without adding any separator, and returns the result.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string joined;
    const char *const *const last = kernelProgram + numKernelLines;
    for (const char *const *line = kernelProgram; line != last; ++line)
    {
        joined.append(*line);
    }
    return joined;
}
67 
get_kernel_name(const std::string & source)68 std::string get_kernel_name(const std::string &source)
69 {
70     // Create list of kernel names
71     std::string kernelsList;
72     size_t kPos = source.find("kernel");
73     while (kPos != std::string::npos)
74     {
75         // check for '__kernel'
76         size_t pos = kPos;
77         if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
78             pos -= 2;
79 
80         // check character before 'kernel' (white space expected)
81         size_t wsPos = source.find_last_of(" \t\r\n", pos);
82         if (wsPos == std::string::npos || wsPos + 1 == pos)
83         {
84             // check character after 'kernel' (white space expected)
85             size_t akPos = kPos + sizeof("kernel") - 1;
86             wsPos = source.find_first_of(" \t\r\n", akPos);
87             if (!(wsPos == akPos))
88             {
89                 kPos = source.find("kernel", kPos + 1);
90                 continue;
91             }
92 
93             bool attributeFound;
94             do
95             {
96                 attributeFound = false;
97                 // find '(' after kernel name name
98                 size_t pPos = source.find("(", akPos);
99                 if (!(pPos != std::string::npos)) continue;
100 
101                 // check for not empty kernel name before '('
102                 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
103                 if (!(pos != std::string::npos && pos > akPos)) continue;
104 
105                 // find character before kernel name
106                 wsPos = source.find_last_of(" \t\r\n", pos);
107                 if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
108 
109                 std::string name =
110                     source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
111                 // check for kernel attribute
112                 if (name == "__attribute__")
113                 {
114                     attributeFound = true;
115                     int pCount = 1;
116                     akPos = pPos + 1;
117                     while (pCount > 0 && akPos != std::string::npos)
118                     {
119                         akPos = source.find_first_of("()", akPos + 1);
120                         if (akPos != std::string::npos)
121                         {
122                             if (source[akPos] == '(')
123                                 pCount++;
124                             else
125                                 pCount--;
126                         }
127                     }
128                 }
129                 else
130                 {
131                     kernelsList += name + ".";
132                 }
133             } while (attributeFound);
134         }
135         kPos = source.find("kernel", kPos + 1);
136     }
137     std::ostringstream oss;
138     if (MAX_LEN_FOR_KERNEL_LIST > 0)
139     {
140         if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
141         {
142             kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
143             kernelsList[kernelsList.size() - 1] = '.';
144             kernelsList[kernelsList.size() - 1] = '.';
145         }
146         oss << kernelsList;
147     }
148     return oss.str();
149 }
150 
151 static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)152 get_offline_compilation_file_type_str(const CompilationMode compilationMode)
153 {
154     switch (compilationMode)
155     {
156         default: assert(0 && "Invalid compilation mode"); abort();
157         case kOnline:
158             assert(0 && "Invalid compilation mode for offline compilation");
159             abort();
160         case kBinary: return "binary";
161         case kSpir_v: return "SPIR-V";
162     }
163 }
164 
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165 static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166                                               const char *const *kernelProgram,
167                                               const char *buildOptions)
168 {
169     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170     std::string kernelName = get_kernel_name(kernel);
171     cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172     std::ostringstream oss;
173     oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
174         << kernelCrc;
175     if (buildOptions)
176     {
177         cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
178         oss << '.' << std::hex << std::setfill('0') << std::setw(8)
179             << bOptionsCrc;
180     }
181     return oss.str();
182 }
183 
184 
185 static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)186 get_cl_build_options_filename_with_path(const std::string &filePath,
187                                         const std::string &fileNamePrefix)
188 {
189     return filePath + slash + fileNamePrefix + ".options";
190 }
191 
192 static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)193 get_cl_source_filename_with_path(const std::string &filePath,
194                                  const std::string &fileNamePrefix)
195 {
196     return filePath + slash + fileNamePrefix + ".cl";
197 }
198 
199 static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)200 get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
201                               const std::string &filePath,
202                               const std::string &fileNamePrefix)
203 {
204     std::string binaryFilename = filePath + slash + fileNamePrefix;
205     if (kSpir_v == mode)
206     {
207         std::ostringstream extension;
208         extension << ".spv" << deviceAddrSpaceSize;
209         binaryFilename += extension.str();
210     }
211     return binaryFilename;
212 }
213 
file_exist_on_disk(const std::string & filePath,const std::string & fileName)214 static bool file_exist_on_disk(const std::string &filePath,
215                                const std::string &fileName)
216 {
217     std::string fileNameWithPath = filePath + slash + fileName;
218     bool exist = false;
219     std::ifstream ifs;
220 
221     ifs.open(fileNameWithPath.c_str(), std::ios::binary);
222     if (ifs.good()) exist = true;
223     ifs.close();
224     return exist;
225 }
226 
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)227 static bool should_save_kernel_source_to_disk(CompilationMode mode,
228                                               CompilationCacheMode cacheMode,
229                                               const std::string &binaryPath,
230                                               const std::string &binaryName)
231 {
232     bool saveToDisk = false;
233     if (cacheMode == kCacheModeDumpCl
234         || (cacheMode == kCacheModeOverwrite && mode != kOnline))
235     {
236         saveToDisk = true;
237     }
238     if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
239     {
240         saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
241     }
242     return saveToDisk;
243 }
244 
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)245 static int save_kernel_build_options_to_disk(const std::string &path,
246                                              const std::string &prefix,
247                                              const char *buildOptions)
248 {
249     std::string filename =
250         get_cl_build_options_filename_with_path(path, prefix);
251     std::ofstream ofs(filename.c_str(), std::ios::binary);
252     if (!ofs.good())
253     {
254         log_info("Can't save kernel build options: %s\n", filename.c_str());
255         return -1;
256     }
257     ofs.write(buildOptions, strlen(buildOptions));
258     ofs.close();
259     log_info("Saved kernel build options to file: %s\n", filename.c_str());
260     return CL_SUCCESS;
261 }
262 
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)263 static int save_kernel_source_to_disk(const std::string &path,
264                                       const std::string &prefix,
265                                       const std::string &source)
266 {
267     std::string filename = get_cl_source_filename_with_path(path, prefix);
268     std::ofstream ofs(filename.c_str(), std::ios::binary);
269     if (!ofs.good())
270     {
271         log_info("Can't save kernel source: %s\n", filename.c_str());
272         return -1;
273     }
274     ofs.write(source.c_str(), source.size());
275     ofs.close();
276     log_info("Saved kernel source to file: %s\n", filename.c_str());
277     return CL_SUCCESS;
278 }
279 
280 static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)281 save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
282                                        const char *const *kernelProgram,
283                                        const char *buildOptions)
284 {
285     int error;
286 
287     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
288     std::string kernelNamePrefix =
289         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
290 
291     // save kernel source to disk
292     error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
293                                        kernel);
294 
295     // save kernel build options to disk if exists
296     if (buildOptions != NULL)
297         error |= save_kernel_build_options_to_disk(
298             gCompilationCachePath, kernelNamePrefix, buildOptions);
299 
300     return error;
301 }
302 
303 static std::string
get_compilation_mode_str(const CompilationMode compilationMode)304 get_compilation_mode_str(const CompilationMode compilationMode)
305 {
306     switch (compilationMode)
307     {
308         default: assert(0 && "Invalid compilation mode"); abort();
309         case kOnline: return "online";
310         case kBinary: return "binary";
311         case kSpir_v: return "spir-v";
312     }
313 }
314 
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)315 static cl_int get_cl_device_info_str(const cl_device_id device,
316                                      const cl_uint device_address_space_size,
317                                      const CompilationMode compilationMode,
318                                      std::string &clDeviceInfo)
319 {
320     std::string extensionsString = get_device_extensions_string(device);
321     std::string versionString = get_device_version_string(device);
322 
323     std::ostringstream clDeviceInfoStream;
324     std::string file_type =
325         get_offline_compilation_file_type_str(compilationMode);
326     clDeviceInfoStream << "# OpenCL device info affecting " << file_type
327                        << " offline compilation:" << std::endl
328                        << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
329                        << std::endl
330                        << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
331                        << std::endl;
332     /* We only need the device's supported IL version(s) when compiling IL
333      * that will be loaded with clCreateProgramWithIL() */
334     if (compilationMode == kSpir_v)
335     {
336         std::string ilVersionString = get_device_il_version_string(device);
337         clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
338                            << "\"" << std::endl;
339     }
340     clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
341                        << std::endl;
342     clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
343                        << (0 == checkForImageSupport(device)) << std::endl;
344     clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
345                        << "\"" << std::endl;
346 
347     clDeviceInfo = clDeviceInfoStream.str();
348 
349     return CL_SUCCESS;
350 }
351 
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)352 static int write_cl_device_info(const cl_device_id device,
353                                 const cl_uint device_address_space_size,
354                                 const CompilationMode compilationMode,
355                                 std::string &clDeviceInfoFilename)
356 {
357     std::string clDeviceInfo;
358     int error = get_cl_device_info_str(device, device_address_space_size,
359                                        compilationMode, clDeviceInfo);
360     if (error != CL_SUCCESS)
361     {
362         return error;
363     }
364 
365     cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
366 
367     /* Get the filename for the clDeviceInfo file.
368      * Note: the file includes the hash on its content, so it is usually
369      * unnecessary to delete it. */
370     std::ostringstream clDeviceInfoFilenameStream;
371     clDeviceInfoFilenameStream << gCompilationCachePath << slash
372                                << "clDeviceInfo-";
373     clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
374                                << crc << ".txt";
375 
376     clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
377 
378     if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
379     {
380         /* The CL device info file has already been created.
381          * Nothing to do. */
382         return 0;
383     }
384 
385     /* The file does not exist or its length is not as expected.
386      * Create/overwrite it. */
387     std::ofstream ofs(clDeviceInfoFilename);
388     if (!ofs.good())
389     {
390         log_info("OfflineCompiler: can't create CL device info file: %s\n",
391                  clDeviceInfoFilename.c_str());
392         return -1;
393     }
394     ofs << clDeviceInfo;
395     ofs.close();
396 
397     return CL_SUCCESS;
398 }
399 
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)400 static std::string get_offline_compilation_command(
401     const cl_uint device_address_space_size,
402     const CompilationMode compilationMode, const std::string &bOptions,
403     const std::string &sourceFilename, const std::string &outputFilename,
404     const std::string &clDeviceInfoFilename)
405 {
406     std::ostringstream wrapperOptions;
407 
408     wrapperOptions << gCompilationProgram
409                    << " --mode=" << get_compilation_mode_str(compilationMode)
410                    << " --source=" << sourceFilename
411                    << " --output=" << outputFilename
412                    << " --cl-device-info=" << clDeviceInfoFilename;
413 
414     if (bOptions != "")
415     {
416         // Add build options passed to this function
417         wrapperOptions << " -- " << bOptions;
418     }
419 
420     return wrapperOptions.str();
421 }
422 
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename)423 static int invoke_offline_compiler(const cl_device_id device,
424                                    const cl_uint device_address_space_size,
425                                    const CompilationMode compilationMode,
426                                    const std::string &bOptions,
427                                    const std::string &sourceFilename,
428                                    const std::string &outputFilename)
429 {
430     std::string runString;
431     std::string clDeviceInfoFilename;
432 
433     // See cl_offline_compiler-interface.txt for a description of the
434     // format of the CL device information file generated below, and
435     // the internal command line interface for invoking the offline
436     // compiler.
437 
438     cl_int err = write_cl_device_info(device, device_address_space_size,
439                                       compilationMode, clDeviceInfoFilename);
440     if (err != CL_SUCCESS)
441     {
442         log_error("Failed writing CL device info file\n");
443         return err;
444     }
445 
446     runString = get_offline_compilation_command(
447         device_address_space_size, compilationMode, bOptions, sourceFilename,
448         outputFilename, clDeviceInfoFilename);
449 
450     // execute script
451     log_info("Executing command: %s\n", runString.c_str());
452     fflush(stdout);
453     int returnCode = system(runString.c_str());
454     if (returnCode != 0)
455     {
456         log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
457         return CL_COMPILE_PROGRAM_FAILURE;
458     }
459 
460     return CL_SUCCESS;
461 }
462 
get_first_device_id(const cl_context context,cl_device_id & device)463 static cl_int get_first_device_id(const cl_context context,
464                                   cl_device_id &device)
465 {
466     cl_uint numDevices = 0;
467     cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
468                                     sizeof(cl_uint), &numDevices, NULL);
469     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
470 
471     if (numDevices == 0)
472     {
473         log_error("ERROR: No CL devices found\n");
474         return -1;
475     }
476 
477     std::vector<cl_device_id> devices(numDevices, 0);
478     error =
479         clGetContextInfo(context, CL_CONTEXT_DEVICES,
480                          numDevices * sizeof(cl_device_id), &devices[0], NULL);
481     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
482 
483     device = devices[0];
484     return CL_SUCCESS;
485 }
486 
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)487 static cl_int get_device_address_bits(const cl_device_id device,
488                                       cl_uint &device_address_space_size)
489 {
490     cl_int error =
491         clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
492                         &device_address_space_size, NULL);
493     test_error(error, "Unable to obtain device address bits");
494 
495     if (device_address_space_size != 32 && device_address_space_size != 64)
496     {
497         log_error("ERROR: Unexpected number of device address bits: %u\n",
498                   device_address_space_size);
499         return -1;
500     }
501 
502     return CL_SUCCESS;
503 }
504 
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)505 static int get_offline_compiler_output(
506     std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
507     const CompilationMode compilationMode, const std::string &bOptions,
508     const std::string &kernelPath, const std::string &kernelNamePrefix)
509 {
510     std::string sourceFilename =
511         get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
512     std::string outputFilename = get_binary_filename_with_path(
513         compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
514 
515     ifs.open(outputFilename.c_str(), std::ios::binary);
516     if (!ifs.good())
517     {
518         std::string file_type =
519             get_offline_compilation_file_type_str(compilationMode);
520         if (gCompilationCacheMode == kCacheModeForceRead)
521         {
522             log_info("OfflineCompiler: can't open cached %s file: %s\n",
523                      file_type.c_str(), outputFilename.c_str());
524             return -1;
525         }
526         else
527         {
528             int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
529                                                 compilationMode, bOptions,
530                                                 sourceFilename, outputFilename);
531             if (error != CL_SUCCESS) return error;
532 
533             // open output file for reading
534             ifs.open(outputFilename.c_str(), std::ios::binary);
535             if (!ifs.good())
536             {
537                 log_info("OfflineCompiler: can't read generated %s file: %s\n",
538                          file_type.c_str(), outputFilename.c_str());
539                 return -1;
540             }
541         }
542     }
543 
544     if (compilationMode == kSpir_v && !gDisableSPIRVValidation)
545     {
546         std::string runString = gSPIRVValidator + " " + outputFilename;
547 
548         int returnCode = system(runString.c_str());
549         if (returnCode == -1)
550         {
551             log_error("Error: failed to invoke SPIR-V validator\n");
552             return CL_COMPILE_PROGRAM_FAILURE;
553         }
554         else if (returnCode != 0)
555         {
556             log_error(
557                 "Failed to validate SPIR-V file %s: system() returned 0x%x\n",
558                 outputFilename.c_str(), returnCode);
559             return CL_COMPILE_PROGRAM_FAILURE;
560         }
561     }
562 
563     return CL_SUCCESS;
564 }
565 
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,CompilationMode compilationMode)566 static int create_single_kernel_helper_create_program_offline(
567     cl_context context, cl_device_id device, cl_program *outProgram,
568     unsigned int numKernelLines, const char *const *kernelProgram,
569     const char *buildOptions, CompilationMode compilationMode)
570 {
571     if (kCacheModeDumpCl == gCompilationCacheMode)
572     {
573         return -1;
574     }
575 
576     // Get device CL_DEVICE_ADDRESS_BITS
577     int error;
578     cl_uint device_address_space_size = 0;
579     if (device == NULL)
580     {
581         error = get_first_device_id(context, device);
582         test_error(error, "Failed to get device ID for first device");
583     }
584     error = get_device_address_bits(device, device_address_space_size);
585     if (error != CL_SUCCESS) return error;
586 
587     // set build options
588     std::string bOptions;
589     bOptions += buildOptions ? std::string(buildOptions) : "";
590 
591     std::string kernelName =
592         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
593 
594 
595     std::ifstream ifs;
596     error = get_offline_compiler_output(ifs, device, device_address_space_size,
597                                         compilationMode, bOptions,
598                                         gCompilationCachePath, kernelName);
599     if (error != CL_SUCCESS) return error;
600 
601     ifs.seekg(0, ifs.end);
602     size_t length = static_cast<size_t>(ifs.tellg());
603     ifs.seekg(0, ifs.beg);
604 
605     // treat modifiedProgram as input for clCreateProgramWithBinary
606     if (compilationMode == kBinary)
607     {
608         // read binary from file:
609         std::vector<unsigned char> modifiedKernelBuf(length);
610 
611         ifs.read((char *)&modifiedKernelBuf[0], length);
612         ifs.close();
613 
614         size_t lengths = modifiedKernelBuf.size();
615         const unsigned char *binaries = { &modifiedKernelBuf[0] };
616         log_info("offlineCompiler: clCreateProgramWithSource replaced with "
617                  "clCreateProgramWithBinary\n");
618         *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
619                                                 &binaries, NULL, &error);
620         if (*outProgram == NULL || error != CL_SUCCESS)
621         {
622             print_error(error, "clCreateProgramWithBinary failed");
623             return error;
624         }
625     }
626     // treat modifiedProgram as input for clCreateProgramWithIL
627     else if (compilationMode == kSpir_v)
628     {
629         // read spir-v from file:
630         std::vector<unsigned char> modifiedKernelBuf(length);
631 
632         ifs.read((char *)&modifiedKernelBuf[0], length);
633         ifs.close();
634 
635         size_t length = modifiedKernelBuf.size();
636         log_info("offlineCompiler: clCreateProgramWithSource replaced with "
637                  "clCreateProgramWithIL\n");
638         if (gCoreILProgram)
639         {
640             *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
641                                                 length, &error);
642         }
643         else
644         {
645             cl_platform_id platform;
646             error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
647                                     sizeof(cl_platform_id), &platform, NULL);
648             test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
649 
650             clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
651             clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
652                 clGetExtensionFunctionAddressForPlatform(
653                     platform, "clCreateProgramWithILKHR");
654             if (clCreateProgramWithILKHR == NULL)
655             {
656                 log_error(
657                     "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
658                 return -1;
659             }
660             *outProgram = clCreateProgramWithILKHR(
661                 context, &modifiedKernelBuf[0], length, &error);
662         }
663 
664         if (*outProgram == NULL || error != CL_SUCCESS)
665         {
666             if (gCoreILProgram)
667             {
668                 print_error(error, "clCreateProgramWithIL failed");
669             }
670             else
671             {
672                 print_error(error, "clCreateProgramWithILKHR failed");
673             }
674             return error;
675         }
676     }
677 
678     return CL_SUCCESS;
679 }
680 
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,CompilationMode compilationMode)681 static int create_single_kernel_helper_create_program(
682     cl_context context, cl_device_id device, cl_program *outProgram,
683     unsigned int numKernelLines, const char **kernelProgram,
684     const char *buildOptions, CompilationMode compilationMode)
685 {
686     std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
687 
688     std::string filePrefix =
689         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
690     bool shouldSaveToDisk = should_save_kernel_source_to_disk(
691         compilationMode, gCompilationCacheMode, gCompilationCachePath,
692         filePrefix);
693 
694     if (shouldSaveToDisk)
695     {
696         if (CL_SUCCESS
697             != save_kernel_source_and_options_to_disk(
698                 numKernelLines, kernelProgram, buildOptions))
699         {
700             log_error("Unable to dump kernel source to disk");
701             return -1;
702         }
703     }
704     if (compilationMode == kOnline)
705     {
706         int error = CL_SUCCESS;
707 
708         /* Create the program object from source */
709         *outProgram = clCreateProgramWithSource(context, numKernelLines,
710                                                 kernelProgram, NULL, &error);
711         if (*outProgram == NULL || error != CL_SUCCESS)
712         {
713             print_error(error, "clCreateProgramWithSource failed");
714             return error;
715         }
716         return CL_SUCCESS;
717     }
718     else
719     {
720         return create_single_kernel_helper_create_program_offline(
721             context, device, outProgram, numKernelLines, kernelProgram,
722             buildOptions, compilationMode);
723     }
724 }
725 
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)726 int create_single_kernel_helper_create_program(cl_context context,
727                                                cl_program *outProgram,
728                                                unsigned int numKernelLines,
729                                                const char **kernelProgram,
730                                                const char *buildOptions)
731 {
732     return create_single_kernel_helper_create_program(
733         context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
734         gCompilationMode);
735 }
736 
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)737 int create_single_kernel_helper_create_program_for_device(
738     cl_context context, cl_device_id device, cl_program *outProgram,
739     unsigned int numKernelLines, const char **kernelProgram,
740     const char *buildOptions)
741 {
742     return create_single_kernel_helper_create_program(
743         context, device, outProgram, numKernelLines, kernelProgram,
744         buildOptions, gCompilationMode);
745 }
746 
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)747 int create_single_kernel_helper_with_build_options(
748     cl_context context, cl_program *outProgram, cl_kernel *outKernel,
749     unsigned int numKernelLines, const char **kernelProgram,
750     const char *kernelName, const char *buildOptions)
751 {
752     return create_single_kernel_helper(context, outProgram, outKernel,
753                                        numKernelLines, kernelProgram,
754                                        kernelName, buildOptions);
755 }
756 
757 // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)758 int create_single_kernel_helper(cl_context context, cl_program *outProgram,
759                                 cl_kernel *outKernel,
760                                 unsigned int numKernelLines,
761                                 const char **kernelProgram,
762                                 const char *kernelName,
763                                 const char *buildOptions)
764 {
765     // For the logic that automatically adds -cl-std it is much cleaner if the
766     // build options have RAII. This buffer will store the potentially updated
767     // build options, in which case buildOptions will point at the string owned
768     // by this buffer.
769     std::string build_options_internal{ buildOptions ? buildOptions : "" };
770 
771     // Check the build options for the -cl-std option.
772     if (!buildOptions || !strstr(buildOptions, "-cl-std"))
773     {
774         // If the build option isn't present add it using the latest OpenCL-C
775         // version supported by the device. This allows calling code to force a
776         // particular CL C version if it is required, but also means that
777         // callers need not specify a version if they want to assume the most
778         // recent CL C.
779 
780         auto version = get_max_OpenCL_C_for_context(context);
781 
782         std::string cl_std{};
783         if (version >= Version(3, 0))
784         {
785             cl_std = "-cl-std=CL3.0";
786         }
787         else if (version >= Version(2, 0) && version < Version(3, 0))
788         {
789             cl_std = "-cl-std=CL2.0";
790         }
791         else
792         {
793             // If the -cl-std build option is not specified, the highest OpenCL
794             // C 1.x language version supported by each device is used when
795             // compiling the program for each device.
796             cl_std = "";
797         }
798         build_options_internal += ' ';
799         build_options_internal += cl_std;
800         buildOptions = build_options_internal.c_str();
801     }
802     int error = create_single_kernel_helper_create_program(
803         context, outProgram, numKernelLines, kernelProgram, buildOptions);
804     if (error != CL_SUCCESS)
805     {
806         log_error("Create program failed: %d, line: %d\n", error, __LINE__);
807         return error;
808     }
809 
810     // Remove offline-compiler-only build options
811     std::string newBuildOptions;
812     if (buildOptions != NULL)
813     {
814         newBuildOptions = buildOptions;
815         std::string offlineCompierOptions[] = {
816             "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
817         };
818         for (auto &s : offlineCompierOptions)
819         {
820             std::string::size_type i = newBuildOptions.find(s);
821             if (i != std::string::npos) newBuildOptions.erase(i, s.length());
822         }
823     }
824     // Build program and create kernel
825     return build_program_create_kernel_helper(
826         context, outProgram, outKernel, numKernelLines, kernelProgram,
827         kernelName, newBuildOptions.c_str());
828 }
829 
830 // Builds OpenCL C/C++ program and creates
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)831 int build_program_create_kernel_helper(
832     cl_context context, cl_program *outProgram, cl_kernel *outKernel,
833     unsigned int numKernelLines, const char **kernelProgram,
834     const char *kernelName, const char *buildOptions)
835 {
836     int error;
837     /* Compile the program */
838     int buildProgramFailed = 0;
839     int printedSource = 0;
840     error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
841     if (error != CL_SUCCESS)
842     {
843         unsigned int i;
844         print_error(error, "clBuildProgram failed");
845         buildProgramFailed = 1;
846         printedSource = 1;
847         log_error("Build options: %s\n", buildOptions);
848         log_error("Original source is: ------------\n");
849         for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
850     }
851 
852     // Verify the build status on all devices
853     cl_uint deviceCount = 0;
854     error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
855                              sizeof(deviceCount), &deviceCount, NULL);
856     if (error != CL_SUCCESS)
857     {
858         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
859         return error;
860     }
861 
862     if (deviceCount == 0)
863     {
864         log_error("No devices found for program.\n");
865         return -1;
866     }
867 
868     cl_device_id *devices =
869         (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
870     if (NULL == devices) return -1;
871     BufferOwningPtr<cl_device_id> devicesBuf(devices);
872 
873     memset(devices, 0, deviceCount * sizeof(cl_device_id));
874     error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
875                              sizeof(cl_device_id) * deviceCount, devices, NULL);
876     if (error != CL_SUCCESS)
877     {
878         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
879         return error;
880     }
881 
882     cl_uint z;
883     bool buildFailed = false;
884     for (z = 0; z < deviceCount; z++)
885     {
886         char deviceName[4096] = "";
887         error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
888                                 deviceName, NULL);
889         if (error != CL_SUCCESS || deviceName[0] == '\0')
890         {
891             log_error("Device \"%d\" failed to return a name\n", z);
892             print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
893         }
894 
895         cl_build_status buildStatus;
896         error = clGetProgramBuildInfo(*outProgram, devices[z],
897                                       CL_PROGRAM_BUILD_STATUS,
898                                       sizeof(buildStatus), &buildStatus, NULL);
899         if (error != CL_SUCCESS)
900         {
901             print_error(error,
902                         "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
903             return error;
904         }
905 
906         if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
907             && deviceCount == 1)
908         {
909             buildFailed = true;
910             log_error("clBuildProgram returned an error, but buildStatus is "
911                       "marked as CL_BUILD_SUCCESS.\n");
912         }
913 
914         if (buildStatus != CL_BUILD_SUCCESS)
915         {
916 
917             char statusString[64] = "";
918             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
919                 sprintf(statusString, "CL_BUILD_SUCCESS");
920             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
921                 sprintf(statusString, "CL_BUILD_NONE");
922             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
923                 sprintf(statusString, "CL_BUILD_ERROR");
924             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
925                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
926             else
927                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
928 
929             if (buildStatus != CL_BUILD_SUCCESS)
930                 log_error(
931                     "Build not successful for device \"%s\", status: %s\n",
932                     deviceName, statusString);
933             size_t paramSize = 0;
934             error = clGetProgramBuildInfo(*outProgram, devices[z],
935                                           CL_PROGRAM_BUILD_LOG, 0, NULL,
936                                           &paramSize);
937             if (error != CL_SUCCESS)
938             {
939 
940                 print_error(
941                     error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
942                 return error;
943             }
944 
945             std::string log;
946             log.resize(paramSize / sizeof(char));
947             error = clGetProgramBuildInfo(*outProgram, devices[z],
948                                           CL_PROGRAM_BUILD_LOG, paramSize,
949                                           &log[0], NULL);
950             if (error != CL_SUCCESS || log[0] == '\0')
951             {
952                 log_error("Device %d (%s) failed to return a build log\n", z,
953                           deviceName);
954                 if (error)
955                 {
956                     print_error(
957                         error,
958                         "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
959                     return error;
960                 }
961                 else
962                 {
963                     log_error("clGetProgramBuildInfo returned an empty log.\n");
964                     return -1;
965                 }
966             }
967             // In this case we've already printed out the code above.
968             if (!printedSource)
969             {
970                 unsigned int i;
971                 log_error("Original source is: ------------\n");
972                 for (i = 0; i < numKernelLines; i++)
973                     log_error("%s", kernelProgram[i]);
974                 printedSource = 1;
975             }
976             log_error("Build log for device \"%s\" is: ------------\n",
977                       deviceName);
978             log_error("%s\n", log.c_str());
979             log_error("\n----------\n");
980             return -1;
981         }
982     }
983 
984     if (buildFailed)
985     {
986         return -1;
987     }
988 
989     /* And create a kernel from it */
990     if (kernelName != NULL)
991     {
992         *outKernel = clCreateKernel(*outProgram, kernelName, &error);
993         if (*outKernel == NULL || error != CL_SUCCESS)
994         {
995             print_error(error, "Unable to create kernel");
996             return error;
997         }
998     }
999 
1000     return 0;
1001 }
1002 
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)1003 int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
1004                                     size_t *outMaxSize, size_t *outLimits)
1005 {
1006     cl_device_id *devices;
1007     size_t size, maxCommonSize = 0;
1008     int numDevices, i, j, error;
1009     cl_uint numDims;
1010     size_t outSize;
1011     size_t sizeLimit[] = { 1, 1, 1 };
1012 
1013 
1014     /* Assume fewer than 16 devices will be returned */
1015     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
1016     test_error(error, "Unable to obtain list of devices size for context");
1017     devices = (cl_device_id *)malloc(outSize);
1018     BufferOwningPtr<cl_device_id> devicesBuf(devices);
1019 
1020     error =
1021         clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
1022     test_error(error, "Unable to obtain list of devices for context");
1023 
1024     numDevices = (int)(outSize / sizeof(cl_device_id));
1025 
1026     for (i = 0; i < numDevices; i++)
1027     {
1028         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
1029                                 sizeof(size), &size, NULL);
1030         test_error(error, "Unable to obtain max work group size for device");
1031         if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1032 
1033         error = clGetKernelWorkGroupInfo(kernel, devices[i],
1034                                          CL_KERNEL_WORK_GROUP_SIZE,
1035                                          sizeof(size), &size, NULL);
1036         test_error(
1037             error,
1038             "Unable to obtain max work group size for device and kernel combo");
1039         if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1040 
1041         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1042                                 sizeof(numDims), &numDims, NULL);
1043         test_error(
1044             error,
1045             "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1046         sizeLimit[0] = 1;
1047         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
1048                                 numDims * sizeof(size_t), sizeLimit, NULL);
1049         test_error(error,
1050                    "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1051 
1052         if (outLimits != NULL)
1053         {
1054             if (i == 0)
1055             {
1056                 for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
1057             }
1058             else
1059             {
1060                 for (j = 0; j < (int)numDims; j++)
1061                 {
1062                     if (sizeLimit[j] < outLimits[j])
1063                         outLimits[j] = sizeLimit[j];
1064                 }
1065             }
1066         }
1067     }
1068 
1069     *outMaxSize = (unsigned int)maxCommonSize;
1070     return 0;
1071 }
1072 
1073 
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1074 extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
1075                                                         cl_kernel kernel,
1076                                                         size_t *outSize)
1077 {
1078     cl_uint maxDim;
1079     size_t maxWgSize;
1080     size_t *maxWgSizePerDim;
1081     int error;
1082 
1083     error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
1084                                      sizeof(size_t), &maxWgSize, NULL);
1085     test_error(error,
1086                "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
1087 
1088     error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1089                             sizeof(cl_uint), &maxDim, NULL);
1090     test_error(error,
1091                "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
1092     maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
1093     if (!maxWgSizePerDim)
1094     {
1095         log_error("Unable to allocate maxWgSizePerDim\n");
1096         return -1;
1097     }
1098 
1099     error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1100                             maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
1101     if (error != CL_SUCCESS)
1102     {
1103         log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
1104         free(maxWgSizePerDim);
1105         return error;
1106     }
1107 
1108     // "maxWgSize" is limited to that of the first dimension.
1109     if (maxWgSize > maxWgSizePerDim[0])
1110     {
1111         maxWgSize = maxWgSizePerDim[0];
1112     }
1113 
1114     free(maxWgSizePerDim);
1115 
1116     *outSize = maxWgSize;
1117     return 0;
1118 }
1119 
1120 
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1121 int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
1122                                    size_t globalThreadSize, size_t *outMaxSize)
1123 {
1124     size_t sizeLimit[3];
1125     int error =
1126         get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
1127     if (error != 0) return error;
1128 
1129     /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
1130      */
1131     /* Note for speed, we don't need to check the range of maxCommonSize, b/c
1132      once it gets to 1, the modulo test will succeed and break the loop anyway
1133    */
1134     for (;
1135          (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
1136          (*outMaxSize)--)
1137         ;
1138     return 0;
1139 }
1140 
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1141 int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
1142                                       size_t *globalThreadSizes,
1143                                       size_t *outMaxSizes)
1144 {
1145     size_t sizeLimit[3];
1146     size_t maxSize;
1147     int error =
1148         get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1149     if (error != 0) return error;
1150 
1151     /* Now find a set of factors, multiplied together less than maxSize, but
1152        each a factor of the global sizes */
1153 
1154     /* Simple case */
1155     if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
1156     {
1157         if (globalThreadSizes[0] <= sizeLimit[0]
1158             && globalThreadSizes[1] <= sizeLimit[1])
1159         {
1160             outMaxSizes[0] = globalThreadSizes[0];
1161             outMaxSizes[1] = globalThreadSizes[1];
1162             return 0;
1163         }
1164     }
1165 
1166     size_t remainingSize, sizeForThisOne;
1167     remainingSize = maxSize;
1168     int i, j;
1169     for (i = 0; i < 2; i++)
1170     {
1171         if (globalThreadSizes[i] > remainingSize)
1172             sizeForThisOne = remainingSize;
1173         else
1174             sizeForThisOne = globalThreadSizes[i];
1175         for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1176              || (sizeForThisOne > sizeLimit[i]);
1177              sizeForThisOne--)
1178             ;
1179         outMaxSizes[i] = sizeForThisOne;
1180         remainingSize = maxSize;
1181         for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1182     }
1183 
1184     return 0;
1185 }
1186 
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1187 int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
1188                                       size_t *globalThreadSizes,
1189                                       size_t *outMaxSizes)
1190 {
1191     size_t sizeLimit[3];
1192     size_t maxSize;
1193     int error =
1194         get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1195     if (error != 0) return error;
1196     /* Now find a set of factors, multiplied together less than maxSize, but
1197      each a factor of the global sizes */
1198 
1199     /* Simple case */
1200     if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
1201         <= maxSize)
1202     {
1203         if (globalThreadSizes[0] <= sizeLimit[0]
1204             && globalThreadSizes[1] <= sizeLimit[1]
1205             && globalThreadSizes[2] <= sizeLimit[2])
1206         {
1207             outMaxSizes[0] = globalThreadSizes[0];
1208             outMaxSizes[1] = globalThreadSizes[1];
1209             outMaxSizes[2] = globalThreadSizes[2];
1210             return 0;
1211         }
1212     }
1213 
1214     size_t remainingSize, sizeForThisOne;
1215     remainingSize = maxSize;
1216     int i, j;
1217     for (i = 0; i < 3; i++)
1218     {
1219         if (globalThreadSizes[i] > remainingSize)
1220             sizeForThisOne = remainingSize;
1221         else
1222             sizeForThisOne = globalThreadSizes[i];
1223         for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1224              || (sizeForThisOne > sizeLimit[i]);
1225              sizeForThisOne--)
1226             ;
1227         outMaxSizes[i] = sizeForThisOne;
1228         remainingSize = maxSize;
1229         for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1230     }
1231 
1232     return 0;
1233 }
1234 
1235 /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1236 int is_image_format_supported(cl_context context, cl_mem_flags flags,
1237                               cl_mem_object_type image_type,
1238                               const cl_image_format *fmt)
1239 {
1240     cl_image_format *list;
1241     cl_uint count = 0;
1242     cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
1243                                             NULL, &count);
1244     if (count == 0) return 0;
1245 
1246     list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
1247     if (NULL == list)
1248     {
1249         log_error("Error: unable to allocate %zu byte buffer for image format "
1250                   "list at %s:%d (err = %d)\n",
1251                   count * sizeof(cl_image_format), __FILE__, __LINE__, err);
1252         return 0;
1253     }
1254     BufferOwningPtr<cl_image_format> listBuf(list);
1255 
1256 
1257     cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
1258                                               list, NULL);
1259     if (error)
1260     {
1261         log_error("Error: failed to obtain supported image type list at %s:%d "
1262                   "(err = %d)\n",
1263                   __FILE__, __LINE__, err);
1264         return 0;
1265     }
1266 
1267     // iterate looking for a match.
1268     cl_uint i;
1269     for (i = 0; i < count; i++)
1270     {
1271         if (fmt->image_channel_data_type == list[i].image_channel_data_type
1272             && fmt->image_channel_order == list[i].image_channel_order)
1273             break;
1274     }
1275 
1276     return (i < count) ? 1 : 0;
1277 }
1278 
1279 size_t get_pixel_bytes(const cl_image_format *fmt);
get_pixel_bytes(const cl_image_format * fmt)1280 size_t get_pixel_bytes(const cl_image_format *fmt)
1281 {
1282     size_t chanCount;
1283     switch (fmt->image_channel_order)
1284     {
1285         case CL_R:
1286         case CL_A:
1287         case CL_Rx:
1288         case CL_INTENSITY:
1289         case CL_LUMINANCE:
1290         case CL_DEPTH: chanCount = 1; break;
1291         case CL_RG:
1292         case CL_RA:
1293         case CL_RGx: chanCount = 2; break;
1294         case CL_RGB:
1295         case CL_RGBx:
1296         case CL_sRGB:
1297         case CL_sRGBx: chanCount = 3; break;
1298         case CL_RGBA:
1299         case CL_ARGB:
1300         case CL_BGRA:
1301         case CL_sBGRA:
1302         case CL_sRGBA:
1303 #ifdef CL_1RGB_APPLE
1304         case CL_1RGB_APPLE:
1305 #endif
1306 #ifdef CL_BGR1_APPLE
1307         case CL_BGR1_APPLE:
1308 #endif
1309             chanCount = 4;
1310             break;
1311         default:
1312             log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
1313             abort();
1314             break;
1315     }
1316 
1317     switch (fmt->image_channel_data_type)
1318     {
1319         case CL_UNORM_SHORT_565:
1320         case CL_UNORM_SHORT_555: return 2;
1321 
1322         case CL_UNORM_INT_101010: return 4;
1323 
1324         case CL_SNORM_INT8:
1325         case CL_UNORM_INT8:
1326         case CL_SIGNED_INT8:
1327         case CL_UNSIGNED_INT8: return chanCount;
1328 
1329         case CL_SNORM_INT16:
1330         case CL_UNORM_INT16:
1331         case CL_HALF_FLOAT:
1332         case CL_SIGNED_INT16:
1333         case CL_UNSIGNED_INT16:
1334 #ifdef CL_SFIXED14_APPLE
1335         case CL_SFIXED14_APPLE:
1336 #endif
1337             return chanCount * 2;
1338 
1339         case CL_SIGNED_INT32:
1340         case CL_UNSIGNED_INT32:
1341         case CL_FLOAT: return chanCount * 4;
1342 
1343         default:
1344             log_error("Unknown channel data type at %s:%d!\n", __FILE__,
1345                       __LINE__);
1346             abort();
1347     }
1348 
1349     return 0;
1350 }
1351 
verifyImageSupport(cl_device_id device)1352 test_status verifyImageSupport(cl_device_id device)
1353 {
1354     int result = checkForImageSupport(device);
1355     if (result == 0)
1356     {
1357         return TEST_PASS;
1358     }
1359     if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
1360     {
1361         log_error("SKIPPED: Device does not supported images as required by "
1362                   "this test!\n");
1363         return TEST_SKIP;
1364     }
1365     return TEST_FAIL;
1366 }
1367 
checkForImageSupport(cl_device_id device)1368 int checkForImageSupport(cl_device_id device)
1369 {
1370     cl_uint i;
1371     int error;
1372 
1373 
1374     /* Check the device props to see if images are supported at all first */
1375     error =
1376         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1377     test_error(error, "Unable to query device for image support");
1378     if (i == 0)
1379     {
1380         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1381     }
1382 
1383     /* So our support is good */
1384     return 0;
1385 }
1386 
checkFor3DImageSupport(cl_device_id device)1387 int checkFor3DImageSupport(cl_device_id device)
1388 {
1389     cl_uint i;
1390     int error;
1391 
1392     /* Check the device props to see if images are supported at all first */
1393     error =
1394         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1395     test_error(error, "Unable to query device for image support");
1396     if (i == 0)
1397     {
1398         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1399     }
1400 
1401     char profile[128];
1402     error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
1403                             NULL);
1404     test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
1405     if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
1406     {
1407         size_t width = -1L;
1408         size_t height = -1L;
1409         size_t depth = -1L;
1410         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
1411                                 sizeof(width), &width, NULL);
1412         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
1413         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
1414                                 sizeof(height), &height, NULL);
1415         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
1416         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
1417                                 sizeof(depth), &depth, NULL);
1418         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
1419 
1420         if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1421     }
1422 
1423     /* So our support is good */
1424     return 0;
1425 }
1426 
checkForReadWriteImageSupport(cl_device_id device)1427 int checkForReadWriteImageSupport(cl_device_id device)
1428 {
1429     if (checkForImageSupport(device))
1430     {
1431         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1432     }
1433 
1434     auto device_cl_version = get_device_cl_version(device);
1435     if (device_cl_version >= Version(3, 0))
1436     {
1437         // In OpenCL 3.0, Read-Write images are optional.
1438         // Check if they are supported.
1439         cl_uint are_rw_images_supported{};
1440         test_error(
1441             clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
1442                             sizeof(are_rw_images_supported),
1443                             &are_rw_images_supported, nullptr),
1444             "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
1445         if (0 == are_rw_images_supported)
1446         {
1447             log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
1448             return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1449         }
1450     }
1451     // READ_WRITE images are not supported on 1.X devices.
1452     else if (device_cl_version < Version(2, 0))
1453     {
1454         log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
1455         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1456     }
1457     // Support for read-write image arguments is required
1458     // for an 2.X device if the device supports images.
1459 
1460     /* So our support is good */
1461     return 0;
1462 }
1463 
get_min_alignment(cl_context context)1464 size_t get_min_alignment(cl_context context)
1465 {
1466     static cl_uint align_size = 0;
1467 
1468     if (0 == align_size)
1469     {
1470         cl_device_id *devices;
1471         size_t devices_size = 0;
1472         cl_uint result = 0;
1473         cl_int error;
1474         int i;
1475 
1476         error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1477                                  &devices_size);
1478         test_error_ret(error, "clGetContextInfo failed", 0);
1479 
1480         devices = (cl_device_id *)malloc(devices_size);
1481         if (devices == NULL)
1482         {
1483             print_error(error, "malloc failed");
1484             return 0;
1485         }
1486 
1487         error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
1488                                  (void *)devices, NULL);
1489         test_error_ret(error, "clGetContextInfo failed", 0);
1490 
1491         for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
1492         {
1493             cl_uint alignment = 0;
1494 
1495             error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1496                                     sizeof(cl_uint), (void *)&alignment, NULL);
1497 
1498             if (error == CL_SUCCESS)
1499             {
1500                 alignment >>= 3; // convert bits to bytes
1501                 result = (alignment > result) ? alignment : result;
1502             }
1503             else
1504                 print_error(error, "clGetDeviceInfo failed");
1505         }
1506 
1507         align_size = result;
1508         free(devices);
1509     }
1510 
1511     return align_size;
1512 }
1513 
get_default_rounding_mode(cl_device_id device)1514 cl_device_fp_config get_default_rounding_mode(cl_device_id device)
1515 {
1516     char profileStr[128] = "";
1517     cl_device_fp_config single = 0;
1518     int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
1519                                 sizeof(single), &single, NULL);
1520     if (error)
1521         test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
1522                        0);
1523 
1524     if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
1525 
1526     if (0 == (single & CL_FP_ROUND_TO_ZERO))
1527         test_error_ret(-1,
1528                        "FAILURE: device must support either "
1529                        "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
1530                        0);
1531 
1532     // Make sure we are an embedded device before allowing a pass
1533     if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
1534                                  &profileStr, NULL)))
1535         test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
1536 
1537     if (strcmp(profileStr, "EMBEDDED_PROFILE"))
1538         test_error_ret(error,
1539                        "FAILURE: non-EMBEDDED_PROFILE devices must support "
1540                        "CL_FP_ROUND_TO_NEAREST",
1541                        0);
1542 
1543     return CL_FP_ROUND_TO_ZERO;
1544 }
1545 
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1546 int checkDeviceForQueueSupport(cl_device_id device,
1547                                cl_command_queue_properties prop)
1548 {
1549     cl_command_queue_properties realProps;
1550     cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
1551                                    sizeof(realProps), &realProps, NULL);
1552     test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
1553 
1554     return (realProps & prop) ? 1 : 0;
1555 }
1556 
printDeviceHeader(cl_device_id device)1557 int printDeviceHeader(cl_device_id device)
1558 {
1559     char deviceName[512], deviceVendor[512], deviceVersion[512],
1560         cLangVersion[512];
1561     int error;
1562 
1563     error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
1564                             deviceName, NULL);
1565     test_error(error, "Unable to get CL_DEVICE_NAME for device");
1566 
1567     error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
1568                             deviceVendor, NULL);
1569     test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
1570 
1571     error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
1572                             deviceVersion, NULL);
1573     test_error(error, "Unable to get CL_DEVICE_VERSION for device");
1574 
1575     error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1576                             sizeof(cLangVersion), cLangVersion, NULL);
1577     test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
1578 
1579     log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
1580              "Device Version = %s%s%s\n",
1581              deviceName, deviceVendor, deviceVersion,
1582              (error == CL_SUCCESS) ? ", CL C Version = " : "",
1583              (error == CL_SUCCESS) ? cLangVersion : "");
1584 
1585     auto version = get_device_cl_version(device);
1586     if (version >= Version(3, 0))
1587     {
1588         auto ctsVersion = get_device_info_string(
1589             device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
1590         log_info("Device latest conformance version passed: %s\n",
1591                  ctsVersion.c_str());
1592     }
1593 
1594     return CL_SUCCESS;
1595 }
1596 
get_device_cl_c_version(cl_device_id device)1597 Version get_device_cl_c_version(cl_device_id device)
1598 {
1599     auto device_cl_version = get_device_cl_version(device);
1600 
1601     // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
1602     // did not exist, but since this is just the first version we can
1603     // return 1.0.
1604     if (device_cl_version == Version{ 1, 0 })
1605     {
1606         return Version{ 1, 0 };
1607     }
1608 
1609     // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
1610     // versions are backwards compatible, hence querying with the
1611     // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
1612     // OpenCL C version.
1613     size_t opencl_c_version_size_in_bytes{};
1614     auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
1615                                  &opencl_c_version_size_in_bytes);
1616     test_error_ret(error,
1617                    "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1618                    (Version{ -1, 0 }));
1619 
1620     std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
1621     error =
1622         clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1623                         opencl_c_version.size(), &opencl_c_version[0], nullptr);
1624 
1625     test_error_ret(error,
1626                    "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1627                    (Version{ -1, 0 }));
1628 
1629     // Scrape out the major, minor pair from the string.
1630     auto major = opencl_c_version[opencl_c_version.find('.') - 1];
1631     auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
1632 
1633     return Version{ major - '0', minor - '0' };
1634 }
1635 
get_device_latest_cl_c_version(cl_device_id device)1636 Version get_device_latest_cl_c_version(cl_device_id device)
1637 {
1638     auto device_cl_version = get_device_cl_version(device);
1639 
1640     // If the device version >= 3.0 it must support the
1641     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
1642     // recent CL C version supported by the device.
1643     if (device_cl_version >= Version{ 3, 0 })
1644     {
1645         size_t opencl_c_all_versions_size_in_bytes{};
1646         auto error =
1647             clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1648                             &opencl_c_all_versions_size_in_bytes);
1649         test_error_ret(
1650             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1651             (Version{ -1, 0 }));
1652         std::vector<cl_name_version> name_versions(
1653             opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1654         error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1655                                 opencl_c_all_versions_size_in_bytes,
1656                                 name_versions.data(), nullptr);
1657         test_error_ret(
1658             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1659             (Version{ -1, 0 }));
1660 
1661         Version max_supported_cl_c_version{};
1662         for (const auto &name_version : name_versions)
1663         {
1664             Version current_version{
1665                 static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1666                 static_cast<int>(CL_VERSION_MINOR(name_version.version))
1667             };
1668             max_supported_cl_c_version =
1669                 (current_version > max_supported_cl_c_version)
1670                 ? current_version
1671                 : max_supported_cl_c_version;
1672         }
1673         return max_supported_cl_c_version;
1674     }
1675 
1676     return get_device_cl_c_version(device);
1677 }
1678 
get_max_OpenCL_C_for_context(cl_context context)1679 Version get_max_OpenCL_C_for_context(cl_context context)
1680 {
1681     // Get all the devices in the context and find the maximum
1682     // universally supported OpenCL C version.
1683     size_t devices_size_in_bytes{};
1684     auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
1685                                   &devices_size_in_bytes);
1686     test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
1687                    (Version{ -1, 0 }));
1688     std::vector<cl_device_id> devices(devices_size_in_bytes
1689                                       / sizeof(cl_device_id));
1690     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
1691                              devices.data(), nullptr);
1692     auto current_version = get_device_latest_cl_c_version(devices[0]);
1693     std::for_each(std::next(devices.begin()), devices.end(),
1694                   [&current_version](cl_device_id device) {
1695                       auto device_version =
1696                           get_device_latest_cl_c_version(device);
1697                       // OpenCL 3.0 is not backwards compatible with 2.0.
1698                       // If we have 3.0 and 2.0 in the same driver we
1699                       // use 1.2.
1700                       if (((device_version >= Version(2, 0)
1701                             && device_version < Version(3, 0))
1702                            && current_version >= Version(3, 0))
1703                           || (device_version >= Version(3, 0)
1704                               && (current_version >= Version(2, 0)
1705                                   && current_version < Version(3, 0))))
1706                       {
1707                           current_version = Version(1, 2);
1708                       }
1709                       else
1710                       {
1711                           current_version =
1712                               std::min(device_version, current_version);
1713                       }
1714                   });
1715     return current_version;
1716 }
1717 
device_supports_cl_c_version(cl_device_id device,Version version)1718 bool device_supports_cl_c_version(cl_device_id device, Version version)
1719 {
1720     auto device_cl_version = get_device_cl_version(device);
1721 
1722     // In general, a device does not support an OpenCL C version if it is <=
1723     // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
1724     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
1725 
1726     // If the device version >= 3.0 it must support the
1727     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
1728     // used must appear in the query result if it's <=
1729     // CL_DEVICE_OPENCL_C_VERSION.
1730     if (device_cl_version >= Version{ 3, 0 })
1731     {
1732         size_t opencl_c_all_versions_size_in_bytes{};
1733         auto error =
1734             clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1735                             &opencl_c_all_versions_size_in_bytes);
1736         test_error_ret(
1737             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1738             (false));
1739         std::vector<cl_name_version> name_versions(
1740             opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1741         error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1742                                 opencl_c_all_versions_size_in_bytes,
1743                                 name_versions.data(), nullptr);
1744         test_error_ret(
1745             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1746             (false));
1747 
1748         for (const auto &name_version : name_versions)
1749         {
1750             Version current_version{
1751                 static_cast<int>(CL_VERSION_MAJOR(name_version.version)),
1752                 static_cast<int>(CL_VERSION_MINOR(name_version.version))
1753             };
1754             if (current_version == version)
1755             {
1756                 return true;
1757             }
1758         }
1759     }
1760 
1761     return version <= get_device_cl_c_version(device);
1762 }
1763 
// Call `fn` repeatedly until it returns true or the time budget is used up.
//
// `fn` is invoked, and if it returns false the caller sleeps for
// `interval_ms` milliseconds; that interval is added to the running total,
// which is compared against `timeout_ms` before each invocation. Only the
// sleep intervals are counted, not the time spent inside `fn`.
//
// Returns true as soon as `fn` returns true, false if the budget is exhausted
// first (including when `timeout_ms` is 0, in which case `fn` is never
// called).
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    for (unsigned elapsed_ms = 0; elapsed_ms < timeout_ms;
         elapsed_ms += interval_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(interval_ms * 1000);
    }

    return false;
}
1783 
device_supports_double(cl_device_id device)1784 bool device_supports_double(cl_device_id device)
1785 {
1786     if (is_extension_available(device, "cl_khr_fp64"))
1787     {
1788         return true;
1789     }
1790     else
1791     {
1792         cl_device_fp_config double_fp_config;
1793         cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
1794                                      sizeof(double_fp_config),
1795                                      &double_fp_config, nullptr);
1796         test_error(err,
1797                    "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
1798         return double_fp_config != 0;
1799     }
1800 }
1801 
device_supports_half(cl_device_id device)1802 bool device_supports_half(cl_device_id device)
1803 {
1804     return is_extension_available(device, "cl_khr_fp16");
1805 }
1806