1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "crc32.h"
17 #include "kernelHelpers.h"
18 #include "deviceInfo.h"
19 #include "errorHelpers.h"
20 #include "imageHelpers.h"
21 #include "typeWrappers.h"
22 #include "testHarness.h"
23 #include "parseParameters.h"
24 
25 #include <cassert>
26 #include <vector>
27 #include <string>
28 #include <fstream>
29 #include <sstream>
30 #include <iomanip>
31 #include <mutex>
32 #include <algorithm>
33 
// Path separator inserted between a directory and a file name by the
// filename helpers in this file; backslash on Windows, forward slash
// everywhere else.
#if defined(_WIN32)
std::string slash = "\\";
#else
std::string slash = "/";
#endif

// Held by the internal create_single_kernel_helper_create_program() for the
// whole duration of program creation (source dump, offline compiler
// invocation and program object creation).
static std::mutex gCompilerMutex;

// Forward declaration; the definition appears further down in this file.
static cl_int get_first_device_id(const cl_context context,
                                  cl_device_id &device);
44 
// Returns the size in bytes of the file named fileName.
//
// The file is opened in binary mode and the size is taken from the stream
// position after seeking to the end. Returns 0 when the file cannot be
// opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // The end-of-file position is the length of the file.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
54 
// Concatenates the numKernelLines C strings in kernelProgram, in order and
// without any separator, into a single std::string.
//
// Every one of the first numKernelLines entries must be a valid
// NUL-terminated string.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string joined;
    const char *const *const last = kernelProgram + numKernelLines;
    for (const char *const *line = kernelProgram; line != last; ++line)
    {
        joined.append(*line);
    }
    return joined;
}
67 
get_kernel_name(const std::string & source)68 std::string get_kernel_name(const std::string &source)
69 {
70     // Create list of kernel names
71     std::string kernelsList;
72     size_t kPos = source.find("kernel");
73     while (kPos != std::string::npos)
74     {
75         // check for '__kernel'
76         size_t pos = kPos;
77         if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
78             pos -= 2;
79 
80         // check character before 'kernel' (white space expected)
81         size_t wsPos = source.find_last_of(" \t\r\n", pos);
82         if (wsPos == std::string::npos || wsPos + 1 == pos)
83         {
84             // check character after 'kernel' (white space expected)
85             size_t akPos = kPos + sizeof("kernel") - 1;
86             wsPos = source.find_first_of(" \t\r\n", akPos);
87             if (!(wsPos == akPos))
88             {
89                 kPos = source.find("kernel", kPos + 1);
90                 continue;
91             }
92 
93             bool attributeFound;
94             do
95             {
96                 attributeFound = false;
97                 // find '(' after kernel name name
98                 size_t pPos = source.find("(", akPos);
99                 if (!(pPos != std::string::npos)) continue;
100 
101                 // check for not empty kernel name before '('
102                 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
103                 if (!(pos != std::string::npos && pos > akPos)) continue;
104 
105                 // find character before kernel name
106                 wsPos = source.find_last_of(" \t\r\n", pos);
107                 if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
108 
109                 std::string name =
110                     source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
111                 // check for kernel attribute
112                 if (name == "__attribute__")
113                 {
114                     attributeFound = true;
115                     int pCount = 1;
116                     akPos = pPos + 1;
117                     while (pCount > 0 && akPos != std::string::npos)
118                     {
119                         akPos = source.find_first_of("()", akPos + 1);
120                         if (akPos != std::string::npos)
121                         {
122                             if (source[akPos] == '(')
123                                 pCount++;
124                             else
125                                 pCount--;
126                         }
127                     }
128                 }
129                 else
130                 {
131                     kernelsList += name + ".";
132                 }
133             } while (attributeFound);
134         }
135         kPos = source.find("kernel", kPos + 1);
136     }
137     std::ostringstream oss;
138     if (MAX_LEN_FOR_KERNEL_LIST > 0)
139     {
140         if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
141         {
142             kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
143             kernelsList[kernelsList.size() - 1] = '.';
144             kernelsList[kernelsList.size() - 1] = '.';
145         }
146         oss << kernelsList;
147     }
148     return oss.str();
149 }
150 
151 static std::string
get_offline_compilation_file_type_str(const CompilationMode compilationMode)152 get_offline_compilation_file_type_str(const CompilationMode compilationMode)
153 {
154     switch (compilationMode)
155     {
156         default: assert(0 && "Invalid compilation mode"); abort();
157         case kOnline:
158             assert(0 && "Invalid compilation mode for offline compilation");
159             abort();
160         case kBinary: return "binary";
161         case kSpir_v: return "SPIR-V";
162     }
163 }
164 
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165 static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166                                               const char *const *kernelProgram,
167                                               const char *buildOptions)
168 {
169     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170     std::string kernelName = get_kernel_name(kernel);
171     cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172     std::ostringstream oss;
173     oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
174         << kernelCrc;
175     if (buildOptions)
176     {
177         cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
178         oss << '.' << std::hex << std::setfill('0') << std::setw(8)
179             << bOptionsCrc;
180     }
181     return oss.str();
182 }
183 
184 
185 static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)186 get_cl_build_options_filename_with_path(const std::string &filePath,
187                                         const std::string &fileNamePrefix)
188 {
189     return filePath + slash + fileNamePrefix + ".options";
190 }
191 
192 static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)193 get_cl_source_filename_with_path(const std::string &filePath,
194                                  const std::string &fileNamePrefix)
195 {
196     return filePath + slash + fileNamePrefix + ".cl";
197 }
198 
199 static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)200 get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
201                               const std::string &filePath,
202                               const std::string &fileNamePrefix)
203 {
204     std::string binaryFilename = filePath + slash + fileNamePrefix;
205     if (kSpir_v == mode)
206     {
207         std::ostringstream extension;
208         extension << ".spv" << deviceAddrSpaceSize;
209         binaryFilename += extension.str();
210     }
211     return binaryFilename;
212 }
213 
file_exist_on_disk(const std::string & filePath,const std::string & fileName)214 static bool file_exist_on_disk(const std::string &filePath,
215                                const std::string &fileName)
216 {
217     std::string fileNameWithPath = filePath + slash + fileName;
218     bool exist = false;
219     std::ifstream ifs;
220 
221     ifs.open(fileNameWithPath.c_str(), std::ios::binary);
222     if (ifs.good()) exist = true;
223     ifs.close();
224     return exist;
225 }
226 
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)227 static bool should_save_kernel_source_to_disk(CompilationMode mode,
228                                               CompilationCacheMode cacheMode,
229                                               const std::string &binaryPath,
230                                               const std::string &binaryName)
231 {
232     bool saveToDisk = false;
233     if (cacheMode == kCacheModeDumpCl
234         || (cacheMode == kCacheModeOverwrite && mode != kOnline))
235     {
236         saveToDisk = true;
237     }
238     if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline)
239     {
240         saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
241     }
242     return saveToDisk;
243 }
244 
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)245 static int save_kernel_build_options_to_disk(const std::string &path,
246                                              const std::string &prefix,
247                                              const char *buildOptions)
248 {
249     std::string filename =
250         get_cl_build_options_filename_with_path(path, prefix);
251     std::ofstream ofs(filename.c_str(), std::ios::binary);
252     if (!ofs.good())
253     {
254         log_info("Can't save kernel build options: %s\n", filename.c_str());
255         return -1;
256     }
257     ofs.write(buildOptions, strlen(buildOptions));
258     ofs.close();
259     log_info("Saved kernel build options to file: %s\n", filename.c_str());
260     return CL_SUCCESS;
261 }
262 
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)263 static int save_kernel_source_to_disk(const std::string &path,
264                                       const std::string &prefix,
265                                       const std::string &source)
266 {
267     std::string filename = get_cl_source_filename_with_path(path, prefix);
268     std::ofstream ofs(filename.c_str(), std::ios::binary);
269     if (!ofs.good())
270     {
271         log_info("Can't save kernel source: %s\n", filename.c_str());
272         return -1;
273     }
274     ofs.write(source.c_str(), source.size());
275     ofs.close();
276     log_info("Saved kernel source to file: %s\n", filename.c_str());
277     return CL_SUCCESS;
278 }
279 
280 static int
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)281 save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
282                                        const char *const *kernelProgram,
283                                        const char *buildOptions)
284 {
285     int error;
286 
287     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
288     std::string kernelNamePrefix =
289         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
290 
291     // save kernel source to disk
292     error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
293                                        kernel);
294 
295     // save kernel build options to disk if exists
296     if (buildOptions != NULL)
297         error |= save_kernel_build_options_to_disk(
298             gCompilationCachePath, kernelNamePrefix, buildOptions);
299 
300     return error;
301 }
302 
303 static std::string
get_compilation_mode_str(const CompilationMode compilationMode)304 get_compilation_mode_str(const CompilationMode compilationMode)
305 {
306     switch (compilationMode)
307     {
308         default: assert(0 && "Invalid compilation mode"); abort();
309         case kOnline: return "online";
310         case kBinary: return "binary";
311         case kSpir_v: return "spir-v";
312     }
313 }
314 
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)315 static cl_int get_cl_device_info_str(const cl_device_id device,
316                                      const cl_uint device_address_space_size,
317                                      const CompilationMode compilationMode,
318                                      std::string &clDeviceInfo)
319 {
320     std::string extensionsString = get_device_extensions_string(device);
321     std::string versionString = get_device_version_string(device);
322 
323     std::ostringstream clDeviceInfoStream;
324     std::string file_type =
325         get_offline_compilation_file_type_str(compilationMode);
326     clDeviceInfoStream << "# OpenCL device info affecting " << file_type
327                        << " offline compilation:" << std::endl
328                        << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size
329                        << std::endl
330                        << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\""
331                        << std::endl;
332     /* We only need the device's supported IL version(s) when compiling IL
333      * that will be loaded with clCreateProgramWithIL() */
334     if (compilationMode == kSpir_v)
335     {
336         std::string ilVersionString = get_device_il_version_string(device);
337         clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString
338                            << "\"" << std::endl;
339     }
340     clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\""
341                        << std::endl;
342     clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT="
343                        << (0 == checkForImageSupport(device)) << std::endl;
344     clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str()
345                        << "\"" << std::endl;
346 
347     clDeviceInfo = clDeviceInfoStream.str();
348 
349     return CL_SUCCESS;
350 }
351 
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)352 static int write_cl_device_info(const cl_device_id device,
353                                 const cl_uint device_address_space_size,
354                                 const CompilationMode compilationMode,
355                                 std::string &clDeviceInfoFilename)
356 {
357     std::string clDeviceInfo;
358     int error = get_cl_device_info_str(device, device_address_space_size,
359                                        compilationMode, clDeviceInfo);
360     if (error != CL_SUCCESS)
361     {
362         return error;
363     }
364 
365     cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
366 
367     /* Get the filename for the clDeviceInfo file.
368      * Note: the file includes the hash on its content, so it is usually
369      * unnecessary to delete it. */
370     std::ostringstream clDeviceInfoFilenameStream;
371     clDeviceInfoFilenameStream << gCompilationCachePath << slash
372                                << "clDeviceInfo-";
373     clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
374                                << crc << ".txt";
375 
376     clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
377 
378     if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
379     {
380         /* The CL device info file has already been created.
381          * Nothing to do. */
382         return 0;
383     }
384 
385     /* The file does not exist or its length is not as expected.
386      * Create/overwrite it. */
387     std::ofstream ofs(clDeviceInfoFilename);
388     if (!ofs.good())
389     {
390         log_info("OfflineCompiler: can't create CL device info file: %s\n",
391                  clDeviceInfoFilename.c_str());
392         return -1;
393     }
394     ofs << clDeviceInfo;
395     ofs.close();
396 
397     return CL_SUCCESS;
398 }
399 
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)400 static std::string get_offline_compilation_command(
401     const cl_uint device_address_space_size,
402     const CompilationMode compilationMode, const std::string &bOptions,
403     const std::string &sourceFilename, const std::string &outputFilename,
404     const std::string &clDeviceInfoFilename)
405 {
406     std::ostringstream wrapperOptions;
407 
408     wrapperOptions << gCompilationProgram
409                    << " --mode=" << get_compilation_mode_str(compilationMode)
410                    << " --source=" << sourceFilename
411                    << " --output=" << outputFilename
412                    << " --cl-device-info=" << clDeviceInfoFilename;
413 
414     if (bOptions != "")
415     {
416         // Add build options passed to this function
417         wrapperOptions << " -- " << bOptions;
418     }
419 
420     return wrapperOptions.str();
421 }
422 
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename)423 static int invoke_offline_compiler(const cl_device_id device,
424                                    const cl_uint device_address_space_size,
425                                    const CompilationMode compilationMode,
426                                    const std::string &bOptions,
427                                    const std::string &sourceFilename,
428                                    const std::string &outputFilename)
429 {
430     std::string runString;
431     std::string clDeviceInfoFilename;
432 
433     // See cl_offline_compiler-interface.txt for a description of the
434     // format of the CL device information file generated below, and
435     // the internal command line interface for invoking the offline
436     // compiler.
437 
438     cl_int err = write_cl_device_info(device, device_address_space_size,
439                                       compilationMode, clDeviceInfoFilename);
440     if (err != CL_SUCCESS)
441     {
442         log_error("Failed writing CL device info file\n");
443         return err;
444     }
445 
446     runString = get_offline_compilation_command(
447         device_address_space_size, compilationMode, bOptions, sourceFilename,
448         outputFilename, clDeviceInfoFilename);
449 
450     // execute script
451     log_info("Executing command: %s\n", runString.c_str());
452     fflush(stdout);
453     int returnCode = system(runString.c_str());
454     if (returnCode != 0)
455     {
456         log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
457         return CL_COMPILE_PROGRAM_FAILURE;
458     }
459 
460     return CL_SUCCESS;
461 }
462 
get_first_device_id(const cl_context context,cl_device_id & device)463 static cl_int get_first_device_id(const cl_context context,
464                                   cl_device_id &device)
465 {
466     cl_uint numDevices = 0;
467     cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
468                                     sizeof(cl_uint), &numDevices, NULL);
469     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
470 
471     if (numDevices == 0)
472     {
473         log_error("ERROR: No CL devices found\n");
474         return -1;
475     }
476 
477     std::vector<cl_device_id> devices(numDevices, 0);
478     error =
479         clGetContextInfo(context, CL_CONTEXT_DEVICES,
480                          numDevices * sizeof(cl_device_id), &devices[0], NULL);
481     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
482 
483     device = devices[0];
484     return CL_SUCCESS;
485 }
486 
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)487 static cl_int get_device_address_bits(const cl_device_id device,
488                                       cl_uint &device_address_space_size)
489 {
490     cl_int error =
491         clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
492                         &device_address_space_size, NULL);
493     test_error(error, "Unable to obtain device address bits");
494 
495     if (device_address_space_size != 32 && device_address_space_size != 64)
496     {
497         log_error("ERROR: Unexpected number of device address bits: %u\n",
498                   device_address_space_size);
499         return -1;
500     }
501 
502     return CL_SUCCESS;
503 }
504 
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)505 static int get_offline_compiler_output(
506     std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
507     const CompilationMode compilationMode, const std::string &bOptions,
508     const std::string &kernelPath, const std::string &kernelNamePrefix)
509 {
510     std::string sourceFilename =
511         get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
512     std::string outputFilename = get_binary_filename_with_path(
513         compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
514 
515     ifs.open(outputFilename.c_str(), std::ios::binary);
516     if (!ifs.good())
517     {
518         std::string file_type =
519             get_offline_compilation_file_type_str(compilationMode);
520         if (gCompilationCacheMode == kCacheModeForceRead)
521         {
522             log_info("OfflineCompiler: can't open cached %s file: %s\n",
523                      file_type.c_str(), outputFilename.c_str());
524             return -1;
525         }
526         else
527         {
528             int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
529                                                 compilationMode, bOptions,
530                                                 sourceFilename, outputFilename);
531             if (error != CL_SUCCESS) return error;
532 
533             // read output file
534             ifs.open(outputFilename.c_str(), std::ios::binary);
535             if (!ifs.good())
536             {
537                 log_info("OfflineCompiler: can't read generated %s file: %s\n",
538                          file_type.c_str(), outputFilename.c_str());
539                 return -1;
540             }
541         }
542     }
543     return CL_SUCCESS;
544 }
545 
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,CompilationMode compilationMode)546 static int create_single_kernel_helper_create_program_offline(
547     cl_context context, cl_device_id device, cl_program *outProgram,
548     unsigned int numKernelLines, const char *const *kernelProgram,
549     const char *buildOptions, CompilationMode compilationMode)
550 {
551     if (kCacheModeDumpCl == gCompilationCacheMode)
552     {
553         return -1;
554     }
555 
556     // Get device CL_DEVICE_ADDRESS_BITS
557     int error;
558     cl_uint device_address_space_size = 0;
559     if (device == NULL)
560     {
561         error = get_first_device_id(context, device);
562         test_error(error, "Failed to get device ID for first device");
563     }
564     error = get_device_address_bits(device, device_address_space_size);
565     if (error != CL_SUCCESS) return error;
566 
567     // set build options
568     std::string bOptions;
569     bOptions += buildOptions ? std::string(buildOptions) : "";
570 
571     std::string kernelName =
572         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
573 
574 
575     std::ifstream ifs;
576     error = get_offline_compiler_output(ifs, device, device_address_space_size,
577                                         compilationMode, bOptions,
578                                         gCompilationCachePath, kernelName);
579     if (error != CL_SUCCESS) return error;
580 
581     ifs.seekg(0, ifs.end);
582     int length = ifs.tellg();
583     ifs.seekg(0, ifs.beg);
584 
585     // treat modifiedProgram as input for clCreateProgramWithBinary
586     if (compilationMode == kBinary)
587     {
588         // read binary from file:
589         std::vector<unsigned char> modifiedKernelBuf(length);
590 
591         ifs.read((char *)&modifiedKernelBuf[0], length);
592         ifs.close();
593 
594         size_t lengths = modifiedKernelBuf.size();
595         const unsigned char *binaries = { &modifiedKernelBuf[0] };
596         log_info("offlineCompiler: clCreateProgramWithSource replaced with "
597                  "clCreateProgramWithBinary\n");
598         *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
599                                                 &binaries, NULL, &error);
600         if (*outProgram == NULL || error != CL_SUCCESS)
601         {
602             print_error(error, "clCreateProgramWithBinary failed");
603             return error;
604         }
605     }
606     // treat modifiedProgram as input for clCreateProgramWithIL
607     else if (compilationMode == kSpir_v)
608     {
609         // read spir-v from file:
610         std::vector<unsigned char> modifiedKernelBuf(length);
611 
612         ifs.read((char *)&modifiedKernelBuf[0], length);
613         ifs.close();
614 
615         size_t length = modifiedKernelBuf.size();
616         log_info("offlineCompiler: clCreateProgramWithSource replaced with "
617                  "clCreateProgramWithIL\n");
618         if (gCoreILProgram)
619         {
620             *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0],
621                                                 length, &error);
622         }
623         else
624         {
625             cl_platform_id platform;
626             error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
627                                     sizeof(cl_platform_id), &platform, NULL);
628             test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
629 
630             clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
631             clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
632                 clGetExtensionFunctionAddressForPlatform(
633                     platform, "clCreateProgramWithILKHR");
634             if (clCreateProgramWithILKHR == NULL)
635             {
636                 log_error(
637                     "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
638                 return -1;
639             }
640             *outProgram = clCreateProgramWithILKHR(
641                 context, &modifiedKernelBuf[0], length, &error);
642         }
643 
644         if (*outProgram == NULL || error != CL_SUCCESS)
645         {
646             if (gCoreILProgram)
647             {
648                 print_error(error, "clCreateProgramWithIL failed");
649             }
650             else
651             {
652                 print_error(error, "clCreateProgramWithILKHR failed");
653             }
654             return error;
655         }
656     }
657 
658     return CL_SUCCESS;
659 }
660 
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,CompilationMode compilationMode)661 static int create_single_kernel_helper_create_program(
662     cl_context context, cl_device_id device, cl_program *outProgram,
663     unsigned int numKernelLines, const char **kernelProgram,
664     const char *buildOptions, CompilationMode compilationMode)
665 {
666     std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);
667 
668     std::string filePrefix =
669         get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
670     bool shouldSaveToDisk = should_save_kernel_source_to_disk(
671         compilationMode, gCompilationCacheMode, gCompilationCachePath,
672         filePrefix);
673 
674     if (shouldSaveToDisk)
675     {
676         if (CL_SUCCESS
677             != save_kernel_source_and_options_to_disk(
678                 numKernelLines, kernelProgram, buildOptions))
679         {
680             log_error("Unable to dump kernel source to disk");
681             return -1;
682         }
683     }
684     if (compilationMode == kOnline)
685     {
686         int error = CL_SUCCESS;
687 
688         /* Create the program object from source */
689         *outProgram = clCreateProgramWithSource(context, numKernelLines,
690                                                 kernelProgram, NULL, &error);
691         if (*outProgram == NULL || error != CL_SUCCESS)
692         {
693             print_error(error, "clCreateProgramWithSource failed");
694             return error;
695         }
696         return CL_SUCCESS;
697     }
698     else
699     {
700         return create_single_kernel_helper_create_program_offline(
701             context, device, outProgram, numKernelLines, kernelProgram,
702             buildOptions, compilationMode);
703     }
704 }
705 
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)706 int create_single_kernel_helper_create_program(cl_context context,
707                                                cl_program *outProgram,
708                                                unsigned int numKernelLines,
709                                                const char **kernelProgram,
710                                                const char *buildOptions)
711 {
712     return create_single_kernel_helper_create_program(
713         context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions,
714         gCompilationMode);
715 }
716 
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)717 int create_single_kernel_helper_create_program_for_device(
718     cl_context context, cl_device_id device, cl_program *outProgram,
719     unsigned int numKernelLines, const char **kernelProgram,
720     const char *buildOptions)
721 {
722     return create_single_kernel_helper_create_program(
723         context, device, outProgram, numKernelLines, kernelProgram,
724         buildOptions, gCompilationMode);
725 }
726 
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)727 int create_single_kernel_helper_with_build_options(
728     cl_context context, cl_program *outProgram, cl_kernel *outKernel,
729     unsigned int numKernelLines, const char **kernelProgram,
730     const char *kernelName, const char *buildOptions)
731 {
732     return create_single_kernel_helper(context, outProgram, outKernel,
733                                        numKernelLines, kernelProgram,
734                                        kernelName, buildOptions);
735 }
736 
737 // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)738 int create_single_kernel_helper(cl_context context, cl_program *outProgram,
739                                 cl_kernel *outKernel,
740                                 unsigned int numKernelLines,
741                                 const char **kernelProgram,
742                                 const char *kernelName,
743                                 const char *buildOptions)
744 {
745     // For the logic that automatically adds -cl-std it is much cleaner if the
746     // build options have RAII. This buffer will store the potentially updated
747     // build options, in which case buildOptions will point at the string owned
748     // by this buffer.
749     std::string build_options_internal{ buildOptions ? buildOptions : "" };
750 
751     // Check the build options for the -cl-std option.
752     if (!buildOptions || !strstr(buildOptions, "-cl-std"))
753     {
754         // If the build option isn't present add it using the latest OpenCL-C
755         // version supported by the device. This allows calling code to force a
756         // particular CL C version if it is required, but also means that
757         // callers need not specify a version if they want to assume the most
758         // recent CL C.
759 
760         auto version = get_max_OpenCL_C_for_context(context);
761 
762         std::string cl_std{};
763         if (version >= Version(3, 0))
764         {
765             cl_std = "-cl-std=CL3.0";
766         }
767         else if (version >= Version(2, 0) && version < Version(3, 0))
768         {
769             cl_std = "-cl-std=CL2.0";
770         }
771         else
772         {
773             // If the -cl-std build option is not specified, the highest OpenCL
774             // C 1.x language version supported by each device is used when
775             // compiling the program for each device.
776             cl_std = "";
777         }
778         build_options_internal += ' ';
779         build_options_internal += cl_std;
780         buildOptions = build_options_internal.c_str();
781     }
782     int error = create_single_kernel_helper_create_program(
783         context, outProgram, numKernelLines, kernelProgram, buildOptions);
784     if (error != CL_SUCCESS)
785     {
786         log_error("Create program failed: %d, line: %d\n", error, __LINE__);
787         return error;
788     }
789 
790     // Remove offline-compiler-only build options
791     std::string newBuildOptions;
792     if (buildOptions != NULL)
793     {
794         newBuildOptions = buildOptions;
795         std::string offlineCompierOptions[] = {
796             "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
797         };
798         for (auto &s : offlineCompierOptions)
799         {
800             std::string::size_type i = newBuildOptions.find(s);
801             if (i != std::string::npos) newBuildOptions.erase(i, s.length());
802         }
803     }
804     // Build program and create kernel
805     return build_program_create_kernel_helper(
806         context, outProgram, outKernel, numKernelLines, kernelProgram,
807         kernelName, newBuildOptions.c_str());
808 }
809 
810 // Builds OpenCL C/C++ program and creates
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)811 int build_program_create_kernel_helper(
812     cl_context context, cl_program *outProgram, cl_kernel *outKernel,
813     unsigned int numKernelLines, const char **kernelProgram,
814     const char *kernelName, const char *buildOptions)
815 {
816     int error;
817     /* Compile the program */
818     int buildProgramFailed = 0;
819     int printedSource = 0;
820     error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
821     if (error != CL_SUCCESS)
822     {
823         unsigned int i;
824         print_error(error, "clBuildProgram failed");
825         buildProgramFailed = 1;
826         printedSource = 1;
827         log_error("Build options: %s\n", buildOptions);
828         log_error("Original source is: ------------\n");
829         for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
830     }
831 
832     // Verify the build status on all devices
833     cl_uint deviceCount = 0;
834     error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
835                              sizeof(deviceCount), &deviceCount, NULL);
836     if (error != CL_SUCCESS)
837     {
838         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
839         return error;
840     }
841 
842     if (deviceCount == 0)
843     {
844         log_error("No devices found for program.\n");
845         return -1;
846     }
847 
848     cl_device_id *devices =
849         (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
850     if (NULL == devices) return -1;
851     BufferOwningPtr<cl_device_id> devicesBuf(devices);
852 
853     memset(devices, 0, deviceCount * sizeof(cl_device_id));
854     error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
855                              sizeof(cl_device_id) * deviceCount, devices, NULL);
856     if (error != CL_SUCCESS)
857     {
858         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
859         return error;
860     }
861 
862     cl_uint z;
863     bool buildFailed = false;
864     for (z = 0; z < deviceCount; z++)
865     {
866         char deviceName[4096] = "";
867         error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
868                                 deviceName, NULL);
869         if (error != CL_SUCCESS || deviceName[0] == '\0')
870         {
871             log_error("Device \"%d\" failed to return a name\n", z);
872             print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
873         }
874 
875         cl_build_status buildStatus;
876         error = clGetProgramBuildInfo(*outProgram, devices[z],
877                                       CL_PROGRAM_BUILD_STATUS,
878                                       sizeof(buildStatus), &buildStatus, NULL);
879         if (error != CL_SUCCESS)
880         {
881             print_error(error,
882                         "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
883             return error;
884         }
885 
886         if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
887             && deviceCount == 1)
888         {
889             buildFailed = true;
890             log_error("clBuildProgram returned an error, but buildStatus is "
891                       "marked as CL_BUILD_SUCCESS.\n");
892         }
893 
894         if (buildStatus != CL_BUILD_SUCCESS)
895         {
896 
897             char statusString[64] = "";
898             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
899                 sprintf(statusString, "CL_BUILD_SUCCESS");
900             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
901                 sprintf(statusString, "CL_BUILD_NONE");
902             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
903                 sprintf(statusString, "CL_BUILD_ERROR");
904             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
905                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
906             else
907                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
908 
909             if (buildStatus != CL_BUILD_SUCCESS)
910                 log_error(
911                     "Build not successful for device \"%s\", status: %s\n",
912                     deviceName, statusString);
913             size_t paramSize = 0;
914             error = clGetProgramBuildInfo(*outProgram, devices[z],
915                                           CL_PROGRAM_BUILD_LOG, 0, NULL,
916                                           ¶mSize);
917             if (error != CL_SUCCESS)
918             {
919 
920                 print_error(
921                     error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
922                 return error;
923             }
924 
925             std::string log;
926             log.resize(paramSize / sizeof(char));
927             error = clGetProgramBuildInfo(*outProgram, devices[z],
928                                           CL_PROGRAM_BUILD_LOG, paramSize,
929                                           &log[0], NULL);
930             if (error != CL_SUCCESS || log[0] == '\0')
931             {
932                 log_error("Device %d (%s) failed to return a build log\n", z,
933                           deviceName);
934                 if (error)
935                 {
936                     print_error(
937                         error,
938                         "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
939                     return error;
940                 }
941                 else
942                 {
943                     log_error("clGetProgramBuildInfo returned an empty log.\n");
944                     return -1;
945                 }
946             }
947             // In this case we've already printed out the code above.
948             if (!printedSource)
949             {
950                 unsigned int i;
951                 log_error("Original source is: ------------\n");
952                 for (i = 0; i < numKernelLines; i++)
953                     log_error("%s", kernelProgram[i]);
954                 printedSource = 1;
955             }
956             log_error("Build log for device \"%s\" is: ------------\n",
957                       deviceName);
958             log_error("%s\n", log.c_str());
959             log_error("\n----------\n");
960             return -1;
961         }
962     }
963 
964     if (buildFailed)
965     {
966         return -1;
967     }
968 
969     /* And create a kernel from it */
970     if (kernelName != NULL)
971     {
972         *outKernel = clCreateKernel(*outProgram, kernelName, &error);
973         if (*outKernel == NULL || error != CL_SUCCESS)
974         {
975             print_error(error, "Unable to create kernel");
976             return error;
977         }
978     }
979 
980     return 0;
981 }
982 
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)983 int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
984                                     size_t *outMaxSize, size_t *outLimits)
985 {
986     cl_device_id *devices;
987     size_t size, maxCommonSize = 0;
988     int numDevices, i, j, error;
989     cl_uint numDims;
990     size_t outSize;
991     size_t sizeLimit[] = { 1, 1, 1 };
992 
993 
994     /* Assume fewer than 16 devices will be returned */
995     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize);
996     test_error(error, "Unable to obtain list of devices size for context");
997     devices = (cl_device_id *)malloc(outSize);
998     BufferOwningPtr<cl_device_id> devicesBuf(devices);
999 
1000     error =
1001         clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL);
1002     test_error(error, "Unable to obtain list of devices for context");
1003 
1004     numDevices = (int)(outSize / sizeof(cl_device_id));
1005 
1006     for (i = 0; i < numDevices; i++)
1007     {
1008         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
1009                                 sizeof(size), &size, NULL);
1010         test_error(error, "Unable to obtain max work group size for device");
1011         if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1012 
1013         error = clGetKernelWorkGroupInfo(kernel, devices[i],
1014                                          CL_KERNEL_WORK_GROUP_SIZE,
1015                                          sizeof(size), &size, NULL);
1016         test_error(
1017             error,
1018             "Unable to obtain max work group size for device and kernel combo");
1019         if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;
1020 
1021         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1022                                 sizeof(numDims), &numDims, NULL);
1023         test_error(
1024             error,
1025             "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1026         sizeLimit[0] = 1;
1027         error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
1028                                 numDims * sizeof(size_t), sizeLimit, NULL);
1029         test_error(error,
1030                    "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1031 
1032         if (outLimits != NULL)
1033         {
1034             if (i == 0)
1035             {
1036                 for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j];
1037             }
1038             else
1039             {
1040                 for (j = 0; j < (int)numDims; j++)
1041                 {
1042                     if (sizeLimit[j] < outLimits[j])
1043                         outLimits[j] = sizeLimit[j];
1044                 }
1045             }
1046         }
1047     }
1048 
1049     *outMaxSize = (unsigned int)maxCommonSize;
1050     return 0;
1051 }
1052 
1053 
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1054 extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
1055                                                         cl_kernel kernel,
1056                                                         size_t *outSize)
1057 {
1058     cl_uint maxDim;
1059     size_t maxWgSize;
1060     size_t *maxWgSizePerDim;
1061     int error;
1062 
1063     error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
1064                                      sizeof(size_t), &maxWgSize, NULL);
1065     test_error(error,
1066                "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");
1067 
1068     error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
1069                             sizeof(cl_uint), &maxDim, NULL);
1070     test_error(error,
1071                "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");
1072     maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t));
1073     if (!maxWgSizePerDim)
1074     {
1075         log_error("Unable to allocate maxWgSizePerDim\n");
1076         return -1;
1077     }
1078 
1079     error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
1080                             maxDim * sizeof(size_t), maxWgSizePerDim, NULL);
1081     if (error != CL_SUCCESS)
1082     {
1083         log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
1084         free(maxWgSizePerDim);
1085         return error;
1086     }
1087 
1088     // "maxWgSize" is limited to that of the first dimension.
1089     if (maxWgSize > maxWgSizePerDim[0])
1090     {
1091         maxWgSize = maxWgSizePerDim[0];
1092     }
1093 
1094     free(maxWgSizePerDim);
1095 
1096     *outSize = maxWgSize;
1097     return 0;
1098 }
1099 
1100 
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1101 int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
1102                                    size_t globalThreadSize, size_t *outMaxSize)
1103 {
1104     size_t sizeLimit[3];
1105     int error =
1106         get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit);
1107     if (error != 0) return error;
1108 
1109     /* Now find the largest factor of globalThreadSize that is <= maxCommonSize
1110      */
1111     /* Note for speed, we don't need to check the range of maxCommonSize, b/c
1112      once it gets to 1, the modulo test will succeed and break the loop anyway
1113    */
1114     for (;
1115          (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]);
1116          (*outMaxSize)--)
1117         ;
1118     return 0;
1119 }
1120 
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1121 int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
1122                                       size_t *globalThreadSizes,
1123                                       size_t *outMaxSizes)
1124 {
1125     size_t sizeLimit[3];
1126     size_t maxSize;
1127     int error =
1128         get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1129     if (error != 0) return error;
1130 
1131     /* Now find a set of factors, multiplied together less than maxSize, but
1132        each a factor of the global sizes */
1133 
1134     /* Simple case */
1135     if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize)
1136     {
1137         if (globalThreadSizes[0] <= sizeLimit[0]
1138             && globalThreadSizes[1] <= sizeLimit[1])
1139         {
1140             outMaxSizes[0] = globalThreadSizes[0];
1141             outMaxSizes[1] = globalThreadSizes[1];
1142             return 0;
1143         }
1144     }
1145 
1146     size_t remainingSize, sizeForThisOne;
1147     remainingSize = maxSize;
1148     int i, j;
1149     for (i = 0; i < 2; i++)
1150     {
1151         if (globalThreadSizes[i] > remainingSize)
1152             sizeForThisOne = remainingSize;
1153         else
1154             sizeForThisOne = globalThreadSizes[i];
1155         for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1156              || (sizeForThisOne > sizeLimit[i]);
1157              sizeForThisOne--)
1158             ;
1159         outMaxSizes[i] = sizeForThisOne;
1160         remainingSize = maxSize;
1161         for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1162     }
1163 
1164     return 0;
1165 }
1166 
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1167 int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
1168                                       size_t *globalThreadSizes,
1169                                       size_t *outMaxSizes)
1170 {
1171     size_t sizeLimit[3];
1172     size_t maxSize;
1173     int error =
1174         get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit);
1175     if (error != 0) return error;
1176     /* Now find a set of factors, multiplied together less than maxSize, but
1177      each a factor of the global sizes */
1178 
1179     /* Simple case */
1180     if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
1181         <= maxSize)
1182     {
1183         if (globalThreadSizes[0] <= sizeLimit[0]
1184             && globalThreadSizes[1] <= sizeLimit[1]
1185             && globalThreadSizes[2] <= sizeLimit[2])
1186         {
1187             outMaxSizes[0] = globalThreadSizes[0];
1188             outMaxSizes[1] = globalThreadSizes[1];
1189             outMaxSizes[2] = globalThreadSizes[2];
1190             return 0;
1191         }
1192     }
1193 
1194     size_t remainingSize, sizeForThisOne;
1195     remainingSize = maxSize;
1196     int i, j;
1197     for (i = 0; i < 3; i++)
1198     {
1199         if (globalThreadSizes[i] > remainingSize)
1200             sizeForThisOne = remainingSize;
1201         else
1202             sizeForThisOne = globalThreadSizes[i];
1203         for (; (globalThreadSizes[i] % sizeForThisOne) != 0
1204              || (sizeForThisOne > sizeLimit[i]);
1205              sizeForThisOne--)
1206             ;
1207         outMaxSizes[i] = sizeForThisOne;
1208         remainingSize = maxSize;
1209         for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j];
1210     }
1211 
1212     return 0;
1213 }
1214 
1215 /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1216 int is_image_format_supported(cl_context context, cl_mem_flags flags,
1217                               cl_mem_object_type image_type,
1218                               const cl_image_format *fmt)
1219 {
1220     cl_image_format *list;
1221     cl_uint count = 0;
1222     cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
1223                                             NULL, &count);
1224     if (count == 0) return 0;
1225 
1226     list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
1227     if (NULL == list)
1228     {
1229         log_error("Error: unable to allocate %ld byte buffer for image format "
1230                   "list at %s:%d (err = %d)\n",
1231                   count * sizeof(cl_image_format), __FILE__, __LINE__, err);
1232         return 0;
1233     }
1234     BufferOwningPtr<cl_image_format> listBuf(list);
1235 
1236 
1237     cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
1238                                               list, NULL);
1239     if (error)
1240     {
1241         log_error("Error: failed to obtain supported image type list at %s:%d "
1242                   "(err = %d)\n",
1243                   __FILE__, __LINE__, err);
1244         return 0;
1245     }
1246 
1247     // iterate looking for a match.
1248     cl_uint i;
1249     for (i = 0; i < count; i++)
1250     {
1251         if (fmt->image_channel_data_type == list[i].image_channel_data_type
1252             && fmt->image_channel_order == list[i].image_channel_order)
1253             break;
1254     }
1255 
1256     return (i < count) ? 1 : 0;
1257 }
1258 
1259 size_t get_pixel_bytes(const cl_image_format *fmt);
get_pixel_bytes(const cl_image_format * fmt)1260 size_t get_pixel_bytes(const cl_image_format *fmt)
1261 {
1262     size_t chanCount;
1263     switch (fmt->image_channel_order)
1264     {
1265         case CL_R:
1266         case CL_A:
1267         case CL_Rx:
1268         case CL_INTENSITY:
1269         case CL_LUMINANCE:
1270         case CL_DEPTH: chanCount = 1; break;
1271         case CL_RG:
1272         case CL_RA:
1273         case CL_RGx: chanCount = 2; break;
1274         case CL_RGB:
1275         case CL_RGBx:
1276         case CL_sRGB:
1277         case CL_sRGBx: chanCount = 3; break;
1278         case CL_RGBA:
1279         case CL_ARGB:
1280         case CL_BGRA:
1281         case CL_sBGRA:
1282         case CL_sRGBA:
1283 #ifdef CL_1RGB_APPLE
1284         case CL_1RGB_APPLE:
1285 #endif
1286 #ifdef CL_BGR1_APPLE
1287         case CL_BGR1_APPLE:
1288 #endif
1289             chanCount = 4;
1290             break;
1291         default:
1292             log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
1293             abort();
1294             break;
1295     }
1296 
1297     switch (fmt->image_channel_data_type)
1298     {
1299         case CL_UNORM_SHORT_565:
1300         case CL_UNORM_SHORT_555: return 2;
1301 
1302         case CL_UNORM_INT_101010: return 4;
1303 
1304         case CL_SNORM_INT8:
1305         case CL_UNORM_INT8:
1306         case CL_SIGNED_INT8:
1307         case CL_UNSIGNED_INT8: return chanCount;
1308 
1309         case CL_SNORM_INT16:
1310         case CL_UNORM_INT16:
1311         case CL_HALF_FLOAT:
1312         case CL_SIGNED_INT16:
1313         case CL_UNSIGNED_INT16:
1314 #ifdef CL_SFIXED14_APPLE
1315         case CL_SFIXED14_APPLE:
1316 #endif
1317             return chanCount * 2;
1318 
1319         case CL_SIGNED_INT32:
1320         case CL_UNSIGNED_INT32:
1321         case CL_FLOAT: return chanCount * 4;
1322 
1323         default:
1324             log_error("Unknown channel data type at %s:%d!\n", __FILE__,
1325                       __LINE__);
1326             abort();
1327     }
1328 
1329     return 0;
1330 }
1331 
verifyImageSupport(cl_device_id device)1332 test_status verifyImageSupport(cl_device_id device)
1333 {
1334     int result = checkForImageSupport(device);
1335     if (result == 0)
1336     {
1337         return TEST_PASS;
1338     }
1339     if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED)
1340     {
1341         log_error("SKIPPED: Device does not supported images as required by "
1342                   "this test!\n");
1343         return TEST_SKIP;
1344     }
1345     return TEST_FAIL;
1346 }
1347 
checkForImageSupport(cl_device_id device)1348 int checkForImageSupport(cl_device_id device)
1349 {
1350     cl_uint i;
1351     int error;
1352 
1353 
1354     /* Check the device props to see if images are supported at all first */
1355     error =
1356         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1357     test_error(error, "Unable to query device for image support");
1358     if (i == 0)
1359     {
1360         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1361     }
1362 
1363     /* So our support is good */
1364     return 0;
1365 }
1366 
checkFor3DImageSupport(cl_device_id device)1367 int checkFor3DImageSupport(cl_device_id device)
1368 {
1369     cl_uint i;
1370     int error;
1371 
1372     /* Check the device props to see if images are supported at all first */
1373     error =
1374         clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL);
1375     test_error(error, "Unable to query device for image support");
1376     if (i == 0)
1377     {
1378         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1379     }
1380 
1381     char profile[128];
1382     error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile,
1383                             NULL);
1384     test_error(error, "Unable to query device for CL_DEVICE_PROFILE");
1385     if (0 == strcmp(profile, "EMBEDDED_PROFILE"))
1386     {
1387         size_t width = -1L;
1388         size_t height = -1L;
1389         size_t depth = -1L;
1390         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
1391                                 sizeof(width), &width, NULL);
1392         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");
1393         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
1394                                 sizeof(height), &height, NULL);
1395         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");
1396         error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
1397                                 sizeof(depth), &depth, NULL);
1398         test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");
1399 
1400         if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1401     }
1402 
1403     /* So our support is good */
1404     return 0;
1405 }
1406 
checkForReadWriteImageSupport(cl_device_id device)1407 int checkForReadWriteImageSupport(cl_device_id device)
1408 {
1409     if (checkForImageSupport(device))
1410     {
1411         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1412     }
1413 
1414     auto device_cl_version = get_device_cl_version(device);
1415     if (device_cl_version >= Version(3, 0))
1416     {
1417         // In OpenCL 3.0, Read-Write images are optional.
1418         // Check if they are supported.
1419         cl_uint are_rw_images_supported{};
1420         test_error(
1421             clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
1422                             sizeof(are_rw_images_supported),
1423                             &are_rw_images_supported, nullptr),
1424             "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
1425         if (0 == are_rw_images_supported)
1426         {
1427             log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
1428             return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1429         }
1430     }
1431     // READ_WRITE images are not supported on 1.X devices.
1432     else if (device_cl_version < Version(2, 0))
1433     {
1434         log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried.");
1435         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1436     }
1437     // Support for read-write image arguments is required
1438     // for an 2.X device if the device supports images.
1439 
1440     /* So our support is good */
1441     return 0;
1442 }
1443 
get_min_alignment(cl_context context)1444 size_t get_min_alignment(cl_context context)
1445 {
1446     static cl_uint align_size = 0;
1447 
1448     if (0 == align_size)
1449     {
1450         cl_device_id *devices;
1451         size_t devices_size = 0;
1452         cl_uint result = 0;
1453         cl_int error;
1454         int i;
1455 
1456         error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
1457                                  &devices_size);
1458         test_error_ret(error, "clGetContextInfo failed", 0);
1459 
1460         devices = (cl_device_id *)malloc(devices_size);
1461         if (devices == NULL)
1462         {
1463             print_error(error, "malloc failed");
1464             return 0;
1465         }
1466 
1467         error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size,
1468                                  (void *)devices, NULL);
1469         test_error_ret(error, "clGetContextInfo failed", 0);
1470 
1471         for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++)
1472         {
1473             cl_uint alignment = 0;
1474 
1475             error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1476                                     sizeof(cl_uint), (void *)&alignment, NULL);
1477 
1478             if (error == CL_SUCCESS)
1479             {
1480                 alignment >>= 3; // convert bits to bytes
1481                 result = (alignment > result) ? alignment : result;
1482             }
1483             else
1484                 print_error(error, "clGetDeviceInfo failed");
1485         }
1486 
1487         align_size = result;
1488         free(devices);
1489     }
1490 
1491     return align_size;
1492 }
1493 
get_default_rounding_mode(cl_device_id device)1494 cl_device_fp_config get_default_rounding_mode(cl_device_id device)
1495 {
1496     char profileStr[128] = "";
1497     cl_device_fp_config single = 0;
1498     int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
1499                                 sizeof(single), &single, NULL);
1500     if (error)
1501         test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
1502                        0);
1503 
1504     if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
1505 
1506     if (0 == (single & CL_FP_ROUND_TO_ZERO))
1507         test_error_ret(-1,
1508                        "FAILURE: device must support either "
1509                        "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
1510                        0);
1511 
1512     // Make sure we are an embedded device before allowing a pass
1513     if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
1514                                  &profileStr, NULL)))
1515         test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);
1516 
1517     if (strcmp(profileStr, "EMBEDDED_PROFILE"))
1518         test_error_ret(error,
1519                        "FAILURE: non-EMBEDDED_PROFILE devices must support "
1520                        "CL_FP_ROUND_TO_NEAREST",
1521                        0);
1522 
1523     return CL_FP_ROUND_TO_ZERO;
1524 }
1525 
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1526 int checkDeviceForQueueSupport(cl_device_id device,
1527                                cl_command_queue_properties prop)
1528 {
1529     cl_command_queue_properties realProps;
1530     cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
1531                                    sizeof(realProps), &realProps, NULL);
1532     test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);
1533 
1534     return (realProps & prop) ? 1 : 0;
1535 }
1536 
printDeviceHeader(cl_device_id device)1537 int printDeviceHeader(cl_device_id device)
1538 {
1539     char deviceName[512], deviceVendor[512], deviceVersion[512],
1540         cLangVersion[512];
1541     int error;
1542 
1543     error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName),
1544                             deviceName, NULL);
1545     test_error(error, "Unable to get CL_DEVICE_NAME for device");
1546 
1547     error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor),
1548                             deviceVendor, NULL);
1549     test_error(error, "Unable to get CL_DEVICE_VENDOR for device");
1550 
1551     error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion),
1552                             deviceVersion, NULL);
1553     test_error(error, "Unable to get CL_DEVICE_VERSION for device");
1554 
1555     error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1556                             sizeof(cLangVersion), cLangVersion, NULL);
1557     test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");
1558 
1559     log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute "
1560              "Device Version = %s%s%s\n",
1561              deviceName, deviceVendor, deviceVersion,
1562              (error == CL_SUCCESS) ? ", CL C Version = " : "",
1563              (error == CL_SUCCESS) ? cLangVersion : "");
1564 
1565     auto version = get_device_cl_version(device);
1566     if (version >= Version(3, 0))
1567     {
1568         auto ctsVersion = get_device_info_string(
1569             device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
1570         log_info("Device latest conformance version passed: %s\n",
1571                  ctsVersion.c_str());
1572     }
1573 
1574     return CL_SUCCESS;
1575 }
1576 
get_device_cl_c_version(cl_device_id device)1577 Version get_device_cl_c_version(cl_device_id device)
1578 {
1579     auto device_cl_version = get_device_cl_version(device);
1580 
1581     // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION
1582     // did not exist, but since this is just the first version we can
1583     // return 1.0.
1584     if (device_cl_version == Version{ 1, 0 })
1585     {
1586         return Version{ 1, 0 };
1587     }
1588 
1589     // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C
1590     // versions are backwards compatible, hence querying with the
1591     // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported
1592     // OpenCL C version.
1593     size_t opencl_c_version_size_in_bytes{};
1594     auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
1595                                  &opencl_c_version_size_in_bytes);
1596     test_error_ret(error,
1597                    "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1598                    (Version{ -1, 0 }));
1599 
1600     std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
1601     error =
1602         clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
1603                         opencl_c_version.size(), &opencl_c_version[0], nullptr);
1604 
1605     test_error_ret(error,
1606                    "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
1607                    (Version{ -1, 0 }));
1608 
1609     // Scrape out the major, minor pair from the string.
1610     auto major = opencl_c_version[opencl_c_version.find('.') - 1];
1611     auto minor = opencl_c_version[opencl_c_version.find('.') + 1];
1612 
1613     return Version{ major - '0', minor - '0' };
1614 }
1615 
get_device_latest_cl_c_version(cl_device_id device)1616 Version get_device_latest_cl_c_version(cl_device_id device)
1617 {
1618     auto device_cl_version = get_device_cl_version(device);
1619 
1620     // If the device version >= 3.0 it must support the
1621     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most
1622     // recent CL C version supported by the device.
1623     if (device_cl_version >= Version{ 3, 0 })
1624     {
1625         size_t opencl_c_all_versions_size_in_bytes{};
1626         auto error =
1627             clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1628                             &opencl_c_all_versions_size_in_bytes);
1629         test_error_ret(
1630             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1631             (Version{ -1, 0 }));
1632         std::vector<cl_name_version> name_versions(
1633             opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1634         error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1635                                 opencl_c_all_versions_size_in_bytes,
1636                                 name_versions.data(), nullptr);
1637         test_error_ret(
1638             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1639             (Version{ -1, 0 }));
1640 
1641         Version max_supported_cl_c_version{};
1642         for (const auto &name_version : name_versions)
1643         {
1644             Version current_version{ CL_VERSION_MAJOR(name_version.version),
1645                                      CL_VERSION_MINOR(name_version.version) };
1646             max_supported_cl_c_version =
1647                 (current_version > max_supported_cl_c_version)
1648                 ? current_version
1649                 : max_supported_cl_c_version;
1650         }
1651         return max_supported_cl_c_version;
1652     }
1653 
1654     return get_device_cl_c_version(device);
1655 }
1656 
get_max_OpenCL_C_for_context(cl_context context)1657 Version get_max_OpenCL_C_for_context(cl_context context)
1658 {
1659     // Get all the devices in the context and find the maximum
1660     // universally supported OpenCL C version.
1661     size_t devices_size_in_bytes{};
1662     auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
1663                                   &devices_size_in_bytes);
1664     test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES",
1665                    (Version{ -1, 0 }));
1666     std::vector<cl_device_id> devices(devices_size_in_bytes
1667                                       / sizeof(cl_device_id));
1668     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes,
1669                              devices.data(), nullptr);
1670     auto current_version = get_device_latest_cl_c_version(devices[0]);
1671     std::for_each(std::next(devices.begin()), devices.end(),
1672                   [¤t_version](cl_device_id device) {
1673                       auto device_version =
1674                           get_device_latest_cl_c_version(device);
1675                       // OpenCL 3.0 is not backwards compatible with 2.0.
1676                       // If we have 3.0 and 2.0 in the same driver we
1677                       // use 1.2.
1678                       if (((device_version >= Version(2, 0)
1679                             && device_version < Version(3, 0))
1680                            && current_version >= Version(3, 0))
1681                           || (device_version >= Version(3, 0)
1682                               && (current_version >= Version(2, 0)
1683                                   && current_version < Version(3, 0))))
1684                       {
1685                           current_version = Version(1, 2);
1686                       }
1687                       else
1688                       {
1689                           current_version =
1690                               (std::min)(device_version, current_version);
1691                       }
1692                   });
1693     return current_version;
1694 }
1695 
device_supports_cl_c_version(cl_device_id device,Version version)1696 bool device_supports_cl_c_version(cl_device_id device, Version version)
1697 {
1698     auto device_cl_version = get_device_cl_version(device);
1699 
1700     // In general, a device does not support an OpenCL C version if it is <=
1701     // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the
1702     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query.
1703 
1704     // If the device version >= 3.0 it must support the
1705     // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being
1706     // used must appear in the query result if it's <=
1707     // CL_DEVICE_OPENCL_C_VERSION.
1708     if (device_cl_version >= Version{ 3, 0 })
1709     {
1710         size_t opencl_c_all_versions_size_in_bytes{};
1711         auto error =
1712             clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr,
1713                             &opencl_c_all_versions_size_in_bytes);
1714         test_error_ret(
1715             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1716             (false));
1717         std::vector<cl_name_version> name_versions(
1718             opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version));
1719         error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
1720                                 opencl_c_all_versions_size_in_bytes,
1721                                 name_versions.data(), nullptr);
1722         test_error_ret(
1723             error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
1724             (false));
1725 
1726         for (const auto &name_version : name_versions)
1727         {
1728             Version current_version{ CL_VERSION_MAJOR(name_version.version),
1729                                      CL_VERSION_MINOR(name_version.version) };
1730             if (current_version == version)
1731             {
1732                 return true;
1733             }
1734         }
1735     }
1736 
1737     return version <= get_device_cl_c_version(device);
1738 }
1739 
// Repeatedly calls `fn` until it returns true or the timeout budget is used
// up, sleeping `interval_ms` milliseconds after every unsuccessful call.
//
// Elapsed time is accounted in whole intervals (the time spent inside `fn` is
// not measured). `fn` is not called at all when `timeout_ms` is 0.
//
// Returns true as soon as `fn` returns true, false once the budget is spent.
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    // A zero interval would never advance the elapsed-time counter and the
    // loop could spin forever; account for at least one millisecond per poll.
    const unsigned step_ms = (interval_ms == 0) ? 1u : interval_ms;

    unsigned time_spent_ms = 0;
    while (time_spent_ms < timeout_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(step_ms * 1000);

        // Stop once the next step would reach the timeout. Comparing against
        // the remaining budget (instead of adding first) keeps the counter
        // from wrapping around for timeouts close to UINT_MAX.
        if (step_ms >= timeout_ms - time_spent_ms)
        {
            break;
        }
        time_spent_ms += step_ms;
    }

    return false;
}
1759 
device_supports_double(cl_device_id device)1760 bool device_supports_double(cl_device_id device)
1761 {
1762     if (is_extension_available(device, "cl_khr_fp64"))
1763     {
1764         return true;
1765     }
1766     else
1767     {
1768         cl_device_fp_config double_fp_config;
1769         cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
1770                                      sizeof(double_fp_config),
1771                                      &double_fp_config, nullptr);
1772         test_error(err,
1773                    "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed");
1774         return double_fp_config != 0;
1775     }
1776 }
1777 
device_supports_half(cl_device_id device)1778 bool device_supports_half(cl_device_id device)
1779 {
1780     return is_extension_available(device, "cl_khr_fp16");
1781 }
1782