• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "crc32.h"
17 #include "kernelHelpers.h"
18 #include "deviceInfo.h"
19 #include "errorHelpers.h"
20 #include "imageHelpers.h"
21 #include "typeWrappers.h"
22 #include "testHarness.h"
23 #include "parseParameters.h"
24 
25 #include <cassert>
26 #include <vector>
27 #include <string>
28 #include <fstream>
29 #include <sstream>
30 #include <iomanip>
31 
// Directory separator used by the helpers in this file when joining a
// directory and a file name.
#ifdef _WIN32
std::string slash("\\");
#else
std::string slash("/");
#endif
37 
38 static cl_int get_first_device_id(const cl_context context, cl_device_id &device);
39 
// Returns the length in bytes of the file `fileName`, or 0 when the file
// cannot be opened for reading.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // After seeking to the end, the read position equals the file length.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
50 
// Concatenates the first `numKernelLines` C strings of `kernelProgram`, in
// order and without any separator, into a single string.
static std::string get_kernel_content(unsigned int numKernelLines, const char *const *kernelProgram)
{
    std::string joined;
    for (unsigned int line = 0; line < numKernelLines; ++line)
    {
        joined.append(kernelProgram[line]);
    }
    return joined;
}
62 
get_kernel_name(const std::string & source)63 std::string get_kernel_name(const std::string &source)
64 {
65     // Create list of kernel names
66     std::string kernelsList;
67     size_t kPos = source.find("kernel");
68     while (kPos != std::string::npos)
69     {
70         // check for '__kernel'
71         size_t pos = kPos;
72         if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
73             pos -= 2;
74 
75         //check character before 'kernel' (white space expected)
76         size_t wsPos = source.find_last_of(" \t\r\n", pos);
77         if (wsPos == std::string::npos || wsPos + 1 == pos)
78         {
79             //check character after 'kernel' (white space expected)
80             size_t akPos = kPos + sizeof("kernel") - 1;
81             wsPos = source.find_first_of(" \t\r\n", akPos);
82             if (!(wsPos == akPos))
83             {
84                 kPos = source.find("kernel", kPos + 1);
85                 continue;
86             }
87 
88             bool attributeFound;
89             do
90             {
91                 attributeFound = false;
92                 // find '(' after kernel name name
93                 size_t pPos = source.find("(", akPos);
94                 if (!(pPos != std::string::npos))
95                     continue;
96 
97                 // check for not empty kernel name before '('
98                 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
99                 if (!(pos != std::string::npos && pos > akPos))
100                     continue;
101 
102                 //find character before kernel name
103                 wsPos = source.find_last_of(" \t\r\n", pos);
104                 if (!(wsPos != std::string::npos && wsPos >= akPos))
105                     continue;
106 
107                 std::string name = source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
108                 //check for kernel attribute
109                 if (name == "__attribute__")
110                 {
111                     attributeFound = true;
112                     int pCount = 1;
113                     akPos = pPos + 1;
114                     while (pCount > 0 && akPos != std::string::npos)
115                     {
116                         akPos = source.find_first_of("()", akPos + 1);
117                         if (akPos != std::string::npos)
118                         {
119                             if (source[akPos] == '(')
120                                 pCount++;
121                             else
122                                 pCount--;
123                         }
124                     }
125                 }
126                 else
127                 {
128                     kernelsList += name + ".";
129                 }
130             } while (attributeFound);
131         }
132         kPos = source.find("kernel", kPos + 1);
133     }
134     std::ostringstream oss;
135     if (MAX_LEN_FOR_KERNEL_LIST > 0)
136     {
137         if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
138         {
139             kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
140             kernelsList[kernelsList.size() - 1] = '.';
141             kernelsList[kernelsList.size() - 1] = '.';
142         }
143         oss << kernelsList;
144     }
145     return oss.str();
146 }
147 
get_offline_compilation_file_type_str(const CompilationMode compilationMode)148 static std::string get_offline_compilation_file_type_str(const CompilationMode compilationMode)
149 {
150     switch (compilationMode)
151     {
152         default:
153             assert(0 && "Invalid compilation mode");
154             abort();
155         case kOnline:
156             assert(0 && "Invalid compilation mode for offline compilation");
157             abort();
158         case kBinary:
159             return "binary";
160         case kSpir_v:
161             return "SPIR-V";
162     }
163 }
164 
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165 static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166                                               const char *const *kernelProgram,
167                                               const char *buildOptions)
168 {
169     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170     std::string kernelName = get_kernel_name(kernel);
171     cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172     std::ostringstream oss;
173     oss << kernelName <<  std::hex << std::setfill('0') << std::setw(8) << kernelCrc;
174     if(buildOptions) {
175         cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
176         oss << '.' << std::hex << std::setfill('0') << std::setw(8) << bOptionsCrc;
177     }
178     return oss.str();
179 }
180 
181 
182 static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)183 get_cl_build_options_filename_with_path(const std::string& filePath,
184                                         const std::string& fileNamePrefix) {
185     return filePath + slash + fileNamePrefix + ".options";
186 }
187 
188 static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)189 get_cl_source_filename_with_path(const std::string& filePath,
190                                  const std::string& fileNamePrefix) {
191     return filePath + slash + fileNamePrefix + ".cl";
192 }
193 
194 static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)195 get_binary_filename_with_path(CompilationMode mode,
196                               cl_uint deviceAddrSpaceSize,
197                               const std::string& filePath,
198                               const std::string& fileNamePrefix) {
199     std::string binaryFilename = filePath + slash + fileNamePrefix;
200     if(kSpir_v == mode) {
201         std::ostringstream extension;
202         extension << ".spv" << deviceAddrSpaceSize;
203         binaryFilename += extension.str();
204     }
205     return binaryFilename;
206 }
207 
file_exist_on_disk(const std::string & filePath,const std::string & fileName)208 static bool file_exist_on_disk(const std::string& filePath,
209                                const std::string& fileName) {
210     std::string fileNameWithPath = filePath + slash + fileName;
211     bool exist = false;
212     std::ifstream ifs;
213 
214     ifs.open(fileNameWithPath.c_str(), std::ios::binary);
215     if(ifs.good())
216         exist = true;
217     ifs.close();
218     return exist;
219 }
220 
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)221 static bool should_save_kernel_source_to_disk(CompilationMode mode,
222                                               CompilationCacheMode cacheMode,
223                                               const std::string& binaryPath,
224                                               const std::string& binaryName)
225 {
226     bool saveToDisk = false;
227     if(cacheMode == kCacheModeDumpCl ||
228        (cacheMode == kCacheModeOverwrite && mode != kOnline)) {
229         saveToDisk = true;
230     }
231     if(cacheMode == kCacheModeCompileIfAbsent && mode != kOnline) {
232         saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
233     }
234     return saveToDisk;
235 }
236 
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)237 static int save_kernel_build_options_to_disk(const std::string& path,
238                                              const std::string& prefix,
239                                              const char *buildOptions) {
240     std::string filename = get_cl_build_options_filename_with_path(path, prefix);
241     std::ofstream ofs(filename.c_str(), std::ios::binary);
242     if (!ofs.good())
243     {
244         log_info("Can't save kernel build options: %s\n", filename.c_str());
245         return -1;
246     }
247     ofs.write(buildOptions, strlen(buildOptions));
248     ofs.close();
249     log_info("Saved kernel build options to file: %s\n", filename.c_str());
250     return CL_SUCCESS;
251 }
252 
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)253 static int save_kernel_source_to_disk(const std::string& path,
254                                       const std::string& prefix,
255                                       const std::string& source) {
256     std::string filename = get_cl_source_filename_with_path(path, prefix);
257     std::ofstream ofs(filename.c_str(), std::ios::binary);
258     if (!ofs.good())
259     {
260         log_info("Can't save kernel source: %s\n", filename.c_str());
261         return -1;
262     }
263     ofs.write(source.c_str(), source.size());
264     ofs.close();
265     log_info("Saved kernel source to file: %s\n", filename.c_str());
266     return CL_SUCCESS;
267 }
268 
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)269 static int save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
270                                                   const char *const *kernelProgram,
271                                                   const char *buildOptions)
272 {
273     int error;
274 
275     std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
276     std::string kernelNamePrefix = get_unique_filename_prefix(numKernelLines,
277                                                              kernelProgram,
278                                                              buildOptions);
279 
280     // save kernel source to disk
281     error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix, kernel);
282 
283     // save kernel build options to disk if exists
284     if (buildOptions != NULL)
285         error |= save_kernel_build_options_to_disk(gCompilationCachePath, kernelNamePrefix, buildOptions);
286 
287     return error;
288 }
289 
get_compilation_mode_str(const CompilationMode compilationMode)290 static std::string get_compilation_mode_str(const CompilationMode compilationMode)
291 {
292     switch (compilationMode)
293     {
294         default:
295             assert(0 && "Invalid compilation mode");
296             abort();
297         case kOnline:
298             return "online";
299         case kBinary:
300             return "binary";
301         case kSpir_v:
302             return "spir-v";
303     }
304 }
305 
#ifdef KHRONOS_OFFLINE_COMPILER
// Assembles the command line that runs the compiler named by
// KHRONOS_OFFLINE_COMPILER to turn `sourceFilename` into SPIR-V written to
// `outputFilename`.
//
// The target triple is spir-unknown-unknown for 32 address bits and
// spir64-unknown-unknown otherwise.  For OpenCL C++ the -cl-std=c++ flag and
// an include directory (CL_LIBCLCXX_DIR) are added; otherwise opencl.h is
// force-included.  `bOptions` is appended before the source file name.
static std::string get_khronos_compiler_command(const cl_uint device_address_space_size,
                                                const bool openclCXX,
                                                const std::string &bOptions,
                                                const std::string &sourceFilename,
                                                const std::string &outputFilename)
{
    std::string command = STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER);

    // Emit SPIR-V.
    command += " -cc1 -emit-spirv";

    // <triple>: spir-unknown-unknown for 32 bit SPIR-V, spir64-unknown-unknown
    // for 64 bit SPIR-V.
    command += (device_address_space_size == 32) ? " -triple=spir-unknown-unknown"
                                                 : " -triple=spir64-unknown-unknown";

    if (openclCXX)
    {
        // OpenCL C++ flag required by the SPIR-V-ready clang (compiler
        // provided by Khronos), followed by the matching include directory.
        command += " -cl-std=c++";
        command += " -I ";
        command += STRINGIFY_VALUE(CL_LIBCLCXX_DIR);
    }
    else
    {
        command += " -include opencl.h";
    }

#ifdef KHRONOS_OFFLINE_COMPILER_OPTIONS
    // NOTE(review): no separator is inserted here, so the macro value
    // presumably has to start with a space - confirm against the build files.
    command += STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER_OPTIONS);
#endif

    // Build options passed to this function, then input and output files.
    command += " " + bOptions;
    command += " " + sourceFilename;
    command += " -o " + outputFilename;

    return command;
}
#endif // KHRONOS_OFFLINE_COMPILER
355 
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)356 static cl_int get_cl_device_info_str(const cl_device_id device, const cl_uint device_address_space_size,
357                                      const CompilationMode compilationMode, std::string &clDeviceInfo)
358 {
359     std::string extensionsString = get_device_extensions_string(device);
360     std::string versionString = get_device_version_string(device);
361 
362     std::ostringstream clDeviceInfoStream;
363     std::string file_type = get_offline_compilation_file_type_str(compilationMode);
364     clDeviceInfoStream << "# OpenCL device info affecting " << file_type << " offline compilation:" << std::endl
365                     << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << std::endl
366                     << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\"" << std::endl;
367     /* We only need the device's supported IL version(s) when compiling IL
368     * that will be loaded with clCreateProgramWithIL() */
369     if (compilationMode == kSpir_v)
370     {
371         std::string ilVersionString = get_device_il_version_string(device);
372         clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString << "\"" << std::endl;
373     }
374     clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\"" << std::endl;
375 
376     clDeviceInfo = clDeviceInfoStream.str();
377 
378     return CL_SUCCESS;
379 }
380 
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)381 static int write_cl_device_info(const cl_device_id device, const cl_uint device_address_space_size,
382                                 const CompilationMode compilationMode, std::string &clDeviceInfoFilename)
383 {
384     std::string clDeviceInfo;
385     int error = get_cl_device_info_str(device, device_address_space_size, compilationMode, clDeviceInfo);
386     if (error != CL_SUCCESS)
387     {
388         return error;
389     }
390 
391     cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
392 
393     /* Get the filename for the clDeviceInfo file.
394      * Note: the file includes the hash on its content, so it is usually unnecessary to delete it. */
395     std::ostringstream clDeviceInfoFilenameStream;
396     clDeviceInfoFilenameStream << gCompilationCachePath << slash << "clDeviceInfo-";
397     clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8) << crc << ".txt";
398 
399     clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
400 
401     if ((size_t) get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
402     {
403         /* The CL device info file has already been created.
404          * Nothing to do. */
405         return 0;
406     }
407 
408     /* The file does not exist or its length is not as expected.  Create/overwrite it. */
409     std::ofstream ofs(clDeviceInfoFilename);
410     if (!ofs.good())
411     {
412         log_info("OfflineCompiler: can't create CL device info file: %s\n", clDeviceInfoFilename.c_str());
413         return -1;
414     }
415     ofs << clDeviceInfo;
416     ofs.close();
417 
418     return CL_SUCCESS;
419 }
420 
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)421 static std::string get_offline_compilation_command(const cl_uint device_address_space_size,
422                                                    const CompilationMode compilationMode,
423                                                    const std::string &bOptions,
424                                                    const std::string &sourceFilename,
425                                                    const std::string &outputFilename,
426                                                    const std::string &clDeviceInfoFilename)
427 {
428     std::ostringstream wrapperOptions;
429 
430     wrapperOptions << gCompilationProgram
431                    << " --mode=" << get_compilation_mode_str(compilationMode)
432                    << " --source=" << sourceFilename
433                    << " --output=" << outputFilename
434                    << " --cl-device-info=" << clDeviceInfoFilename;
435 
436     if (bOptions != "")
437     {
438         // Add build options passed to this function
439         wrapperOptions << " -- " << bOptions;
440     }
441 
442     return wrapperOptions.str();
443 }
444 
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const bool openclCXX)445 static int invoke_offline_compiler(const cl_device_id device,
446                                    const cl_uint device_address_space_size,
447                                    const CompilationMode compilationMode,
448                                    const std::string &bOptions,
449                                    const std::string &sourceFilename,
450                                    const std::string &outputFilename,
451                                    const bool openclCXX)
452 {
453     std::string runString;
454     if (openclCXX)
455     {
456 #ifndef KHRONOS_OFFLINE_COMPILER
457         log_error("CL C++ compilation is not possible: KHRONOS_OFFLINE_COMPILER was not defined.\n");
458         return CL_INVALID_OPERATION;
459 #else
460         if (compilationMode != kSpir_v)
461         {
462             log_error("Compilation mode must be SPIR-V for Khronos compiler");
463             return -1;
464         }
465         runString = get_khronos_compiler_command(device_address_space_size, openclCXX, bOptions,
466                                                  sourceFilename, outputFilename);
467 #endif
468     }
469     else
470     {
471         std::string clDeviceInfoFilename;
472 
473         // See cl_offline_compiler-interface.txt for a description of the
474         // format of the CL device information file generated below, and
475         // the internal command line interface for invoking the offline
476         // compiler.
477 
478         cl_int err = write_cl_device_info(device, device_address_space_size, compilationMode,
479                                           clDeviceInfoFilename);
480         if (err != CL_SUCCESS)
481         {
482             log_error("Failed writing CL device info file\n");
483             return err;
484         }
485 
486         runString = get_offline_compilation_command(device_address_space_size, compilationMode, bOptions,
487                                                     sourceFilename, outputFilename, clDeviceInfoFilename);
488     }
489 
490     // execute script
491     log_info("Executing command: %s\n", runString.c_str());
492     fflush(stdout);
493     int returnCode = system(runString.c_str());
494     if (returnCode != 0)
495     {
496         log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
497         return CL_COMPILE_PROGRAM_FAILURE;
498     }
499 
500     return CL_SUCCESS;
501 }
502 
get_first_device_id(const cl_context context,cl_device_id & device)503 static cl_int get_first_device_id(const cl_context context, cl_device_id &device)
504 {
505     cl_uint numDevices = 0;
506     cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
507     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
508 
509     if (numDevices == 0)
510     {
511         log_error("ERROR: No CL devices found\n");
512         return -1;
513     }
514 
515     std::vector<cl_device_id> devices(numDevices, 0);
516     error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], NULL);
517     test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
518 
519     device = devices[0];
520     return CL_SUCCESS;
521 }
522 
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)523 static cl_int get_device_address_bits(const cl_device_id device, cl_uint &device_address_space_size)
524 {
525     cl_int error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &device_address_space_size, NULL);
526     test_error(error, "Unable to obtain device address bits");
527 
528     if (device_address_space_size != 32 && device_address_space_size != 64)
529     {
530         log_error("ERROR: Unexpected number of device address bits: %u\n", device_address_space_size);
531         return -1;
532     }
533 
534     return CL_SUCCESS;
535 }
536 
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const bool openclCXX,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)537 static int get_offline_compiler_output(std::ifstream &ifs,
538                                        const cl_device_id device,
539                                        cl_uint deviceAddrSpaceSize,
540                                        const bool openclCXX,
541                                        const CompilationMode compilationMode,
542                                        const std::string &bOptions,
543                                        const std::string &kernelPath,
544                                        const std::string &kernelNamePrefix)
545 {
546     std::string sourceFilename = get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
547     std::string outputFilename = get_binary_filename_with_path(compilationMode,
548                                                                deviceAddrSpaceSize,
549                                                                kernelPath,
550                                                                kernelNamePrefix);
551 
552     ifs.open(outputFilename.c_str(), std::ios::binary);
553     if(!ifs.good()) {
554        std::string file_type = get_offline_compilation_file_type_str(compilationMode);
555         if (gCompilationCacheMode == kCacheModeForceRead) {
556             log_info("OfflineCompiler: can't open cached %s file: %s\n",
557                      file_type.c_str(), outputFilename.c_str());
558             return -1;
559         }
560         else {
561             int error = invoke_offline_compiler(device, deviceAddrSpaceSize, compilationMode,
562                                                 bOptions, sourceFilename, outputFilename, openclCXX);
563             if (error != CL_SUCCESS)
564                 return error;
565 
566             // read output file
567             ifs.open(outputFilename.c_str(), std::ios::binary);
568             if (!ifs.good())
569             {
570                 log_info("OfflineCompiler: can't read generated %s file: %s\n",
571                          file_type.c_str(), outputFilename.c_str());
572                 return -1;
573             }
574        }
575     }
576     return CL_SUCCESS;
577 }
578 
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,const bool openclCXX,CompilationMode compilationMode)579 static int create_single_kernel_helper_create_program_offline(cl_context context,
580                                                               cl_device_id device,
581                                                               cl_program *outProgram,
582                                                               unsigned int numKernelLines,
583                                                               const char *const *kernelProgram,
584                                                               const char *buildOptions,
585                                                               const bool openclCXX,
586                                                               CompilationMode compilationMode)
587 {
588     if(kCacheModeDumpCl == gCompilationCacheMode) {
589         return -1;
590     }
591 
592     // Get device CL_DEVICE_ADDRESS_BITS
593     int error;
594     cl_uint device_address_space_size = 0;
595     if (device == NULL)
596     {
597         error = get_first_device_id(context, device);
598         test_error(error, "Failed to get device ID for first device");
599     }
600     error = get_device_address_bits(device, device_address_space_size);
601     if (error != CL_SUCCESS)
602         return error;
603 
604     // set build options
605     std::string bOptions;
606     bOptions += buildOptions ? std::string(buildOptions) : "";
607 
608     std::string kernelName = get_unique_filename_prefix(numKernelLines,
609                                                         kernelProgram,
610                                                         buildOptions);
611 
612 
613 
614     std::ifstream ifs;
615     error = get_offline_compiler_output(ifs, device, device_address_space_size, openclCXX, compilationMode, bOptions, gCompilationCachePath, kernelName);
616     if (error != CL_SUCCESS)
617       return error;
618 
619     // -----------------------------------------------------------------------------------
620     // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
621     // -----------------------------------------------------------------------------------
622     // Only OpenCL C++ to SPIR-V compilation
623     #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
624     if(openclCXX)
625     {
626         return CL_SUCCESS;
627     }
628     #endif
629 
630     ifs.seekg(0, ifs.end);
631     int length = ifs.tellg();
632     ifs.seekg(0, ifs.beg);
633 
634     //treat modifiedProgram as input for clCreateProgramWithBinary
635     if (compilationMode == kBinary)
636     {
637         // read binary from file:
638         std::vector<unsigned char> modifiedKernelBuf(length);
639 
640         ifs.read((char *)&modifiedKernelBuf[0], length);
641         ifs.close();
642 
643         size_t lengths = modifiedKernelBuf.size();
644         const unsigned char *binaries = { &modifiedKernelBuf[0] };
645         log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithBinary\n");
646         *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths, &binaries, NULL, &error);
647         if (*outProgram == NULL || error != CL_SUCCESS)
648         {
649             print_error(error, "clCreateProgramWithBinary failed");
650             return error;
651         }
652     }
653     //treat modifiedProgram as input for clCreateProgramWithIL
654     else if (compilationMode == kSpir_v)
655     {
656         // read spir-v from file:
657         std::vector<unsigned char> modifiedKernelBuf(length);
658 
659         ifs.read((char *)&modifiedKernelBuf[0], length);
660         ifs.close();
661 
662         size_t length = modifiedKernelBuf.size();
663         log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithIL\n");
664 
665         *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0], length, &error);
666         if (*outProgram == NULL || error != CL_SUCCESS)
667         {
668             print_error(error, "clCreateProgramWithIL failed");
669             return error;
670         }
671     }
672 
673     return CL_SUCCESS;
674 }
675 
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,const bool openclCXX,CompilationMode compilationMode)676 static int create_single_kernel_helper_create_program(cl_context context,
677                                                       cl_device_id device,
678                                                       cl_program *outProgram,
679                                                       unsigned int numKernelLines,
680                                                       const char **kernelProgram,
681                                                       const char *buildOptions,
682                                                       const bool openclCXX,
683                                                       CompilationMode compilationMode)
684 {
685     std::string filePrefix = get_unique_filename_prefix(numKernelLines,
686                                                         kernelProgram,
687                                                         buildOptions);
688     bool shouldSaveToDisk = should_save_kernel_source_to_disk(compilationMode,
689                                                               gCompilationCacheMode,
690                                                               gCompilationCachePath,
691                                                               filePrefix);
692 
693     if(shouldSaveToDisk)
694     {
695         if(CL_SUCCESS != save_kernel_source_and_options_to_disk(numKernelLines, kernelProgram, buildOptions))
696         {
697             log_error("Unable to dump kernel source to disk");
698             return -1;
699         }
700     }
701     if (compilationMode == kOnline)
702     {
703         int error = CL_SUCCESS;
704 
705         /* Create the program object from source */
706         *outProgram = clCreateProgramWithSource(context, numKernelLines, kernelProgram, NULL, &error);
707         if (*outProgram == NULL || error != CL_SUCCESS)
708         {
709             print_error(error, "clCreateProgramWithSource failed");
710             return error;
711         }
712         return CL_SUCCESS;
713     }
714     else
715     {
716         return create_single_kernel_helper_create_program_offline(context, device, outProgram,
717                                                                   numKernelLines, kernelProgram,
718                                                                   buildOptions, openclCXX,
719                                                                   compilationMode);
720     }
721 }
722 
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,const bool openclCXX)723 int create_single_kernel_helper_create_program(cl_context context,
724                                                cl_program *outProgram,
725                                                unsigned int numKernelLines,
726                                                const char **kernelProgram,
727                                                const char *buildOptions,
728                                                const bool openclCXX)
729 {
730     return create_single_kernel_helper_create_program(context, NULL, outProgram,
731                                                       numKernelLines, kernelProgram,
732                                                       buildOptions, openclCXX,
733                                                       gCompilationMode);
734 }
735 
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,const bool openclCXX)736 int create_single_kernel_helper_create_program_for_device(cl_context context,
737                                                           cl_device_id device,
738                                                           cl_program *outProgram,
739                                                           unsigned int numKernelLines,
740                                                           const char **kernelProgram,
741                                                           const char *buildOptions,
742                                                           const bool openclCXX)
743 {
744     return create_single_kernel_helper_create_program(context, device, outProgram,
745                                                       numKernelLines, kernelProgram,
746                                                       buildOptions, openclCXX,
747                                                       gCompilationMode);
748 }
749 
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions,const bool openclCXX)750 int create_single_kernel_helper_with_build_options(cl_context context,
751                                                    cl_program *outProgram,
752                                                    cl_kernel *outKernel,
753                                                    unsigned int numKernelLines,
754                                                    const char **kernelProgram,
755                                                    const char *kernelName,
756                                                    const char *buildOptions,
757                                                    const bool openclCXX)
758 {
759     return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, buildOptions, openclCXX);
760 }
761 
762 // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions,const bool openclCXX)763 int create_single_kernel_helper(cl_context context,
764                                 cl_program *outProgram,
765                                 cl_kernel *outKernel,
766                                 unsigned int numKernelLines,
767                                 const char **kernelProgram,
768                                 const char *kernelName,
769                                 const char *buildOptions,
770                                 const bool openclCXX)
771 {
772     int error;
773     // Create OpenCL C++ program
774     if(openclCXX)
775     {
776     // -----------------------------------------------------------------------------------
777     // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
778     // -----------------------------------------------------------------------------------
779     // Only OpenCL C++ to SPIR-V compilation
780     #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
781         // Save global variable
782         bool tempgCompilationCacheMode = gCompilationCacheMode;
783         // Force OpenCL C++ -> SPIR-V compilation on every run
784         gCompilationCacheMode = kCacheModeOverwrite;
785     #endif
786         error = create_openclcpp_program(
787             context, outProgram, numKernelLines, kernelProgram, buildOptions
788         );
789         if (error != CL_SUCCESS)
790         {
791             log_error("Create program failed: %d, line: %d\n", error, __LINE__);
792             return error;
793         }
794     // -----------------------------------------------------------------------------------
795     // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
796     // -----------------------------------------------------------------------------------
797     #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
798         // Restore global variables
799         gCompilationCacheMode = tempgCompilationCacheMode;
800         log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n", kernelName);
801         return error;
802     #endif
803     }
804     // Create OpenCL C program
805     else
806     {
807         error = create_single_kernel_helper_create_program(
808             context, outProgram, numKernelLines, kernelProgram, buildOptions
809         );
810         if (error != CL_SUCCESS)
811         {
812             log_error("Create program failed: %d, line: %d\n", error, __LINE__);
813             return error;
814         }
815     }
816     // Remove offline-compiler-only build options
817     std::string newBuildOptions;
818     if (buildOptions != NULL)
819     {
820         newBuildOptions = buildOptions;
821         std::string offlineCompierOptions[] = {
822             "-cl-fp16-enable",
823             "-cl-fp64-enable",
824             "-cl-zero-init-local-mem-vars"
825         };
826         for(auto& s : offlineCompierOptions)
827         {
828             std::string::size_type i = newBuildOptions.find(s);
829             if (i != std::string::npos)
830                 newBuildOptions.erase(i, s.length());
831         }
832     }
833     // Build program and create kernel
834     return build_program_create_kernel_helper(
835         context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, newBuildOptions.c_str()
836     );
837 }
838 
839 // Creates OpenCL C++ program
create_openclcpp_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)840 int create_openclcpp_program(cl_context context,
841                              cl_program *outProgram,
842                              unsigned int numKernelLines,
843                              const char **kernelProgram,
844                              const char *buildOptions)
845 {
846     // Create program
847     return create_single_kernel_helper_create_program(
848         context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, true, kSpir_v
849     );
850 }
851 
852 // Builds OpenCL C/C++ program and creates
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)853 int build_program_create_kernel_helper(cl_context context,
854                                        cl_program *outProgram,
855                                        cl_kernel *outKernel,
856                                        unsigned int numKernelLines,
857                                        const char **kernelProgram,
858                                        const char *kernelName,
859                                        const char *buildOptions)
860 {
861     int error;
862     /* Compile the program */
863     int buildProgramFailed = 0;
864     int printedSource = 0;
865     error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
866     if (error != CL_SUCCESS)
867     {
868         unsigned int i;
869         print_error(error, "clBuildProgram failed");
870         buildProgramFailed = 1;
871         printedSource = 1;
872         log_error("Build options: %s\n", buildOptions);
873         log_error("Original source is: ------------\n");
874         for (i = 0; i < numKernelLines; i++)
875             log_error("%s", kernelProgram[i]);
876     }
877 
878     // Verify the build status on all devices
879     cl_uint deviceCount = 0;
880     error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES, sizeof(deviceCount), &deviceCount, NULL);
881     if (error != CL_SUCCESS)
882     {
883         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
884         return error;
885     }
886 
887     if (deviceCount == 0)
888     {
889         log_error("No devices found for program.\n");
890         return -1;
891     }
892 
893     cl_device_id *devices = (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
894     if (NULL == devices)
895         return -1;
896     BufferOwningPtr<cl_device_id> devicesBuf(devices);
897 
898     memset(devices, 0, deviceCount * sizeof(cl_device_id));
899     error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * deviceCount, devices, NULL);
900     if (error != CL_SUCCESS)
901     {
902         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
903         return error;
904     }
905 
906     cl_uint z;
907     bool buildFailed = false;
908     for (z = 0; z < deviceCount; z++)
909     {
910         char deviceName[4096] = "";
911         error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
912         if (error != CL_SUCCESS || deviceName[0] == '\0')
913         {
914             log_error("Device \"%d\" failed to return a name\n", z);
915             print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
916         }
917 
918         cl_build_status buildStatus;
919         error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
920         if (error != CL_SUCCESS)
921         {
922             print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
923             return error;
924         }
925 
926         if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed && deviceCount == 1)
927         {
928             buildFailed = true;
929             log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
930         }
931 
932         if (buildStatus != CL_BUILD_SUCCESS)
933         {
934 
935             char statusString[64] = "";
936             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
937                 sprintf(statusString, "CL_BUILD_SUCCESS");
938             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
939                 sprintf(statusString, "CL_BUILD_NONE");
940             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
941                 sprintf(statusString, "CL_BUILD_ERROR");
942             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
943                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
944             else
945                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
946 
947             if (buildStatus != CL_BUILD_SUCCESS)
948                 log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
949             size_t paramSize = 0;
950             error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize);
951             if (error != CL_SUCCESS)
952             {
953 
954                 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
955                 return error;
956             }
957 
958             std::string log;
959             log.resize(paramSize / sizeof(char));
960             error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
961             if (error != CL_SUCCESS || log[0] == '\0')
962             {
963                 log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
964                 if (error)
965                 {
966                     print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
967                     return error;
968                 }
969                 else
970                 {
971                     log_error("clGetProgramBuildInfo returned an empty log.\n");
972                     return -1;
973                 }
974             }
975             // In this case we've already printed out the code above.
976             if (!printedSource)
977             {
978                 unsigned int i;
979                 log_error("Original source is: ------------\n");
980                 for (i = 0; i < numKernelLines; i++)
981                     log_error("%s", kernelProgram[i]);
982                 printedSource = 1;
983             }
984             log_error("Build log for device \"%s\" is: ------------\n", deviceName);
985             log_error("%s\n", log.c_str());
986             log_error("\n----------\n");
987             return -1;
988         }
989     }
990 
991     if (buildFailed)
992     {
993         return -1;
994     }
995 
996     /* And create a kernel from it */
997     if (kernelName != NULL)
998     {
999         *outKernel = clCreateKernel(*outProgram, kernelName, &error);
1000         if (*outKernel == NULL || error != CL_SUCCESS)
1001         {
1002             print_error(error, "Unable to create kernel");
1003             return error;
1004         }
1005     }
1006 
1007     return 0;
1008 }
1009 
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)1010 int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
1011 {
1012     cl_device_id *devices;
1013     size_t size, maxCommonSize = 0;
1014     int numDevices, i, j, error;
1015   cl_uint numDims;
1016     size_t outSize;
1017   size_t sizeLimit[]={1,1,1};
1018 
1019 
1020     /* Assume fewer than 16 devices will be returned */
1021   error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
1022   test_error( error, "Unable to obtain list of devices size for context" );
1023   devices = (cl_device_id *)malloc(outSize);
1024   BufferOwningPtr<cl_device_id> devicesBuf(devices);
1025 
1026   error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
1027   test_error( error, "Unable to obtain list of devices for context" );
1028 
1029     numDevices = (int)( outSize / sizeof( cl_device_id ) );
1030 
1031     for( i = 0; i < numDevices; i++ )
1032     {
1033         error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
1034         test_error( error, "Unable to obtain max work group size for device" );
1035         if( size < maxCommonSize || maxCommonSize == 0)
1036             maxCommonSize = size;
1037 
1038         error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
1039         test_error( error, "Unable to obtain max work group size for device and kernel combo" );
1040         if( size < maxCommonSize  || maxCommonSize == 0)
1041             maxCommonSize = size;
1042 
1043     error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
1044     test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1045     sizeLimit[0] = 1;
1046     error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL);
1047         test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1048 
1049         if (outLimits != NULL)
1050         {
1051       if (i == 0) {
1052         for (j=0; j<3; j++)
1053           outLimits[j] = sizeLimit[j];
1054       } else {
1055         for (j=0; j<(int)numDims; j++) {
1056           if (sizeLimit[j] < outLimits[j])
1057             outLimits[j] = sizeLimit[j];
1058         }
1059       }
1060     }
1061     }
1062 
1063     *outMaxSize = (unsigned int)maxCommonSize;
1064     return 0;
1065 }
1066 
1067 
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1068 extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize )
1069 {
1070     cl_uint      maxDim;
1071     size_t       maxWgSize;
1072     size_t       *maxWgSizePerDim;
1073     int          error;
1074 
1075     error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &maxWgSize, NULL );
1076     test_error( error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" );
1077 
1078     error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &maxDim, NULL );
1079     test_error( error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" );
1080     maxWgSizePerDim = (size_t*)malloc( maxDim * sizeof( size_t ) );
1081     if( !maxWgSizePerDim )
1082     {
1083         log_error( "Unable to allocate maxWgSizePerDim\n" );
1084         return -1;
1085     }
1086 
1087     error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, maxDim * sizeof( size_t ), maxWgSizePerDim, NULL );
1088     if( error != CL_SUCCESS)
1089     {
1090         log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" );
1091         free( maxWgSizePerDim );
1092         return error;
1093     }
1094 
1095     // "maxWgSize" is limited to that of the first dimension.
1096     if( maxWgSize > maxWgSizePerDim[0] )
1097     {
1098         maxWgSize = maxWgSizePerDim[0];
1099     }
1100 
1101     free( maxWgSizePerDim );
1102 
1103     *outSize = maxWgSize;
1104     return 0;
1105 }
1106 
1107 
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1108 int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
1109                                    size_t globalThreadSize, size_t *outMaxSize )
1110 {
1111   size_t sizeLimit[3];
1112     int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
1113     if( error != 0 )
1114         return error;
1115 
1116     /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
1117     /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
1118      the modulo test will succeed and break the loop anyway */
1119     for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
1120         ;
1121     return 0;
1122 }
1123 
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1124 int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
1125                                    size_t *globalThreadSizes, size_t *outMaxSizes )
1126 {
1127   size_t sizeLimit[3];
1128     size_t maxSize;
1129     int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
1130     if( error != 0 )
1131         return error;
1132 
1133     /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
1134        sizes */
1135 
1136     /* Simple case */
1137     if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
1138     {
1139     if (globalThreadSizes[ 0 ] <= sizeLimit[0] &&  globalThreadSizes[ 1 ] <= sizeLimit[1]) {
1140       outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
1141       outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
1142       return 0;
1143     }
1144     }
1145 
1146   size_t remainingSize, sizeForThisOne;
1147   remainingSize = maxSize;
1148   int i, j;
1149   for (i=0 ; i<2; i++) {
1150     if (globalThreadSizes[i] > remainingSize)
1151       sizeForThisOne = remainingSize;
1152     else
1153       sizeForThisOne = globalThreadSizes[i];
1154     for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
1155     outMaxSizes[i] = sizeForThisOne;
1156     remainingSize = maxSize;
1157     for (j=0; j<=i; j++)
1158       remainingSize /=outMaxSizes[j];
1159   }
1160 
1161     return 0;
1162 }
1163 
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1164 int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
1165                                       size_t *globalThreadSizes, size_t *outMaxSizes )
1166 {
1167   size_t sizeLimit[3];
1168     size_t maxSize;
1169     int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
1170     if( error != 0 )
1171         return error;
1172     /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
1173      sizes */
1174 
1175     /* Simple case */
1176     if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
1177     {
1178     if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) {
1179       outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
1180       outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
1181       outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
1182       return 0;
1183     }
1184     }
1185 
1186   size_t remainingSize, sizeForThisOne;
1187   remainingSize = maxSize;
1188   int i, j;
1189   for (i=0 ; i<3; i++) {
1190     if (globalThreadSizes[i] > remainingSize)
1191       sizeForThisOne = remainingSize;
1192     else
1193       sizeForThisOne = globalThreadSizes[i];
1194     for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
1195     outMaxSizes[i] = sizeForThisOne;
1196     remainingSize = maxSize;
1197     for (j=0; j<=i; j++)
1198       remainingSize /=outMaxSizes[j];
1199   }
1200 
1201     return 0;
1202 }
1203 
1204 /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1205 int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
1206 {
1207     cl_image_format *list;
1208     cl_uint count = 0;
1209     cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count );
1210     if( count == 0 )
1211         return 0;
1212 
1213     list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
1214     if( NULL == list )
1215     {
1216         log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__,  err );
1217         return 0;
1218     }
1219     BufferOwningPtr<cl_image_format> listBuf(list);
1220 
1221 
1222     cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
1223     if( error )
1224     {
1225         log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
1226         return 0;
1227     }
1228 
1229     // iterate looking for a match.
1230     cl_uint i;
1231     for( i = 0; i < count; i++ )
1232     {
1233         if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
1234             fmt->image_channel_order == list[ i ].image_channel_order )
1235             break;
1236     }
1237 
1238     return ( i < count ) ? 1 : 0;
1239 }
1240 
1241 size_t get_pixel_bytes( const cl_image_format *fmt );
get_pixel_bytes(const cl_image_format * fmt)1242 size_t get_pixel_bytes( const cl_image_format *fmt )
1243 {
1244     size_t chanCount;
1245     switch( fmt->image_channel_order )
1246     {
1247         case CL_R:
1248         case CL_A:
1249         case CL_Rx:
1250         case CL_INTENSITY:
1251         case CL_LUMINANCE:
1252         case CL_DEPTH:
1253             chanCount = 1;
1254             break;
1255         case CL_RG:
1256         case CL_RA:
1257         case CL_RGx:
1258             chanCount = 2;
1259             break;
1260         case CL_RGB:
1261         case CL_RGBx:
1262         case CL_sRGB:
1263         case CL_sRGBx:
1264             chanCount = 3;
1265             break;
1266         case CL_RGBA:
1267         case CL_ARGB:
1268         case CL_BGRA:
1269         case CL_sBGRA:
1270         case CL_sRGBA:
1271 #ifdef CL_1RGB_APPLE
1272         case CL_1RGB_APPLE:
1273 #endif
1274 #ifdef CL_BGR1_APPLE
1275         case CL_BGR1_APPLE:
1276 #endif
1277             chanCount = 4;
1278             break;
1279         default:
1280             log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
1281             abort();
1282             break;
1283     }
1284 
1285     switch( fmt->image_channel_data_type )
1286     {
1287           case CL_UNORM_SHORT_565:
1288           case CL_UNORM_SHORT_555:
1289             return 2;
1290 
1291           case CL_UNORM_INT_101010:
1292             return 4;
1293 
1294           case CL_SNORM_INT8:
1295           case CL_UNORM_INT8:
1296           case CL_SIGNED_INT8:
1297           case CL_UNSIGNED_INT8:
1298             return chanCount;
1299 
1300           case CL_SNORM_INT16:
1301           case CL_UNORM_INT16:
1302           case CL_HALF_FLOAT:
1303           case CL_SIGNED_INT16:
1304           case CL_UNSIGNED_INT16:
1305 #ifdef CL_SFIXED14_APPLE
1306           case CL_SFIXED14_APPLE:
1307 #endif
1308             return chanCount * 2;
1309 
1310           case CL_SIGNED_INT32:
1311           case CL_UNSIGNED_INT32:
1312           case CL_FLOAT:
1313             return chanCount * 4;
1314 
1315         default:
1316             log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
1317             abort();
1318     }
1319 
1320     return 0;
1321 }
1322 
verifyImageSupport(cl_device_id device)1323 test_status verifyImageSupport( cl_device_id device )
1324 {
1325     int result = checkForImageSupport( device );
1326     if( result == 0 )
1327     {
1328         return TEST_PASS;
1329     }
1330     if( result == CL_IMAGE_FORMAT_NOT_SUPPORTED )
1331     {
1332         log_error( "SKIPPED: Device does not supported images as required by this test!\n" );
1333         return TEST_SKIP;
1334     }
1335     return TEST_FAIL;
1336 }
1337 
checkForImageSupport(cl_device_id device)1338 int checkForImageSupport( cl_device_id device )
1339 {
1340     cl_uint i;
1341     int error;
1342 
1343 
1344     /* Check the device props to see if images are supported at all first */
1345     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
1346     test_error( error, "Unable to query device for image support" );
1347     if( i == 0 )
1348     {
1349         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1350     }
1351 
1352     /* So our support is good */
1353     return 0;
1354 }
1355 
checkFor3DImageSupport(cl_device_id device)1356 int checkFor3DImageSupport( cl_device_id device )
1357 {
1358     cl_uint i;
1359     int error;
1360 
1361     /* Check the device props to see if images are supported at all first */
1362     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
1363     test_error( error, "Unable to query device for image support" );
1364     if( i == 0 )
1365     {
1366         return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1367     }
1368 
1369     char profile[128];
1370     error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
1371     test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );
1372     if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) )
1373     {
1374         size_t width = -1L;
1375         size_t height = -1L;
1376         size_t depth = -1L;
1377         error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL );
1378         test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
1379         error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL );
1380         test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
1381         error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL );
1382         test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );
1383 
1384         if( 0 == (height | width | depth ))
1385             return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1386     }
1387 
1388     /* So our support is good */
1389     return 0;
1390 }
1391 
get_min_alignment(cl_context context)1392 size_t get_min_alignment(cl_context context)
1393 {
1394     static cl_uint align_size = 0;
1395 
1396     if( 0 == align_size )
1397     {
1398         cl_device_id * devices;
1399         size_t devices_size = 0;
1400         cl_uint result = 0;
1401         cl_int error;
1402         int i;
1403 
1404         error = clGetContextInfo (context,
1405                                   CL_CONTEXT_DEVICES,
1406                                   0,
1407                                   NULL,
1408                                   &devices_size);
1409         test_error_ret(error, "clGetContextInfo failed", 0);
1410 
1411         devices = (cl_device_id*)malloc(devices_size);
1412         if (devices == NULL) {
1413             print_error( error, "malloc failed" );
1414             return 0;
1415         }
1416 
1417         error = clGetContextInfo (context,
1418                                   CL_CONTEXT_DEVICES,
1419                                   devices_size,
1420                                   (void*)devices,
1421                                   NULL);
1422         test_error_ret(error, "clGetContextInfo failed", 0);
1423 
1424         for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
1425         {
1426             cl_uint alignment = 0;
1427 
1428             error = clGetDeviceInfo (devices[i],
1429                                      CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1430                                      sizeof(cl_uint),
1431                                      (void*)&alignment,
1432                                      NULL);
1433 
1434             if (error == CL_SUCCESS)
1435             {
1436                 alignment >>= 3;    // convert bits to bytes
1437                 result = (alignment > result) ? alignment : result;
1438             }
1439             else
1440                 print_error( error, "clGetDeviceInfo failed" );
1441         }
1442 
1443         align_size = result;
1444         free(devices);
1445     }
1446 
1447     return align_size;
1448 }
1449 
get_default_rounding_mode(cl_device_id device)1450 cl_device_fp_config get_default_rounding_mode( cl_device_id device )
1451 {
1452     char profileStr[128] = "";
1453     cl_device_fp_config single = 0;
1454     int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
1455     if( error )
1456         test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
1457 
1458     if( single & CL_FP_ROUND_TO_NEAREST )
1459         return CL_FP_ROUND_TO_NEAREST;
1460 
1461     if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
1462         test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 );
1463 
1464     // Make sure we are an embedded device before allowing a pass
1465     if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
1466         test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
1467 
1468     if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
1469         test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
1470 
1471     return CL_FP_ROUND_TO_ZERO;
1472 }
1473 
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1474 int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
1475 {
1476     cl_command_queue_properties realProps;
1477     cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( realProps ), &realProps, NULL );
1478     test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
1479 
1480     return ( realProps & prop ) ? 1 : 0;
1481 }
1482 
printDeviceHeader(cl_device_id device)1483 int printDeviceHeader( cl_device_id device )
1484 {
1485     char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ];
1486     int error;
1487 
1488     error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
1489     test_error( error, "Unable to get CL_DEVICE_NAME for device" );
1490 
1491     error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
1492     test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
1493 
1494     error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
1495     test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
1496 
1497     error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
1498     test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
1499 
1500     log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
1501              deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
1502              ( error == CL_SUCCESS ) ? cLangVersion : "" );
1503 
1504     return CL_SUCCESS;
1505 }
1506