1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "crc32.h"
17 #include "kernelHelpers.h"
18 #include "deviceInfo.h"
19 #include "errorHelpers.h"
20 #include "imageHelpers.h"
21 #include "typeWrappers.h"
22 #include "testHarness.h"
23 #include "parseParameters.h"
24
25 #include <cassert>
26 #include <vector>
27 #include <string>
28 #include <fstream>
29 #include <sstream>
30 #include <iomanip>
31
// Path separator placed between a directory and a file name wherever this
// file assembles cache file paths: a backslash on Windows, a forward slash
// everywhere else.
std::string slash =
#if defined(_WIN32)
    "\\";
#else
    "/";
#endif
37
38 static cl_int get_first_device_id(const cl_context context, cl_device_id &device);
39
// Returns the size in bytes of the file called fileName, measured by seeking
// to the end of a binary input stream and reading the stream position.
// Returns 0 when the file cannot be opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input;
    input.open(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // The offset of the end of the stream is the file length.
        input.seekg(0, std::ios::end);
        const std::ifstream::pos_type endOffset = input.tellg();
        return static_cast<long>(endOffset);
    }
    return 0;
}
50
// Joins the first numKernelLines C strings of kernelProgram, in order and
// without any separator, into a single string holding the kernel source.
static std::string get_kernel_content(unsigned int numKernelLines, const char *const *kernelProgram)
{
    std::string joined;
    for (unsigned int line = 0; line != numKernelLines; ++line)
        joined.append(kernelProgram[line]);
    return joined;
}
62
get_kernel_name(const std::string & source)63 std::string get_kernel_name(const std::string &source)
64 {
65 // Create list of kernel names
66 std::string kernelsList;
67 size_t kPos = source.find("kernel");
68 while (kPos != std::string::npos)
69 {
70 // check for '__kernel'
71 size_t pos = kPos;
72 if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
73 pos -= 2;
74
75 //check character before 'kernel' (white space expected)
76 size_t wsPos = source.find_last_of(" \t\r\n", pos);
77 if (wsPos == std::string::npos || wsPos + 1 == pos)
78 {
79 //check character after 'kernel' (white space expected)
80 size_t akPos = kPos + sizeof("kernel") - 1;
81 wsPos = source.find_first_of(" \t\r\n", akPos);
82 if (!(wsPos == akPos))
83 {
84 kPos = source.find("kernel", kPos + 1);
85 continue;
86 }
87
88 bool attributeFound;
89 do
90 {
91 attributeFound = false;
92 // find '(' after kernel name name
93 size_t pPos = source.find("(", akPos);
94 if (!(pPos != std::string::npos))
95 continue;
96
97 // check for not empty kernel name before '('
98 pos = source.find_last_not_of(" \t\r\n", pPos - 1);
99 if (!(pos != std::string::npos && pos > akPos))
100 continue;
101
102 //find character before kernel name
103 wsPos = source.find_last_of(" \t\r\n", pos);
104 if (!(wsPos != std::string::npos && wsPos >= akPos))
105 continue;
106
107 std::string name = source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
108 //check for kernel attribute
109 if (name == "__attribute__")
110 {
111 attributeFound = true;
112 int pCount = 1;
113 akPos = pPos + 1;
114 while (pCount > 0 && akPos != std::string::npos)
115 {
116 akPos = source.find_first_of("()", akPos + 1);
117 if (akPos != std::string::npos)
118 {
119 if (source[akPos] == '(')
120 pCount++;
121 else
122 pCount--;
123 }
124 }
125 }
126 else
127 {
128 kernelsList += name + ".";
129 }
130 } while (attributeFound);
131 }
132 kPos = source.find("kernel", kPos + 1);
133 }
134 std::ostringstream oss;
135 if (MAX_LEN_FOR_KERNEL_LIST > 0)
136 {
137 if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
138 {
139 kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
140 kernelsList[kernelsList.size() - 1] = '.';
141 kernelsList[kernelsList.size() - 1] = '.';
142 }
143 oss << kernelsList;
144 }
145 return oss.str();
146 }
147
get_offline_compilation_file_type_str(const CompilationMode compilationMode)148 static std::string get_offline_compilation_file_type_str(const CompilationMode compilationMode)
149 {
150 switch (compilationMode)
151 {
152 default:
153 assert(0 && "Invalid compilation mode");
154 abort();
155 case kOnline:
156 assert(0 && "Invalid compilation mode for offline compilation");
157 abort();
158 case kBinary:
159 return "binary";
160 case kSpir_v:
161 return "SPIR-V";
162 }
163 }
164
get_unique_filename_prefix(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)165 static std::string get_unique_filename_prefix(unsigned int numKernelLines,
166 const char *const *kernelProgram,
167 const char *buildOptions)
168 {
169 std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
170 std::string kernelName = get_kernel_name(kernel);
171 cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
172 std::ostringstream oss;
173 oss << kernelName << std::hex << std::setfill('0') << std::setw(8) << kernelCrc;
174 if(buildOptions) {
175 cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
176 oss << '.' << std::hex << std::setfill('0') << std::setw(8) << bOptionsCrc;
177 }
178 return oss.str();
179 }
180
181
182 static std::string
get_cl_build_options_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)183 get_cl_build_options_filename_with_path(const std::string& filePath,
184 const std::string& fileNamePrefix) {
185 return filePath + slash + fileNamePrefix + ".options";
186 }
187
188 static std::string
get_cl_source_filename_with_path(const std::string & filePath,const std::string & fileNamePrefix)189 get_cl_source_filename_with_path(const std::string& filePath,
190 const std::string& fileNamePrefix) {
191 return filePath + slash + fileNamePrefix + ".cl";
192 }
193
194 static std::string
get_binary_filename_with_path(CompilationMode mode,cl_uint deviceAddrSpaceSize,const std::string & filePath,const std::string & fileNamePrefix)195 get_binary_filename_with_path(CompilationMode mode,
196 cl_uint deviceAddrSpaceSize,
197 const std::string& filePath,
198 const std::string& fileNamePrefix) {
199 std::string binaryFilename = filePath + slash + fileNamePrefix;
200 if(kSpir_v == mode) {
201 std::ostringstream extension;
202 extension << ".spv" << deviceAddrSpaceSize;
203 binaryFilename += extension.str();
204 }
205 return binaryFilename;
206 }
207
file_exist_on_disk(const std::string & filePath,const std::string & fileName)208 static bool file_exist_on_disk(const std::string& filePath,
209 const std::string& fileName) {
210 std::string fileNameWithPath = filePath + slash + fileName;
211 bool exist = false;
212 std::ifstream ifs;
213
214 ifs.open(fileNameWithPath.c_str(), std::ios::binary);
215 if(ifs.good())
216 exist = true;
217 ifs.close();
218 return exist;
219 }
220
should_save_kernel_source_to_disk(CompilationMode mode,CompilationCacheMode cacheMode,const std::string & binaryPath,const std::string & binaryName)221 static bool should_save_kernel_source_to_disk(CompilationMode mode,
222 CompilationCacheMode cacheMode,
223 const std::string& binaryPath,
224 const std::string& binaryName)
225 {
226 bool saveToDisk = false;
227 if(cacheMode == kCacheModeDumpCl ||
228 (cacheMode == kCacheModeOverwrite && mode != kOnline)) {
229 saveToDisk = true;
230 }
231 if(cacheMode == kCacheModeCompileIfAbsent && mode != kOnline) {
232 saveToDisk = !file_exist_on_disk(binaryPath, binaryName);
233 }
234 return saveToDisk;
235 }
236
save_kernel_build_options_to_disk(const std::string & path,const std::string & prefix,const char * buildOptions)237 static int save_kernel_build_options_to_disk(const std::string& path,
238 const std::string& prefix,
239 const char *buildOptions) {
240 std::string filename = get_cl_build_options_filename_with_path(path, prefix);
241 std::ofstream ofs(filename.c_str(), std::ios::binary);
242 if (!ofs.good())
243 {
244 log_info("Can't save kernel build options: %s\n", filename.c_str());
245 return -1;
246 }
247 ofs.write(buildOptions, strlen(buildOptions));
248 ofs.close();
249 log_info("Saved kernel build options to file: %s\n", filename.c_str());
250 return CL_SUCCESS;
251 }
252
save_kernel_source_to_disk(const std::string & path,const std::string & prefix,const std::string & source)253 static int save_kernel_source_to_disk(const std::string& path,
254 const std::string& prefix,
255 const std::string& source) {
256 std::string filename = get_cl_source_filename_with_path(path, prefix);
257 std::ofstream ofs(filename.c_str(), std::ios::binary);
258 if (!ofs.good())
259 {
260 log_info("Can't save kernel source: %s\n", filename.c_str());
261 return -1;
262 }
263 ofs.write(source.c_str(), source.size());
264 ofs.close();
265 log_info("Saved kernel source to file: %s\n", filename.c_str());
266 return CL_SUCCESS;
267 }
268
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions)269 static int save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
270 const char *const *kernelProgram,
271 const char *buildOptions)
272 {
273 int error;
274
275 std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
276 std::string kernelNamePrefix = get_unique_filename_prefix(numKernelLines,
277 kernelProgram,
278 buildOptions);
279
280 // save kernel source to disk
281 error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix, kernel);
282
283 // save kernel build options to disk if exists
284 if (buildOptions != NULL)
285 error |= save_kernel_build_options_to_disk(gCompilationCachePath, kernelNamePrefix, buildOptions);
286
287 return error;
288 }
289
get_compilation_mode_str(const CompilationMode compilationMode)290 static std::string get_compilation_mode_str(const CompilationMode compilationMode)
291 {
292 switch (compilationMode)
293 {
294 default:
295 assert(0 && "Invalid compilation mode");
296 abort();
297 case kOnline:
298 return "online";
299 case kBinary:
300 return "binary";
301 case kSpir_v:
302 return "spir-v";
303 }
304 }
305
#ifdef KHRONOS_OFFLINE_COMPILER
// Builds the command line that runs the Khronos SPIR-V-ready clang
// (KHRONOS_OFFLINE_COMPILER) on sourceFilename and writes SPIR-V to
// outputFilename.
//
// device_address_space_size selects the target triple: 32 gives the 32-bit
// SPIR triple, anything else the 64-bit one. openclCXX adds the OpenCL C++
// flag and include directory; otherwise opencl.h is force-included.
// KHRONOS_OFFLINE_COMPILER_OPTIONS (when defined) and bOptions are appended
// before the source and output arguments.
static std::string get_khronos_compiler_command(const cl_uint device_address_space_size,
                                                const bool openclCXX,
                                                const std::string &bOptions,
                                                const std::string &sourceFilename,
                                                const std::string &outputFilename)
{
    // Emit SPIR-V
    std::string compilerOptions = " -cc1 -emit-spirv";

    // <triple>: spir-unknown-unknown for 32 bit SPIR-V,
    // spir64-unknown-unknown for 64 bit SPIR-V.
    compilerOptions += (device_address_space_size == 32)
        ? " -triple=spir-unknown-unknown"
        : " -triple=spir64-unknown-unknown";

    if (openclCXX)
    {
        // OpenCL C++ flag required by the SPIR-V-ready clang, plus the
        // directory holding the OpenCL C++ library headers.
        compilerOptions += " -cl-std=c++";
        compilerOptions += " -I ";
        compilerOptions += STRINGIFY_VALUE(CL_LIBCLCXX_DIR);
    }
    else
    {
        compilerOptions += " -include opencl.h";
    }

#ifdef KHRONOS_OFFLINE_COMPILER_OPTIONS
    // Separate the extra options from the previous argument; without the
    // space they would be glued onto the include path / header name.
    compilerOptions += " ";
    compilerOptions += STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER_OPTIONS);
#endif

    // Add build options passed to this function
    compilerOptions += " " + bOptions;
    compilerOptions += " " + sourceFilename + " -o " + outputFilename;

    std::string runString = STRINGIFY_VALUE(KHRONOS_OFFLINE_COMPILER) + compilerOptions;
    return runString;
}
#endif // KHRONOS_OFFLINE_COMPILER
355
get_cl_device_info_str(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfo)356 static cl_int get_cl_device_info_str(const cl_device_id device, const cl_uint device_address_space_size,
357 const CompilationMode compilationMode, std::string &clDeviceInfo)
358 {
359 std::string extensionsString = get_device_extensions_string(device);
360 std::string versionString = get_device_version_string(device);
361
362 std::ostringstream clDeviceInfoStream;
363 std::string file_type = get_offline_compilation_file_type_str(compilationMode);
364 clDeviceInfoStream << "# OpenCL device info affecting " << file_type << " offline compilation:" << std::endl
365 << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << std::endl
366 << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\"" << std::endl;
367 /* We only need the device's supported IL version(s) when compiling IL
368 * that will be loaded with clCreateProgramWithIL() */
369 if (compilationMode == kSpir_v)
370 {
371 std::string ilVersionString = get_device_il_version_string(device);
372 clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString << "\"" << std::endl;
373 }
374 clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\"" << std::endl;
375
376 clDeviceInfo = clDeviceInfoStream.str();
377
378 return CL_SUCCESS;
379 }
380
write_cl_device_info(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,std::string & clDeviceInfoFilename)381 static int write_cl_device_info(const cl_device_id device, const cl_uint device_address_space_size,
382 const CompilationMode compilationMode, std::string &clDeviceInfoFilename)
383 {
384 std::string clDeviceInfo;
385 int error = get_cl_device_info_str(device, device_address_space_size, compilationMode, clDeviceInfo);
386 if (error != CL_SUCCESS)
387 {
388 return error;
389 }
390
391 cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
392
393 /* Get the filename for the clDeviceInfo file.
394 * Note: the file includes the hash on its content, so it is usually unnecessary to delete it. */
395 std::ostringstream clDeviceInfoFilenameStream;
396 clDeviceInfoFilenameStream << gCompilationCachePath << slash << "clDeviceInfo-";
397 clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8) << crc << ".txt";
398
399 clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
400
401 if ((size_t) get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
402 {
403 /* The CL device info file has already been created.
404 * Nothing to do. */
405 return 0;
406 }
407
408 /* The file does not exist or its length is not as expected. Create/overwrite it. */
409 std::ofstream ofs(clDeviceInfoFilename);
410 if (!ofs.good())
411 {
412 log_info("OfflineCompiler: can't create CL device info file: %s\n", clDeviceInfoFilename.c_str());
413 return -1;
414 }
415 ofs << clDeviceInfo;
416 ofs.close();
417
418 return CL_SUCCESS;
419 }
420
get_offline_compilation_command(const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const std::string & clDeviceInfoFilename)421 static std::string get_offline_compilation_command(const cl_uint device_address_space_size,
422 const CompilationMode compilationMode,
423 const std::string &bOptions,
424 const std::string &sourceFilename,
425 const std::string &outputFilename,
426 const std::string &clDeviceInfoFilename)
427 {
428 std::ostringstream wrapperOptions;
429
430 wrapperOptions << gCompilationProgram
431 << " --mode=" << get_compilation_mode_str(compilationMode)
432 << " --source=" << sourceFilename
433 << " --output=" << outputFilename
434 << " --cl-device-info=" << clDeviceInfoFilename;
435
436 if (bOptions != "")
437 {
438 // Add build options passed to this function
439 wrapperOptions << " -- " << bOptions;
440 }
441
442 return wrapperOptions.str();
443 }
444
invoke_offline_compiler(const cl_device_id device,const cl_uint device_address_space_size,const CompilationMode compilationMode,const std::string & bOptions,const std::string & sourceFilename,const std::string & outputFilename,const bool openclCXX)445 static int invoke_offline_compiler(const cl_device_id device,
446 const cl_uint device_address_space_size,
447 const CompilationMode compilationMode,
448 const std::string &bOptions,
449 const std::string &sourceFilename,
450 const std::string &outputFilename,
451 const bool openclCXX)
452 {
453 std::string runString;
454 if (openclCXX)
455 {
456 #ifndef KHRONOS_OFFLINE_COMPILER
457 log_error("CL C++ compilation is not possible: KHRONOS_OFFLINE_COMPILER was not defined.\n");
458 return CL_INVALID_OPERATION;
459 #else
460 if (compilationMode != kSpir_v)
461 {
462 log_error("Compilation mode must be SPIR-V for Khronos compiler");
463 return -1;
464 }
465 runString = get_khronos_compiler_command(device_address_space_size, openclCXX, bOptions,
466 sourceFilename, outputFilename);
467 #endif
468 }
469 else
470 {
471 std::string clDeviceInfoFilename;
472
473 // See cl_offline_compiler-interface.txt for a description of the
474 // format of the CL device information file generated below, and
475 // the internal command line interface for invoking the offline
476 // compiler.
477
478 cl_int err = write_cl_device_info(device, device_address_space_size, compilationMode,
479 clDeviceInfoFilename);
480 if (err != CL_SUCCESS)
481 {
482 log_error("Failed writing CL device info file\n");
483 return err;
484 }
485
486 runString = get_offline_compilation_command(device_address_space_size, compilationMode, bOptions,
487 sourceFilename, outputFilename, clDeviceInfoFilename);
488 }
489
490 // execute script
491 log_info("Executing command: %s\n", runString.c_str());
492 fflush(stdout);
493 int returnCode = system(runString.c_str());
494 if (returnCode != 0)
495 {
496 log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
497 return CL_COMPILE_PROGRAM_FAILURE;
498 }
499
500 return CL_SUCCESS;
501 }
502
get_first_device_id(const cl_context context,cl_device_id & device)503 static cl_int get_first_device_id(const cl_context context, cl_device_id &device)
504 {
505 cl_uint numDevices = 0;
506 cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL);
507 test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");
508
509 if (numDevices == 0)
510 {
511 log_error("ERROR: No CL devices found\n");
512 return -1;
513 }
514
515 std::vector<cl_device_id> devices(numDevices, 0);
516 error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices*sizeof(cl_device_id), &devices[0], NULL);
517 test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");
518
519 device = devices[0];
520 return CL_SUCCESS;
521 }
522
get_device_address_bits(const cl_device_id device,cl_uint & device_address_space_size)523 static cl_int get_device_address_bits(const cl_device_id device, cl_uint &device_address_space_size)
524 {
525 cl_int error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &device_address_space_size, NULL);
526 test_error(error, "Unable to obtain device address bits");
527
528 if (device_address_space_size != 32 && device_address_space_size != 64)
529 {
530 log_error("ERROR: Unexpected number of device address bits: %u\n", device_address_space_size);
531 return -1;
532 }
533
534 return CL_SUCCESS;
535 }
536
get_offline_compiler_output(std::ifstream & ifs,const cl_device_id device,cl_uint deviceAddrSpaceSize,const bool openclCXX,const CompilationMode compilationMode,const std::string & bOptions,const std::string & kernelPath,const std::string & kernelNamePrefix)537 static int get_offline_compiler_output(std::ifstream &ifs,
538 const cl_device_id device,
539 cl_uint deviceAddrSpaceSize,
540 const bool openclCXX,
541 const CompilationMode compilationMode,
542 const std::string &bOptions,
543 const std::string &kernelPath,
544 const std::string &kernelNamePrefix)
545 {
546 std::string sourceFilename = get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
547 std::string outputFilename = get_binary_filename_with_path(compilationMode,
548 deviceAddrSpaceSize,
549 kernelPath,
550 kernelNamePrefix);
551
552 ifs.open(outputFilename.c_str(), std::ios::binary);
553 if(!ifs.good()) {
554 std::string file_type = get_offline_compilation_file_type_str(compilationMode);
555 if (gCompilationCacheMode == kCacheModeForceRead) {
556 log_info("OfflineCompiler: can't open cached %s file: %s\n",
557 file_type.c_str(), outputFilename.c_str());
558 return -1;
559 }
560 else {
561 int error = invoke_offline_compiler(device, deviceAddrSpaceSize, compilationMode,
562 bOptions, sourceFilename, outputFilename, openclCXX);
563 if (error != CL_SUCCESS)
564 return error;
565
566 // read output file
567 ifs.open(outputFilename.c_str(), std::ios::binary);
568 if (!ifs.good())
569 {
570 log_info("OfflineCompiler: can't read generated %s file: %s\n",
571 file_type.c_str(), outputFilename.c_str());
572 return -1;
573 }
574 }
575 }
576 return CL_SUCCESS;
577 }
578
create_single_kernel_helper_create_program_offline(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char * const * kernelProgram,const char * buildOptions,const bool openclCXX,CompilationMode compilationMode)579 static int create_single_kernel_helper_create_program_offline(cl_context context,
580 cl_device_id device,
581 cl_program *outProgram,
582 unsigned int numKernelLines,
583 const char *const *kernelProgram,
584 const char *buildOptions,
585 const bool openclCXX,
586 CompilationMode compilationMode)
587 {
588 if(kCacheModeDumpCl == gCompilationCacheMode) {
589 return -1;
590 }
591
592 // Get device CL_DEVICE_ADDRESS_BITS
593 int error;
594 cl_uint device_address_space_size = 0;
595 if (device == NULL)
596 {
597 error = get_first_device_id(context, device);
598 test_error(error, "Failed to get device ID for first device");
599 }
600 error = get_device_address_bits(device, device_address_space_size);
601 if (error != CL_SUCCESS)
602 return error;
603
604 // set build options
605 std::string bOptions;
606 bOptions += buildOptions ? std::string(buildOptions) : "";
607
608 std::string kernelName = get_unique_filename_prefix(numKernelLines,
609 kernelProgram,
610 buildOptions);
611
612
613
614 std::ifstream ifs;
615 error = get_offline_compiler_output(ifs, device, device_address_space_size, openclCXX, compilationMode, bOptions, gCompilationCachePath, kernelName);
616 if (error != CL_SUCCESS)
617 return error;
618
619 // -----------------------------------------------------------------------------------
620 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
621 // -----------------------------------------------------------------------------------
622 // Only OpenCL C++ to SPIR-V compilation
623 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
624 if(openclCXX)
625 {
626 return CL_SUCCESS;
627 }
628 #endif
629
630 ifs.seekg(0, ifs.end);
631 int length = ifs.tellg();
632 ifs.seekg(0, ifs.beg);
633
634 //treat modifiedProgram as input for clCreateProgramWithBinary
635 if (compilationMode == kBinary)
636 {
637 // read binary from file:
638 std::vector<unsigned char> modifiedKernelBuf(length);
639
640 ifs.read((char *)&modifiedKernelBuf[0], length);
641 ifs.close();
642
643 size_t lengths = modifiedKernelBuf.size();
644 const unsigned char *binaries = { &modifiedKernelBuf[0] };
645 log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithBinary\n");
646 *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths, &binaries, NULL, &error);
647 if (*outProgram == NULL || error != CL_SUCCESS)
648 {
649 print_error(error, "clCreateProgramWithBinary failed");
650 return error;
651 }
652 }
653 //treat modifiedProgram as input for clCreateProgramWithIL
654 else if (compilationMode == kSpir_v)
655 {
656 // read spir-v from file:
657 std::vector<unsigned char> modifiedKernelBuf(length);
658
659 ifs.read((char *)&modifiedKernelBuf[0], length);
660 ifs.close();
661
662 size_t length = modifiedKernelBuf.size();
663 log_info("offlineCompiler: clCreateProgramWithSource replaced with clCreateProgramWithIL\n");
664
665 *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0], length, &error);
666 if (*outProgram == NULL || error != CL_SUCCESS)
667 {
668 print_error(error, "clCreateProgramWithIL failed");
669 return error;
670 }
671 }
672
673 return CL_SUCCESS;
674 }
675
create_single_kernel_helper_create_program(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,const bool openclCXX,CompilationMode compilationMode)676 static int create_single_kernel_helper_create_program(cl_context context,
677 cl_device_id device,
678 cl_program *outProgram,
679 unsigned int numKernelLines,
680 const char **kernelProgram,
681 const char *buildOptions,
682 const bool openclCXX,
683 CompilationMode compilationMode)
684 {
685 std::string filePrefix = get_unique_filename_prefix(numKernelLines,
686 kernelProgram,
687 buildOptions);
688 bool shouldSaveToDisk = should_save_kernel_source_to_disk(compilationMode,
689 gCompilationCacheMode,
690 gCompilationCachePath,
691 filePrefix);
692
693 if(shouldSaveToDisk)
694 {
695 if(CL_SUCCESS != save_kernel_source_and_options_to_disk(numKernelLines, kernelProgram, buildOptions))
696 {
697 log_error("Unable to dump kernel source to disk");
698 return -1;
699 }
700 }
701 if (compilationMode == kOnline)
702 {
703 int error = CL_SUCCESS;
704
705 /* Create the program object from source */
706 *outProgram = clCreateProgramWithSource(context, numKernelLines, kernelProgram, NULL, &error);
707 if (*outProgram == NULL || error != CL_SUCCESS)
708 {
709 print_error(error, "clCreateProgramWithSource failed");
710 return error;
711 }
712 return CL_SUCCESS;
713 }
714 else
715 {
716 return create_single_kernel_helper_create_program_offline(context, device, outProgram,
717 numKernelLines, kernelProgram,
718 buildOptions, openclCXX,
719 compilationMode);
720 }
721 }
722
create_single_kernel_helper_create_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,const bool openclCXX)723 int create_single_kernel_helper_create_program(cl_context context,
724 cl_program *outProgram,
725 unsigned int numKernelLines,
726 const char **kernelProgram,
727 const char *buildOptions,
728 const bool openclCXX)
729 {
730 return create_single_kernel_helper_create_program(context, NULL, outProgram,
731 numKernelLines, kernelProgram,
732 buildOptions, openclCXX,
733 gCompilationMode);
734 }
735
create_single_kernel_helper_create_program_for_device(cl_context context,cl_device_id device,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions,const bool openclCXX)736 int create_single_kernel_helper_create_program_for_device(cl_context context,
737 cl_device_id device,
738 cl_program *outProgram,
739 unsigned int numKernelLines,
740 const char **kernelProgram,
741 const char *buildOptions,
742 const bool openclCXX)
743 {
744 return create_single_kernel_helper_create_program(context, device, outProgram,
745 numKernelLines, kernelProgram,
746 buildOptions, openclCXX,
747 gCompilationMode);
748 }
749
create_single_kernel_helper_with_build_options(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions,const bool openclCXX)750 int create_single_kernel_helper_with_build_options(cl_context context,
751 cl_program *outProgram,
752 cl_kernel *outKernel,
753 unsigned int numKernelLines,
754 const char **kernelProgram,
755 const char *kernelName,
756 const char *buildOptions,
757 const bool openclCXX)
758 {
759 return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, buildOptions, openclCXX);
760 }
761
762 // Creates and builds OpenCL C/C++ program, and creates a kernel
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions,const bool openclCXX)763 int create_single_kernel_helper(cl_context context,
764 cl_program *outProgram,
765 cl_kernel *outKernel,
766 unsigned int numKernelLines,
767 const char **kernelProgram,
768 const char *kernelName,
769 const char *buildOptions,
770 const bool openclCXX)
771 {
772 int error;
773 // Create OpenCL C++ program
774 if(openclCXX)
775 {
776 // -----------------------------------------------------------------------------------
777 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
778 // -----------------------------------------------------------------------------------
779 // Only OpenCL C++ to SPIR-V compilation
780 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
781 // Save global variable
782 bool tempgCompilationCacheMode = gCompilationCacheMode;
783 // Force OpenCL C++ -> SPIR-V compilation on every run
784 gCompilationCacheMode = kCacheModeOverwrite;
785 #endif
786 error = create_openclcpp_program(
787 context, outProgram, numKernelLines, kernelProgram, buildOptions
788 );
789 if (error != CL_SUCCESS)
790 {
791 log_error("Create program failed: %d, line: %d\n", error, __LINE__);
792 return error;
793 }
794 // -----------------------------------------------------------------------------------
795 // ------------- ONLY FOR OPENCL 22 CONFORMANCE TEST 22 DEVELOPMENT ------------------
796 // -----------------------------------------------------------------------------------
797 #if defined(DEVELOPMENT) && defined(ONLY_SPIRV_COMPILATION)
798 // Restore global variables
799 gCompilationCacheMode = tempgCompilationCacheMode;
800 log_info("WARNING: KERNEL %s WAS ONLY COMPILED TO SPIR-V\n", kernelName);
801 return error;
802 #endif
803 }
804 // Create OpenCL C program
805 else
806 {
807 error = create_single_kernel_helper_create_program(
808 context, outProgram, numKernelLines, kernelProgram, buildOptions
809 );
810 if (error != CL_SUCCESS)
811 {
812 log_error("Create program failed: %d, line: %d\n", error, __LINE__);
813 return error;
814 }
815 }
816 // Remove offline-compiler-only build options
817 std::string newBuildOptions;
818 if (buildOptions != NULL)
819 {
820 newBuildOptions = buildOptions;
821 std::string offlineCompierOptions[] = {
822 "-cl-fp16-enable",
823 "-cl-fp64-enable",
824 "-cl-zero-init-local-mem-vars"
825 };
826 for(auto& s : offlineCompierOptions)
827 {
828 std::string::size_type i = newBuildOptions.find(s);
829 if (i != std::string::npos)
830 newBuildOptions.erase(i, s.length());
831 }
832 }
833 // Build program and create kernel
834 return build_program_create_kernel_helper(
835 context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, newBuildOptions.c_str()
836 );
837 }
838
839 // Creates OpenCL C++ program
create_openclcpp_program(cl_context context,cl_program * outProgram,unsigned int numKernelLines,const char ** kernelProgram,const char * buildOptions)840 int create_openclcpp_program(cl_context context,
841 cl_program *outProgram,
842 unsigned int numKernelLines,
843 const char **kernelProgram,
844 const char *buildOptions)
845 {
846 // Create program
847 return create_single_kernel_helper_create_program(
848 context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, true, kSpir_v
849 );
850 }
851
852 // Builds OpenCL C/C++ program and creates
build_program_create_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const char * buildOptions)853 int build_program_create_kernel_helper(cl_context context,
854 cl_program *outProgram,
855 cl_kernel *outKernel,
856 unsigned int numKernelLines,
857 const char **kernelProgram,
858 const char *kernelName,
859 const char *buildOptions)
860 {
861 int error;
862 /* Compile the program */
863 int buildProgramFailed = 0;
864 int printedSource = 0;
865 error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
866 if (error != CL_SUCCESS)
867 {
868 unsigned int i;
869 print_error(error, "clBuildProgram failed");
870 buildProgramFailed = 1;
871 printedSource = 1;
872 log_error("Build options: %s\n", buildOptions);
873 log_error("Original source is: ------------\n");
874 for (i = 0; i < numKernelLines; i++)
875 log_error("%s", kernelProgram[i]);
876 }
877
878 // Verify the build status on all devices
879 cl_uint deviceCount = 0;
880 error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES, sizeof(deviceCount), &deviceCount, NULL);
881 if (error != CL_SUCCESS)
882 {
883 print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
884 return error;
885 }
886
887 if (deviceCount == 0)
888 {
889 log_error("No devices found for program.\n");
890 return -1;
891 }
892
893 cl_device_id *devices = (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
894 if (NULL == devices)
895 return -1;
896 BufferOwningPtr<cl_device_id> devicesBuf(devices);
897
898 memset(devices, 0, deviceCount * sizeof(cl_device_id));
899 error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * deviceCount, devices, NULL);
900 if (error != CL_SUCCESS)
901 {
902 print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
903 return error;
904 }
905
906 cl_uint z;
907 bool buildFailed = false;
908 for (z = 0; z < deviceCount; z++)
909 {
910 char deviceName[4096] = "";
911 error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL);
912 if (error != CL_SUCCESS || deviceName[0] == '\0')
913 {
914 log_error("Device \"%d\" failed to return a name\n", z);
915 print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
916 }
917
918 cl_build_status buildStatus;
919 error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
920 if (error != CL_SUCCESS)
921 {
922 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
923 return error;
924 }
925
926 if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed && deviceCount == 1)
927 {
928 buildFailed = true;
929 log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
930 }
931
932 if (buildStatus != CL_BUILD_SUCCESS)
933 {
934
935 char statusString[64] = "";
936 if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
937 sprintf(statusString, "CL_BUILD_SUCCESS");
938 else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
939 sprintf(statusString, "CL_BUILD_NONE");
940 else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
941 sprintf(statusString, "CL_BUILD_ERROR");
942 else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
943 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
944 else
945 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
946
947 if (buildStatus != CL_BUILD_SUCCESS)
948 log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
949 size_t paramSize = 0;
950 error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize);
951 if (error != CL_SUCCESS)
952 {
953
954 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
955 return error;
956 }
957
958 std::string log;
959 log.resize(paramSize / sizeof(char));
960 error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL);
961 if (error != CL_SUCCESS || log[0] == '\0')
962 {
963 log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
964 if (error)
965 {
966 print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
967 return error;
968 }
969 else
970 {
971 log_error("clGetProgramBuildInfo returned an empty log.\n");
972 return -1;
973 }
974 }
975 // In this case we've already printed out the code above.
976 if (!printedSource)
977 {
978 unsigned int i;
979 log_error("Original source is: ------------\n");
980 for (i = 0; i < numKernelLines; i++)
981 log_error("%s", kernelProgram[i]);
982 printedSource = 1;
983 }
984 log_error("Build log for device \"%s\" is: ------------\n", deviceName);
985 log_error("%s\n", log.c_str());
986 log_error("\n----------\n");
987 return -1;
988 }
989 }
990
991 if (buildFailed)
992 {
993 return -1;
994 }
995
996 /* And create a kernel from it */
997 if (kernelName != NULL)
998 {
999 *outKernel = clCreateKernel(*outProgram, kernelName, &error);
1000 if (*outKernel == NULL || error != CL_SUCCESS)
1001 {
1002 print_error(error, "Unable to create kernel");
1003 return error;
1004 }
1005 }
1006
1007 return 0;
1008 }
1009
get_max_allowed_work_group_size(cl_context context,cl_kernel kernel,size_t * outMaxSize,size_t * outLimits)1010 int get_max_allowed_work_group_size( cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits )
1011 {
1012 cl_device_id *devices;
1013 size_t size, maxCommonSize = 0;
1014 int numDevices, i, j, error;
1015 cl_uint numDims;
1016 size_t outSize;
1017 size_t sizeLimit[]={1,1,1};
1018
1019
1020 /* Assume fewer than 16 devices will be returned */
1021 error = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &outSize );
1022 test_error( error, "Unable to obtain list of devices size for context" );
1023 devices = (cl_device_id *)malloc(outSize);
1024 BufferOwningPtr<cl_device_id> devicesBuf(devices);
1025
1026 error = clGetContextInfo( context, CL_CONTEXT_DEVICES, outSize, devices, NULL );
1027 test_error( error, "Unable to obtain list of devices for context" );
1028
1029 numDevices = (int)( outSize / sizeof( cl_device_id ) );
1030
1031 for( i = 0; i < numDevices; i++ )
1032 {
1033 error = clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
1034 test_error( error, "Unable to obtain max work group size for device" );
1035 if( size < maxCommonSize || maxCommonSize == 0)
1036 maxCommonSize = size;
1037
1038 error = clGetKernelWorkGroupInfo( kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof( size ), &size, NULL );
1039 test_error( error, "Unable to obtain max work group size for device and kernel combo" );
1040 if( size < maxCommonSize || maxCommonSize == 0)
1041 maxCommonSize = size;
1042
1043 error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( numDims ), &numDims, NULL);
1044 test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");
1045 sizeLimit[0] = 1;
1046 error= clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims*sizeof(size_t), sizeLimit, NULL);
1047 test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1048
1049 if (outLimits != NULL)
1050 {
1051 if (i == 0) {
1052 for (j=0; j<3; j++)
1053 outLimits[j] = sizeLimit[j];
1054 } else {
1055 for (j=0; j<(int)numDims; j++) {
1056 if (sizeLimit[j] < outLimits[j])
1057 outLimits[j] = sizeLimit[j];
1058 }
1059 }
1060 }
1061 }
1062
1063 *outMaxSize = (unsigned int)maxCommonSize;
1064 return 0;
1065 }
1066
1067
get_max_allowed_1d_work_group_size_on_device(cl_device_id device,cl_kernel kernel,size_t * outSize)1068 extern int get_max_allowed_1d_work_group_size_on_device( cl_device_id device, cl_kernel kernel, size_t *outSize )
1069 {
1070 cl_uint maxDim;
1071 size_t maxWgSize;
1072 size_t *maxWgSizePerDim;
1073 int error;
1074
1075 error = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( size_t ), &maxWgSize, NULL );
1076 test_error( error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed" );
1077
1078 error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &maxDim, NULL );
1079 test_error( error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed" );
1080 maxWgSizePerDim = (size_t*)malloc( maxDim * sizeof( size_t ) );
1081 if( !maxWgSizePerDim )
1082 {
1083 log_error( "Unable to allocate maxWgSizePerDim\n" );
1084 return -1;
1085 }
1086
1087 error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_ITEM_SIZES, maxDim * sizeof( size_t ), maxWgSizePerDim, NULL );
1088 if( error != CL_SUCCESS)
1089 {
1090 log_error( "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n" );
1091 free( maxWgSizePerDim );
1092 return error;
1093 }
1094
1095 // "maxWgSize" is limited to that of the first dimension.
1096 if( maxWgSize > maxWgSizePerDim[0] )
1097 {
1098 maxWgSize = maxWgSizePerDim[0];
1099 }
1100
1101 free( maxWgSizePerDim );
1102
1103 *outSize = maxWgSize;
1104 return 0;
1105 }
1106
1107
get_max_common_work_group_size(cl_context context,cl_kernel kernel,size_t globalThreadSize,size_t * outMaxSize)1108 int get_max_common_work_group_size( cl_context context, cl_kernel kernel,
1109 size_t globalThreadSize, size_t *outMaxSize )
1110 {
1111 size_t sizeLimit[3];
1112 int error = get_max_allowed_work_group_size( context, kernel, outMaxSize, sizeLimit );
1113 if( error != 0 )
1114 return error;
1115
1116 /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */
1117 /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1,
1118 the modulo test will succeed and break the loop anyway */
1119 for( ; ( globalThreadSize % *outMaxSize ) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)-- )
1120 ;
1121 return 0;
1122 }
1123
get_max_common_2D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1124 int get_max_common_2D_work_group_size( cl_context context, cl_kernel kernel,
1125 size_t *globalThreadSizes, size_t *outMaxSizes )
1126 {
1127 size_t sizeLimit[3];
1128 size_t maxSize;
1129 int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
1130 if( error != 0 )
1131 return error;
1132
1133 /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
1134 sizes */
1135
1136 /* Simple case */
1137 if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] <= maxSize )
1138 {
1139 if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1]) {
1140 outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
1141 outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
1142 return 0;
1143 }
1144 }
1145
1146 size_t remainingSize, sizeForThisOne;
1147 remainingSize = maxSize;
1148 int i, j;
1149 for (i=0 ; i<2; i++) {
1150 if (globalThreadSizes[i] > remainingSize)
1151 sizeForThisOne = remainingSize;
1152 else
1153 sizeForThisOne = globalThreadSizes[i];
1154 for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
1155 outMaxSizes[i] = sizeForThisOne;
1156 remainingSize = maxSize;
1157 for (j=0; j<=i; j++)
1158 remainingSize /=outMaxSizes[j];
1159 }
1160
1161 return 0;
1162 }
1163
get_max_common_3D_work_group_size(cl_context context,cl_kernel kernel,size_t * globalThreadSizes,size_t * outMaxSizes)1164 int get_max_common_3D_work_group_size( cl_context context, cl_kernel kernel,
1165 size_t *globalThreadSizes, size_t *outMaxSizes )
1166 {
1167 size_t sizeLimit[3];
1168 size_t maxSize;
1169 int error = get_max_allowed_work_group_size( context, kernel, &maxSize, sizeLimit );
1170 if( error != 0 )
1171 return error;
1172 /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global
1173 sizes */
1174
1175 /* Simple case */
1176 if( globalThreadSizes[ 0 ] * globalThreadSizes[ 1 ] * globalThreadSizes[ 2 ] <= maxSize )
1177 {
1178 if (globalThreadSizes[ 0 ] <= sizeLimit[0] && globalThreadSizes[ 1 ] <= sizeLimit[1] && globalThreadSizes[ 2 ] <= sizeLimit[2]) {
1179 outMaxSizes[ 0 ] = globalThreadSizes[ 0 ];
1180 outMaxSizes[ 1 ] = globalThreadSizes[ 1 ];
1181 outMaxSizes[ 2 ] = globalThreadSizes[ 2 ];
1182 return 0;
1183 }
1184 }
1185
1186 size_t remainingSize, sizeForThisOne;
1187 remainingSize = maxSize;
1188 int i, j;
1189 for (i=0 ; i<3; i++) {
1190 if (globalThreadSizes[i] > remainingSize)
1191 sizeForThisOne = remainingSize;
1192 else
1193 sizeForThisOne = globalThreadSizes[i];
1194 for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ;
1195 outMaxSizes[i] = sizeForThisOne;
1196 remainingSize = maxSize;
1197 for (j=0; j<=i; j++)
1198 remainingSize /=outMaxSizes[j];
1199 }
1200
1201 return 0;
1202 }
1203
1204 /* Helper to determine if a device supports an image format */
is_image_format_supported(cl_context context,cl_mem_flags flags,cl_mem_object_type image_type,const cl_image_format * fmt)1205 int is_image_format_supported( cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt )
1206 {
1207 cl_image_format *list;
1208 cl_uint count = 0;
1209 cl_int err = clGetSupportedImageFormats( context, flags, image_type, 128, NULL, &count );
1210 if( count == 0 )
1211 return 0;
1212
1213 list = (cl_image_format*) malloc( count * sizeof( cl_image_format ) );
1214 if( NULL == list )
1215 {
1216 log_error( "Error: unable to allocate %ld byte buffer for image format list at %s:%d (err = %d)\n", count * sizeof( cl_image_format ), __FILE__, __LINE__, err );
1217 return 0;
1218 }
1219 BufferOwningPtr<cl_image_format> listBuf(list);
1220
1221
1222 cl_int error = clGetSupportedImageFormats( context, flags, image_type, count, list, NULL );
1223 if( error )
1224 {
1225 log_error( "Error: failed to obtain supported image type list at %s:%d (err = %d)\n", __FILE__, __LINE__, err );
1226 return 0;
1227 }
1228
1229 // iterate looking for a match.
1230 cl_uint i;
1231 for( i = 0; i < count; i++ )
1232 {
1233 if( fmt->image_channel_data_type == list[ i ].image_channel_data_type &&
1234 fmt->image_channel_order == list[ i ].image_channel_order )
1235 break;
1236 }
1237
1238 return ( i < count ) ? 1 : 0;
1239 }
1240
1241 size_t get_pixel_bytes( const cl_image_format *fmt );
get_pixel_bytes(const cl_image_format * fmt)1242 size_t get_pixel_bytes( const cl_image_format *fmt )
1243 {
1244 size_t chanCount;
1245 switch( fmt->image_channel_order )
1246 {
1247 case CL_R:
1248 case CL_A:
1249 case CL_Rx:
1250 case CL_INTENSITY:
1251 case CL_LUMINANCE:
1252 case CL_DEPTH:
1253 chanCount = 1;
1254 break;
1255 case CL_RG:
1256 case CL_RA:
1257 case CL_RGx:
1258 chanCount = 2;
1259 break;
1260 case CL_RGB:
1261 case CL_RGBx:
1262 case CL_sRGB:
1263 case CL_sRGBx:
1264 chanCount = 3;
1265 break;
1266 case CL_RGBA:
1267 case CL_ARGB:
1268 case CL_BGRA:
1269 case CL_sBGRA:
1270 case CL_sRGBA:
1271 #ifdef CL_1RGB_APPLE
1272 case CL_1RGB_APPLE:
1273 #endif
1274 #ifdef CL_BGR1_APPLE
1275 case CL_BGR1_APPLE:
1276 #endif
1277 chanCount = 4;
1278 break;
1279 default:
1280 log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__ );
1281 abort();
1282 break;
1283 }
1284
1285 switch( fmt->image_channel_data_type )
1286 {
1287 case CL_UNORM_SHORT_565:
1288 case CL_UNORM_SHORT_555:
1289 return 2;
1290
1291 case CL_UNORM_INT_101010:
1292 return 4;
1293
1294 case CL_SNORM_INT8:
1295 case CL_UNORM_INT8:
1296 case CL_SIGNED_INT8:
1297 case CL_UNSIGNED_INT8:
1298 return chanCount;
1299
1300 case CL_SNORM_INT16:
1301 case CL_UNORM_INT16:
1302 case CL_HALF_FLOAT:
1303 case CL_SIGNED_INT16:
1304 case CL_UNSIGNED_INT16:
1305 #ifdef CL_SFIXED14_APPLE
1306 case CL_SFIXED14_APPLE:
1307 #endif
1308 return chanCount * 2;
1309
1310 case CL_SIGNED_INT32:
1311 case CL_UNSIGNED_INT32:
1312 case CL_FLOAT:
1313 return chanCount * 4;
1314
1315 default:
1316 log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__ );
1317 abort();
1318 }
1319
1320 return 0;
1321 }
1322
verifyImageSupport(cl_device_id device)1323 test_status verifyImageSupport( cl_device_id device )
1324 {
1325 int result = checkForImageSupport( device );
1326 if( result == 0 )
1327 {
1328 return TEST_PASS;
1329 }
1330 if( result == CL_IMAGE_FORMAT_NOT_SUPPORTED )
1331 {
1332 log_error( "SKIPPED: Device does not supported images as required by this test!\n" );
1333 return TEST_SKIP;
1334 }
1335 return TEST_FAIL;
1336 }
1337
checkForImageSupport(cl_device_id device)1338 int checkForImageSupport( cl_device_id device )
1339 {
1340 cl_uint i;
1341 int error;
1342
1343
1344 /* Check the device props to see if images are supported at all first */
1345 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
1346 test_error( error, "Unable to query device for image support" );
1347 if( i == 0 )
1348 {
1349 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1350 }
1351
1352 /* So our support is good */
1353 return 0;
1354 }
1355
checkFor3DImageSupport(cl_device_id device)1356 int checkFor3DImageSupport( cl_device_id device )
1357 {
1358 cl_uint i;
1359 int error;
1360
1361 /* Check the device props to see if images are supported at all first */
1362 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_SUPPORT, sizeof( i ), &i, NULL );
1363 test_error( error, "Unable to query device for image support" );
1364 if( i == 0 )
1365 {
1366 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1367 }
1368
1369 char profile[128];
1370 error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile ), profile, NULL );
1371 test_error( error, "Unable to query device for CL_DEVICE_PROFILE" );
1372 if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ) )
1373 {
1374 size_t width = -1L;
1375 size_t height = -1L;
1376 size_t depth = -1L;
1377 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL );
1378 test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH" );
1379 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL );
1380 test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT" );
1381 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL );
1382 test_error( error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH" );
1383
1384 if( 0 == (height | width | depth ))
1385 return CL_IMAGE_FORMAT_NOT_SUPPORTED;
1386 }
1387
1388 /* So our support is good */
1389 return 0;
1390 }
1391
get_min_alignment(cl_context context)1392 size_t get_min_alignment(cl_context context)
1393 {
1394 static cl_uint align_size = 0;
1395
1396 if( 0 == align_size )
1397 {
1398 cl_device_id * devices;
1399 size_t devices_size = 0;
1400 cl_uint result = 0;
1401 cl_int error;
1402 int i;
1403
1404 error = clGetContextInfo (context,
1405 CL_CONTEXT_DEVICES,
1406 0,
1407 NULL,
1408 &devices_size);
1409 test_error_ret(error, "clGetContextInfo failed", 0);
1410
1411 devices = (cl_device_id*)malloc(devices_size);
1412 if (devices == NULL) {
1413 print_error( error, "malloc failed" );
1414 return 0;
1415 }
1416
1417 error = clGetContextInfo (context,
1418 CL_CONTEXT_DEVICES,
1419 devices_size,
1420 (void*)devices,
1421 NULL);
1422 test_error_ret(error, "clGetContextInfo failed", 0);
1423
1424 for (i = 0; i < (int)(devices_size/sizeof(cl_device_id)); i++)
1425 {
1426 cl_uint alignment = 0;
1427
1428 error = clGetDeviceInfo (devices[i],
1429 CL_DEVICE_MEM_BASE_ADDR_ALIGN,
1430 sizeof(cl_uint),
1431 (void*)&alignment,
1432 NULL);
1433
1434 if (error == CL_SUCCESS)
1435 {
1436 alignment >>= 3; // convert bits to bytes
1437 result = (alignment > result) ? alignment : result;
1438 }
1439 else
1440 print_error( error, "clGetDeviceInfo failed" );
1441 }
1442
1443 align_size = result;
1444 free(devices);
1445 }
1446
1447 return align_size;
1448 }
1449
get_default_rounding_mode(cl_device_id device)1450 cl_device_fp_config get_default_rounding_mode( cl_device_id device )
1451 {
1452 char profileStr[128] = "";
1453 cl_device_fp_config single = 0;
1454 int error = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single ), &single, NULL );
1455 if( error )
1456 test_error_ret( error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0 );
1457
1458 if( single & CL_FP_ROUND_TO_NEAREST )
1459 return CL_FP_ROUND_TO_NEAREST;
1460
1461 if( 0 == (single & CL_FP_ROUND_TO_ZERO) )
1462 test_error_ret( -1, "FAILURE: device must support either CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0 );
1463
1464 // Make sure we are an embedded device before allowing a pass
1465 if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL ) ))
1466 test_error_ret( error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0 );
1467
1468 if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
1469 test_error_ret( error, "FAILURE: non-EMBEDDED_PROFILE devices must support CL_FP_ROUND_TO_NEAREST", 0 );
1470
1471 return CL_FP_ROUND_TO_ZERO;
1472 }
1473
checkDeviceForQueueSupport(cl_device_id device,cl_command_queue_properties prop)1474 int checkDeviceForQueueSupport( cl_device_id device, cl_command_queue_properties prop )
1475 {
1476 cl_command_queue_properties realProps;
1477 cl_int error = clGetDeviceInfo( device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( realProps ), &realProps, NULL );
1478 test_error_ret( error, "FAILURE: Unable to get device queue properties", 0 );
1479
1480 return ( realProps & prop ) ? 1 : 0;
1481 }
1482
printDeviceHeader(cl_device_id device)1483 int printDeviceHeader( cl_device_id device )
1484 {
1485 char deviceName[ 512 ], deviceVendor[ 512 ], deviceVersion[ 512 ], cLangVersion[ 512 ];
1486 int error;
1487
1488 error = clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof( deviceName ), deviceName, NULL );
1489 test_error( error, "Unable to get CL_DEVICE_NAME for device" );
1490
1491 error = clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof( deviceVendor ), deviceVendor, NULL );
1492 test_error( error, "Unable to get CL_DEVICE_VENDOR for device" );
1493
1494 error = clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof( deviceVersion ), deviceVersion, NULL );
1495 test_error( error, "Unable to get CL_DEVICE_VERSION for device" );
1496
1497 error = clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof( cLangVersion ), cLangVersion, NULL );
1498 test_error( error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device" );
1499
1500 log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute Device Version = %s%s%s\n",
1501 deviceName, deviceVendor, deviceVersion, ( error == CL_SUCCESS ) ? ", CL C Version = " : "",
1502 ( error == CL_SUCCESS ) ? cLangVersion : "" );
1503
1504 return CL_SUCCESS;
1505 }
1506