1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/testHarness.h"
17 #include "harness/typeWrappers.h"
18 #include "base.h"
19
20 #include <string>
21 #include <vector>
22 #include <algorithm>
23 #include <sstream>
24
25 typedef enum {
26 ARG_TYPE_NONE,
27
28 ARG_TYPE_HOST_PTR,
29 ARG_TYPE_HOST_LOCAL,
30
31 ARG_TYPE_COARSE_GRAINED_SVM,
32 ARG_TYPE_FINE_GRAINED_BUFFER_SVM,
33 ARG_TYPE_FINE_GRAINED_SYSTEM_SVM,
34 ARG_TYPE_ATOMICS_SVM
35 } ExtraKernelArgMemType;
36
37 class CSVMWrapper {
38 public:
CSVMWrapper()39 CSVMWrapper() : ptr_(NULL), context_(NULL) { }
40
Attach(cl_context context,void * ptr)41 void Attach(cl_context context, void *ptr) {
42 context_ = context;
43 ptr_ = ptr;
44 }
45
~CSVMWrapper()46 ~CSVMWrapper() {
47 if (ptr_)
48 clSVMFree(context_, ptr_);
49 }
50
operator void*()51 operator void *() {
52 return ptr_;
53 }
54
55 private:
56 void *ptr_;
57 cl_context context_;
58 };
59
60 class CAdvancedTest : public CTest {
61 public:
CAdvancedTest(const std::vector<std::string> & kernel)62 CAdvancedTest(const std::vector<std::string>& kernel) : CTest(), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
63
64 }
65
CAdvancedTest(const std::string & library,const std::vector<std::string> & kernel)66 CAdvancedTest(const std::string& library, const std::vector<std::string>& kernel) : CTest(), _libraryCode(library), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
67
68 }
69
CAdvancedTest(const std::string & kernel,ExtraKernelArgMemType argType=ARG_TYPE_NONE)70 CAdvancedTest(const std::string& kernel, ExtraKernelArgMemType argType = ARG_TYPE_NONE) : CTest(), _kernels(1, kernel), _extraKernelArgMemType(argType) {
71
72 }
73
CAdvancedTest(const std::string & library,const std::string & kernel)74 CAdvancedTest(const std::string& library, const std::string& kernel) : CTest(), _libraryCode(library), _kernels(1, kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
75
76 }
77
PrintCompilationLog(cl_program program,cl_device_id device)78 int PrintCompilationLog(cl_program program, cl_device_id device) {
79 cl_int error;
80 size_t buildLogSize = 0;
81
82 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
83 test_error(error, "clGetProgramBuildInfo failed");
84
85 std::string log;
86 log.resize(buildLogSize);
87
88 error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, buildLogSize, &log[0], NULL);
89 test_error(error, "clGetProgramBuildInfo failed");
90
91 log_error("Build log for device is:\n------------\n");
92 log_error("%s\n", log.c_str() );
93 log_error( "\n----------\n" );
94
95 return CL_SUCCESS;
96 }
97
ExecuteSubcase(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,const std::string & src)98 int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) {
99 cl_int error;
100
101 clProgramWrapper program, preCompiledLibrary, library, finalProgram;
102 clKernelWrapper kernel;
103
104 const char *srcPtr = src.c_str();
105
106 if (!_libraryCode.empty()) {
107 program = clCreateProgramWithSource(context, 1, &srcPtr, NULL, &error);
108 test_error(error, "clCreateProgramWithSource failed");
109
110 error = clCompileProgram(program, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL);
111
112 if (error != CL_SUCCESS)
113 PrintCompilationLog(program, deviceID);
114 test_error(error, "clCompileProgram failed");
115
116 const char *srcPtrLibrary = _libraryCode.c_str();
117
118 preCompiledLibrary = clCreateProgramWithSource(context, 1, &srcPtrLibrary, NULL, &error);
119 test_error(error, "clCreateProgramWithSource failed");
120
121 error = clCompileProgram(preCompiledLibrary, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL);
122
123 if (error != CL_SUCCESS)
124 PrintCompilationLog(preCompiledLibrary, deviceID);
125 test_error(error, "clCompileProgram failed");
126
127 library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &preCompiledLibrary, NULL, NULL, &error);
128 test_error(error, "clLinkProgram failed");
129
130 cl_program objects[] = { program, library };
131 finalProgram = clLinkProgram(context, 1, &deviceID, "", 2, objects, NULL, NULL, &error);
132 test_error(error, "clLinkProgram failed");
133
134 kernel = clCreateKernel(finalProgram, "testKernel", &error);
135 test_error(error, "clCreateKernel failed");
136 }
137
138 else {
139 if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
140 log_error("create_single_kernel_helper failed\n");
141 return -1;
142 }
143 }
144
145 size_t bufferSize = num_elements * sizeof(cl_uint);
146 clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error);
147 test_error(error, "clCreateBuffer failed");
148
149 error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
150 test_error(error, "clSetKernelArg(0) failed");
151
152 // Warning: the order below is very important as SVM buffer cannot be free'd before corresponding mem_object
153 CSVMWrapper svmWrapper;
154 clMemWrapper extraArg;
155 std::vector<cl_uint> extraArgData(num_elements);
156 for (cl_uint i = 0; i < (cl_uint)num_elements; i++)
157 extraArgData[i] = i;
158
159 if (_extraKernelArgMemType != ARG_TYPE_NONE) {
160 if (_extraKernelArgMemType == ARG_TYPE_HOST_PTR) {
161 extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, &extraArgData[0], &error);
162 test_error(error, "clCreateBuffer failed");
163 }
164
165 else {
166 void *ptr = NULL;
167
168 switch (_extraKernelArgMemType) {
169 case ARG_TYPE_COARSE_GRAINED_SVM:
170 ptr = clSVMAlloc(context, CL_MEM_READ_WRITE, bufferSize, 0);
171 break;
172 case ARG_TYPE_FINE_GRAINED_BUFFER_SVM:
173 ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE, bufferSize, 0);
174 break;
175 case ARG_TYPE_FINE_GRAINED_SYSTEM_SVM:
176 ptr = &extraArgData[0];
177 break;
178 case ARG_TYPE_ATOMICS_SVM:
179 ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS | CL_MEM_READ_WRITE, bufferSize, 0);
180 break;
181 default:
182 break;
183 }
184
185 if(_extraKernelArgMemType != ARG_TYPE_HOST_LOCAL) {
186 if (!ptr) {
187 log_error("Allocation failed\n");
188 return -1;
189 }
190
191 if (_extraKernelArgMemType != ARG_TYPE_FINE_GRAINED_SYSTEM_SVM) {
192 svmWrapper.Attach(context, ptr);
193 }
194
195 if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
196 error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, ptr, bufferSize, 0, NULL, NULL);
197 test_error(error, "clEnqueueSVMMap failed");
198 }
199
200 memcpy(ptr, &extraArgData[0], bufferSize);
201
202 if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
203 error = clEnqueueSVMUnmap(queue, ptr, 0, NULL, NULL);
204 test_error(error, "clEnqueueSVMUnmap failed");
205 clFinish(queue);
206 }
207
208 extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, ptr, &error);
209 test_error(error, "clCreateBuffer from SVM buffer failed");
210 }
211 }
212
213 if(_extraKernelArgMemType == ARG_TYPE_HOST_LOCAL)
214 error = clSetKernelArg(kernel, 1, bufferSize, NULL);
215 else
216 error = clSetKernelArg(kernel, 1, sizeof(extraArg), &extraArg);
217
218
219 test_error(error, "clSetKernelArg(1) failed");
220 }
221
222 size_t globalWorkGroupSize = num_elements;
223 size_t localWorkGroupSize = 0;
224 error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize);
225 test_error(error, "Unable to get common work group size");
226
227 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL);
228 test_error(error, "clEnqueueNDRangeKernel failed");
229
230 // verify results
231 std::vector<cl_uint> results(num_elements);
232
233 error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL);
234 test_error(error, "clEnqueueReadBuffer failed");
235
236 size_t passCount = std::count(results.begin(), results.end(), 1);
237 if (passCount != results.size()) {
238 std::vector<cl_uint>::iterator iter = std::find(results.begin(), results.end(), 0);
239 log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter));
240 log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size());
241 return -1;
242 }
243
244 return CL_SUCCESS;
245 }
246
Execute(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)247 int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
248 cl_int result = CL_SUCCESS;
249
250 for (std::vector<std::string>::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) {
251 log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size());
252
253 result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it);
254 }
255
256 return result;
257 }
258
259 private:
260 const std::string _libraryCode;
261 const std::vector<std::string> _kernels;
262 const ExtraKernelArgMemType _extraKernelArgMemType;
263 };
264
test_library_function(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)265 int test_library_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
266 const std::string LIBRARY_FUNCTION = common::CONFORMANCE_VERIFY_FENCE +
267 NL
268 NL "bool helperFunction(float *floatp, float val) {"
269 NL " if (!isFenceValid(get_fence(floatp)))"
270 NL " return false;"
271 NL
272 NL " if (*floatp != val)"
273 NL " return false;"
274 NL
275 NL " return true;"
276 NL "}"
277 NL;
278
279 const std::string KERNEL_FUNCTION =
280 NL
281 NL "extern bool helperFunction(float *floatp, float val);"
282 NL
283 NL "__global float gfloat = 1.0f;"
284 NL
285 NL "__kernel void testKernel(__global uint *results) {"
286 NL " uint tid = get_global_id(0);"
287 NL
288 NL " __global float *gfloatp = &gfloat;"
289 NL " __local float lfloat;"
290 NL " lfloat = 2.0f;"
291 NL " __local float *lfloatp = &lfloat;"
292 NL " float pfloat = 3.0f;"
293 NL " __private float *pfloatp = &pfloat;"
294 NL
295 NL " uint failures = 0;"
296 NL
297 NL " failures += helperFunction(gfloatp, gfloat) ? 0 : 1;"
298 NL " failures += helperFunction(lfloatp, lfloat) ? 0 : 1;"
299 NL " failures += helperFunction(pfloatp, pfloat) ? 0 : 1;"
300 NL
301 NL " results[tid] = failures == 0;"
302 NL "}"
303 NL;
304
305 CAdvancedTest test(LIBRARY_FUNCTION, KERNEL_FUNCTION);
306
307 return test.Execute(deviceID, context, queue, num_elements);
308 }
309
test_generic_variable_volatile(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)310 int test_generic_variable_volatile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
311 std::vector<std::string> KERNEL_FUNCTIONS;
312
313 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
314 NL
315 NL "bool helperFunction(float *floatp, float val) {"
316 NL " if (!isFenceValid(get_fence(floatp)))"
317 NL " return false;"
318 NL
319 NL " if (*floatp != val)"
320 NL " return false;"
321 NL
322 NL " return true;"
323 NL "}"
324 NL
325 NL "__kernel void testKernel(__global uint *results) {"
326 NL " uint tid = get_global_id(0);"
327 NL
328 NL " static __global float val;"
329 NL " val = 0.1f;"
330 NL " float * volatile ptr = &val;"
331 NL
332 NL " results[tid] = helperFunction(ptr, val);"
333 NL "}"
334 NL
335 );
336
337 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
338 NL
339 NL "bool helperFunction(float *floatp, float val) {"
340 NL " if (!isFenceValid(get_fence(floatp)))"
341 NL " return false;"
342 NL
343 NL " if (*floatp != val)"
344 NL " return false;"
345 NL
346 NL " return true;"
347 NL "}"
348 NL
349 NL "__kernel void testKernel(__global uint *results) {"
350 NL " uint tid = get_global_id(0);"
351 NL
352 NL " __local float val;"
353 NL " val = 0.1f;"
354 NL " float * ptr = &val;"
355 NL
356 NL " results[tid] = helperFunction(ptr, val);"
357 NL "}"
358 NL
359 );
360
361 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
362 NL
363 NL "bool helperFunction(float *floatp, float val) {"
364 NL " if (!isFenceValid(get_fence(floatp)))"
365 NL " return false;"
366 NL
367 NL " if (*floatp != val)"
368 NL " return false;"
369 NL
370 NL " return true;"
371 NL "}"
372 NL
373 NL "__kernel void testKernel(__global uint *results) {"
374 NL " uint tid = get_global_id(0);"
375 NL
376 NL " __private float val;"
377 NL " val = 0.1f;"
378 NL " float * volatile ptr = &val;"
379 NL
380 NL " results[tid] = helperFunction(ptr, val);"
381 NL "}"
382 NL
383 );
384
385 CAdvancedTest test(KERNEL_FUNCTIONS);
386
387 return test.Execute(deviceID, context, queue, num_elements);
388 }
389
test_generic_variable_const(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)390 int test_generic_variable_const(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
391 std::vector<std::string> KERNEL_FUNCTIONS;
392
393 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
394 NL
395 NL "bool helperFunction(const float *floatp, float val) {"
396 NL " if (!isFenceValid(get_fence(floatp)))"
397 NL " return false;"
398 NL
399 NL " if (*floatp != val)"
400 NL " return false;"
401 NL
402 NL " return true;"
403 NL "}"
404 NL
405 NL "__kernel void testKernel(__global uint *results) {"
406 NL " uint tid = get_global_id(0);"
407 NL
408 NL " const __private float val = 0.1f;"
409 NL " const float * ptr = &val;"
410 NL
411 NL " results[tid] = helperFunction(ptr, val);"
412 NL "}"
413 NL
414 );
415
416 KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
417 NL
418 NL "bool helperFunction(const float *floatp, float val) {"
419 NL " if (!isFenceValid(get_fence(floatp)))"
420 NL " return false;"
421 NL
422 NL " if (*floatp != val)"
423 NL " return false;"
424 NL
425 NL " return true;"
426 NL "}"
427 NL
428 NL "__kernel void testKernel(__global uint *results) {"
429 NL " uint tid = get_global_id(0);"
430 NL
431 NL " const static __global float val = 0.1f;"
432 NL " const float * ptr = &val;"
433 NL
434 NL " results[tid] = helperFunction(ptr, val);"
435 NL "}"
436 NL
437 );
438
439 CAdvancedTest test(KERNEL_FUNCTIONS);
440
441 return test.Execute(deviceID, context, queue, num_elements);
442 }
443
test_generic_variable_gentype(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)444 int test_generic_variable_gentype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
445 const std::string KERNEL_FUNCTION_TEMPLATE = common::CONFORMANCE_VERIFY_FENCE +
446 NL
447 NL "%s"
448 NL
449 NL "bool helperFunction(const %s *%sp, %s val) {"
450 NL " if (!isFenceValid(get_fence(%sp)))"
451 NL " return false;"
452 NL
453 NL " return %s(*%sp == val);"
454 NL "}"
455 NL
456 NL "__kernel void testKernel(__global uint *results) {"
457 NL " uint tid = get_global_id(0);"
458 NL
459 NL " %s %s val = (%s)1;"
460 NL " %s * ptr = &val;"
461 NL
462 NL " results[tid] = helperFunction(ptr, val);"
463 NL "}"
464 NL;
465 /* Qualcomm fix: 12502 Gen Addr Space - Fix kernel for generic variable gentype (half) test
466 const std::string KERNEL_FUNCTION_TEMPLATE_HALF = common::CONFORMANCE_VERIFY_FENCE */
467 const std::string vector_sizes[] = { "", "2", "3", "4", "8", "16" };
468 const std::string gentype_base[] = { "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
469 const std::string gentype_others[] = { "bool", "size_t", "ptrdiff_t", "intptr_t", "uintptr_t" };
470
471 const std::string address_spaces[] = { "static __global", "__private" };
472
473 const std::string vector_cmp = "all";
474
475 std::vector<std::string> KERNEL_FUNCTIONS;
476
477 // Add base types plus theirs vector variants
478 for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
479 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
480 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
481 char temp_kernel[1024];
482 const std::string fulltype = gentype_base[i] + vector_sizes[j];
483 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
484 "",
485 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
486 (j > 0 ? vector_cmp.c_str() : ""),
487 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
488 fulltype.c_str());
489
490 KERNEL_FUNCTIONS.push_back(temp_kernel);
491 }
492 }
493 }
494
495 const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
496
497 // Add double floating types if they are supported
498 if (is_extension_available(deviceID, "cl_khr_fp64")) {
499 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
500 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
501 char temp_kernel[1024];
502 const std::string fulltype = std::string("double") + vector_sizes[j];
503 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
504 cl_khr_fp64_pragma.c_str(),
505 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
506 (j > 0 ? vector_cmp.c_str() : ""),
507 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
508 fulltype.c_str());
509
510 KERNEL_FUNCTIONS.push_back(temp_kernel);
511 }
512 }
513 }
514 /* Qualcomm fix: 12502 Gen Addr Space - Fix kernel for generic variable gentype (half) test */
515 const std::string cl_khr_fp16_pragma = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
516
517 // Add half floating types if they are supported
518 if (is_extension_available(deviceID, "cl_khr_fp16")) {
519 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
520 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
521 char temp_kernel[1024];
522 const std::string fulltype = std::string("half") + vector_sizes[j];
523 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
524 cl_khr_fp16_pragma.c_str(),
525 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
526 (j > 0 ? vector_cmp.c_str() : ""),
527 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
528 fulltype.c_str());
529 /* Qualcomm fix: end */
530 KERNEL_FUNCTIONS.push_back(temp_kernel);
531 }
532 }
533 }
534
535 // Add other types that do not have vector variants
536 for (size_t i = 0; i < sizeof(gentype_others) / sizeof(gentype_others[0]); i++) {
537 for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
538 char temp_kernel[1024];
539 const std::string fulltype = gentype_others[i];
540 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
541 "",
542 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
543 "",
544 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
545 fulltype.c_str());
546
547 KERNEL_FUNCTIONS.push_back(temp_kernel);
548 }
549 }
550
551 CAdvancedTest test(KERNEL_FUNCTIONS);
552
553 return test.Execute(deviceID, context, queue, num_elements);
554 }
555
create_math_kernels(std::vector<std::string> & KERNEL_FUNCTIONS)556 void create_math_kernels(std::vector<std::string>& KERNEL_FUNCTIONS) {
557 const std::string KERNEL_FUNCTION_TEMPLATE =
558 NL
559 NL "__kernel void testKernel(__global uint *results) {"
560 NL " uint tid = get_global_id(0);"
561 NL
562 NL " const %s param1 = %s;"
563 NL " %s param2_generic;"
564 NL " %s param2_reference;"
565 NL " %s * ptr = ¶m2_generic;"
566 NL " %s return_value_generic;"
567 NL " %s return_value_reference;"
568 NL
569 NL " return_value_generic = %s(param1, ptr);"
570 NL " return_value_reference = %s(param1, ¶m2_reference);"
571 NL
572 NL " results[tid] = (%s(*ptr == param2_reference) && %s(return_value_generic == return_value_reference));"
573 NL "}"
574 NL;
575
576 typedef struct {
577 std::string bulitin_name;
578 std::string base_gentype;
579 std::string pointer_gentype;
580 std::string first_param_value;
581 std::string compare_fn;
582 } BuiltinDescriptor;
583
584 BuiltinDescriptor builtins[] = {
585 { "fract", "float", "float", "133.55f", "" },
586 { "frexp", "float2", "int2", "(float2)(24.12f, 99999.7f)", "all" },
587 { "frexp", "float", "int", "1234.5f", "" },
588 { "lgamma_r", "float2", "int2", "(float2)(1000.0f, 9999.5f)", "all" },
589 { "lgamma_r", "float", "int", "1000.0f", "" },
590 { "modf", "float", "float", "1234.56789f", "" },
591 { "sincos", "float", "float", "3.141592f", "" }
592 };
593
594 for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
595 char temp_kernel[1024];
596 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), builtins[i].base_gentype.c_str(), builtins[i].first_param_value.c_str(),
597 builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].base_gentype.c_str(),
598 builtins[i].base_gentype.c_str(), builtins[i].bulitin_name.c_str(), builtins[i].bulitin_name.c_str(),
599 builtins[i].compare_fn.c_str(), builtins[i].compare_fn.c_str());
600
601 KERNEL_FUNCTIONS.push_back(temp_kernel);
602 }
603
604 // add special case for remquo (3 params)
605 KERNEL_FUNCTIONS.push_back(
606 NL
607 NL "__kernel void testKernel(__global uint *results) {"
608 NL " uint tid = get_global_id(0);"
609 NL
610 NL " const float param1 = 1234.56789f;"
611 NL " const float param2 = 123.456789f;"
612 NL " int param3_generic;"
613 NL " int param3_reference;"
614 NL " int * ptr = ¶m3_generic;"
615 NL " float return_value_generic;"
616 NL " float return_value_reference;"
617 NL
618 NL " return_value_generic = remquo(param1, param2, ptr);"
619 NL " return_value_reference = remquo(param1, param2, ¶m3_reference);"
620 NL
621 NL " results[tid] = (*ptr == param3_reference && return_value_generic == return_value_reference);"
622 NL "}"
623 NL
624 );
625 }
626
get_default_data_for_type(const std::string & type)627 std::string get_default_data_for_type(const std::string& type) {
628 std::string result;
629
630 if (type == "float") {
631 for (int i = 0; i < 10; i++) {
632 for (int j = 0; j < 10; j++) {
633 char temp[10];
634 sprintf(temp, "%d.%df, ", i, j);
635 result += std::string(temp);
636 }
637 }
638 }
639
640 else if (type == "double") {
641 for (int i = 0; i < 10; i++) {
642 for (int j = 0; j < 10; j++) {
643 char temp[10];
644 sprintf(temp, "%d.%d, ", i, j);
645 result += std::string(temp);
646 }
647 }
648 }
649
650 else {
651 for (int i = 0; i < 100; i++) {
652 char temp[10];
653 sprintf(temp, "%d, ", i);
654 result += std::string(temp);
655 }
656 }
657
658 return result;
659 }
660
create_vload_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)661 void create_vload_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
662 const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
663 NL
664 NL "%s"
665 NL "__global %s data[] = { %s };"
666 NL
667 NL "__kernel void testKernel(__global uint *results) {"
668 NL " uint tid = get_global_id(0);"
669 NL
670 NL " // Testing: %s"
671 NL " const %s * ptr = data;"
672 NL " %s%s result_generic = vload%s(2, ptr);"
673 NL " %s%s result_reference = vload%s(2, data);"
674 NL
675 NL " results[tid] = all(result_generic == result_reference);"
676 NL "}"
677 NL;
678
679 const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
680 NL
681 NL "%s"
682 NL "__constant %s to_copy_from[] = { %s };"
683 NL
684 NL "__kernel void testKernel(__global uint *results) {"
685 NL " uint tid = get_global_id(0);"
686 NL
687 NL " __local %s data[100];"
688 NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
689 NL " data[i] = to_copy_from[i];"
690 NL
691 NL " const %s * ptr = data;"
692 NL " %s%s result_generic = vload%s(2, ptr);"
693 NL " %s%s result_reference = vload%s(2, data);"
694 NL
695 NL " results[tid] = all(result_generic == result_reference);"
696 NL "}"
697 NL;
698
699 const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
700 NL
701 NL "%s"
702 NL "__kernel void testKernel(__global uint *results) {"
703 NL " uint tid = get_global_id(0);"
704 NL
705 NL " %s data[] = { %s };"
706 NL " // Testing: %s"
707 NL " const %s * ptr = data;"
708 NL " %s%s result_generic = vload%s(2, ptr);"
709 NL " %s%s result_reference = vload%s(2, data);"
710 NL
711 NL " results[tid] = all(result_generic == result_reference);"
712 NL "}"
713 NL;
714
715 const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
716 const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
717 const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
718
719 const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
720
721 for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
722 const char *pragma_str = "";
723
724 if (i == 0) {
725 if (!is_extension_available(deviceID, "cl_khr_fp64"))
726 continue;
727 else
728 pragma_str = cl_khr_fp64_pragma.c_str();
729 }
730
731 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
732 for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
733 char temp_kernel[4098];
734 sprintf(temp_kernel, kernel_variants[k].c_str(),
735 pragma_str,
736 gentype_base[i].c_str(),
737 get_default_data_for_type(gentype_base[i]).c_str(),
738 gentype_base[i].c_str(),
739 gentype_base[i].c_str(),
740 gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str(),
741 gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str()
742 );
743
744 KERNEL_FUNCTIONS.push_back(temp_kernel);
745 }
746 }
747 }
748 }
749
create_vstore_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)750 void create_vstore_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
751 const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
752 NL
753 NL "%s"
754 NL "__global %s data_generic[] = { %s };"
755 NL "__global %s data_reference[] = { %s };"
756 NL
757 NL "__kernel void testKernel(__global uint *results) {"
758 NL " uint tid = get_global_id(0);"
759 NL
760 NL " %s%s input = (%s%s)(1);"
761 NL " %s * ptr = data_generic;"
762 NL
763 NL " vstore%s(input, 2, ptr);"
764 NL " vstore%s(input, 2, data_reference);"
765 NL
766 NL " bool result = true;"
767 NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
768 NL " if (data_generic[i] != data_reference[i])"
769 NL " result = false;"
770 NL
771 NL " results[tid] = result;"
772 NL "}"
773 NL;
774
775 const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
776 NL
777 NL "%s"
778 NL "__constant %s to_copy_from[] = { %s };"
779 NL
780 NL "__kernel void testKernel(__global uint *results) {"
781 NL " uint tid = get_global_id(0);"
782 NL
783 NL " __local %s data_generic[100];"
784 NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
785 NL " data_generic[i] = to_copy_from[i];"
786 NL
787 NL " __local %s data_reference[100];"
788 NL " for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
789 NL " data_reference[i] = to_copy_from[i];"
790 NL
791 NL " %s%s input = (%s%s)(1);"
792 NL " %s * ptr = data_generic;"
793 NL
794 NL " vstore%s(input, 2, ptr);"
795 NL " vstore%s(input, 2, data_reference);"
796 NL
797 NL " work_group_barrier(CLK_LOCAL_MEM_FENCE);"
798 NL
799 NL " bool result = true;"
800 NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
801 NL " if (data_generic[i] != data_reference[i])"
802 NL " result = false;"
803 NL
804 NL " results[tid] = result;"
805 NL "}"
806 NL;
807
808 const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
809 NL
810 NL "%s"
811 NL "__kernel void testKernel(__global uint *results) {"
812 NL " uint tid = get_global_id(0);"
813 NL
814 NL " __private %s data_generic[] = { %s };"
815 NL " __private %s data_reference[] = { %s };"
816 NL
817 NL " %s%s input = (%s%s)(1);"
818 NL " %s * ptr = data_generic;"
819 NL
820 NL " vstore%s(input, 2, ptr);"
821 NL " vstore%s(input, 2, data_reference);"
822 NL
823 NL " bool result = true;"
824 NL " for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
825 NL " if (data_generic[i] != data_reference[i])"
826 NL " result = false;"
827 NL
828 NL " results[tid] = result;"
829 NL "}"
830 NL;
831
832 const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
833 const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
834 const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
835
836 const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
837
838 for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
839 const char *pragma_str = "";
840 if (i == 0) {
841 if (!is_extension_available(deviceID, "cl_khr_fp64"))
842 continue;
843 else
844 pragma_str = cl_khr_fp64_pragma.c_str();
845 }
846
847
848 for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
849 for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
850 char temp_kernel[4098];
851
852 switch (k) {
853 case 0: // global template
854 case 2: // private template
855 sprintf(temp_kernel, kernel_variants[k].c_str(),
856 pragma_str,
857 gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
858 gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
859 gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
860 gentype_base[i].c_str(),
861 vector_sizes[j].c_str(),
862 vector_sizes[j].c_str()
863 );
864 break;
865
866 case 1: // local template
867 sprintf(temp_kernel, kernel_variants[k].c_str(),
868 pragma_str,
869 gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
870 gentype_base[i].c_str(),
871 gentype_base[i].c_str(),
872 gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
873 gentype_base[i].c_str(),
874 vector_sizes[j].c_str(),
875 vector_sizes[j].c_str()
876 );
877 break;
878 }
879
880 KERNEL_FUNCTIONS.push_back(temp_kernel);
881 }
882 }
883 }
884 }
885
test_builtin_functions(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)886 int test_builtin_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
887 std::vector<std::string> KERNEL_FUNCTIONS;
888
889 create_math_kernels(KERNEL_FUNCTIONS);
890 create_vload_kernels(KERNEL_FUNCTIONS, deviceID);
891 create_vstore_kernels(KERNEL_FUNCTIONS, deviceID);
892
893 CAdvancedTest test(KERNEL_FUNCTIONS);
894
895 return test.Execute(deviceID, context, queue, num_elements);
896 }
897
test_generic_advanced_casting(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)898 int test_generic_advanced_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
899 std::vector<std::string> KERNEL_FUNCTIONS;
900
901 KERNEL_FUNCTIONS.push_back(
902 NL
903 NL "__global char arr[16] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };"
904 NL
905 NL "__kernel void testKernel(__global uint *results) {"
906 NL " uint tid = get_global_id(0);"
907 NL
908 NL " const int * volatile ptr = (const int *)arr;"
909 NL
910 NL " results[tid] = (ptr[0] == 0x00000000) && (ptr[1] == 0x01010101) && (ptr[2] == 0x02020202) && (ptr[3] == 0x03030303);"
911 NL "}"
912 NL
913 );
914
915 KERNEL_FUNCTIONS.push_back(
916 NL
917 NL "__kernel void testKernel(__global uint *results) {"
918 NL " uint tid = get_global_id(0);"
919 NL
920 NL " __local int i;"
921 NL " i = 0x11112222;"
922 NL " short *ptr = (short *)&i;"
923 NL " local int *lptr = (local int *)ptr;"
924 NL
925 NL " results[tid] = (lptr == &i) && (*lptr == i);"
926 NL "}"
927 NL
928 );
929
930 KERNEL_FUNCTIONS.push_back(
931 NL
932 NL "__kernel void testKernel(__global uint *results) {"
933 NL " uint tid = get_global_id(0);"
934 NL
935 NL " int i = 0x11112222;"
936 NL
937 NL " void *ptr = &i;"
938 NL " int copy = *((int *)ptr);"
939 NL
940 NL " results[tid] = (copy == i);"
941 NL "}"
942 NL
943 );
944
945 CAdvancedTest test(KERNEL_FUNCTIONS);
946
947 return test.Execute(deviceID, context, queue, num_elements);
948 }
949
test_generic_ptr_to_host_mem_svm(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)950 int test_generic_ptr_to_host_mem_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
951 cl_int result = CL_SUCCESS;
952
953 /* Test SVM capabilities and select matching tests */
954 cl_device_svm_capabilities caps;
955 auto version = get_device_cl_version(deviceID);
956 auto expected_min_version = Version(2, 0);
957
958 cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, NULL);
959 test_error(error, "clGetDeviceInfo(CL_DEVICE_SVM_CAPABILITIES) failed");
960
961 if ((version < expected_min_version)
962 || (version >= Version(3, 0) && caps == 0))
963 return TEST_SKIPPED_ITSELF;
964
965 if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) {
966 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_COARSE_GRAINED_SVM);
967 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
968 }
969
970 if (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
971 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_BUFFER_SVM);
972 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
973 }
974
975 if (caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) {
976 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_SYSTEM_SVM);
977 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
978 }
979
980 if (caps & CL_DEVICE_SVM_ATOMICS) {
981 CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_ATOMICS_SVM);
982 result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
983 }
984
985 return result;
986 }
987
test_generic_ptr_to_host_mem(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)988 int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
989 cl_int result = CL_SUCCESS;
990
991 CAdvancedTest test_global_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_HOST_PTR);
992 result |= test_global_ptr.Execute(deviceID, context, queue, num_elements);
993
994 CAdvancedTest test_local_ptr(common::LOCAL_KERNEL_FUNCTION, ARG_TYPE_HOST_LOCAL);
995 result |= test_local_ptr.Execute(deviceID, context, queue, num_elements / 64);
996
997 return result;
998 }
999