• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/testHarness.h"
17 #include "harness/typeWrappers.h"
18 #include "base.h"
19 
20 #include <string>
21 #include <vector>
22 #include <algorithm>
23 #include <sstream>
24 
25 typedef enum {
26     ARG_TYPE_NONE,
27 
28     ARG_TYPE_HOST_PTR,
29     ARG_TYPE_HOST_LOCAL,
30 
31     ARG_TYPE_COARSE_GRAINED_SVM,
32     ARG_TYPE_FINE_GRAINED_BUFFER_SVM,
33     ARG_TYPE_FINE_GRAINED_SYSTEM_SVM,
34     ARG_TYPE_ATOMICS_SVM
35 } ExtraKernelArgMemType;
36 
37 class CSVMWrapper {
38 public:
CSVMWrapper()39     CSVMWrapper() : ptr_(NULL), context_(NULL) { }
40 
Attach(cl_context context,void * ptr)41     void Attach(cl_context context, void *ptr) {
42         context_ = context;
43         ptr_ = ptr;
44     }
45 
~CSVMWrapper()46     ~CSVMWrapper() {
47         if (ptr_)
48             clSVMFree(context_, ptr_);
49     }
50 
operator void*()51     operator void *() {
52         return ptr_;
53     }
54 
55 private:
56     void *ptr_;
57     cl_context context_;
58 };
59 
60 class CAdvancedTest : public CTest {
61 public:
CAdvancedTest(const std::vector<std::string> & kernel)62     CAdvancedTest(const std::vector<std::string>& kernel) : CTest(), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
63 
64     }
65 
CAdvancedTest(const std::string & library,const std::vector<std::string> & kernel)66     CAdvancedTest(const std::string& library, const std::vector<std::string>& kernel) : CTest(), _libraryCode(library), _kernels(kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
67 
68     }
69 
CAdvancedTest(const std::string & kernel,ExtraKernelArgMemType argType=ARG_TYPE_NONE)70     CAdvancedTest(const std::string& kernel, ExtraKernelArgMemType argType = ARG_TYPE_NONE) : CTest(), _kernels(1, kernel), _extraKernelArgMemType(argType) {
71 
72     }
73 
CAdvancedTest(const std::string & library,const std::string & kernel)74     CAdvancedTest(const std::string& library, const std::string& kernel) : CTest(), _libraryCode(library), _kernels(1, kernel), _extraKernelArgMemType(ARG_TYPE_NONE) {
75 
76     }
77 
PrintCompilationLog(cl_program program,cl_device_id device)78     int PrintCompilationLog(cl_program program, cl_device_id device) {
79         cl_int error;
80         size_t buildLogSize = 0;
81 
82         error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
83         test_error(error, "clGetProgramBuildInfo failed");
84 
85         std::string log;
86         log.resize(buildLogSize);
87 
88         error = clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, buildLogSize, &log[0], NULL);
89         test_error(error, "clGetProgramBuildInfo failed");
90 
91         log_error("Build log for device is:\n------------\n");
92         log_error("%s\n", log.c_str() );
93         log_error( "\n----------\n" );
94 
95         return CL_SUCCESS;
96     }
97 
ExecuteSubcase(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,const std::string & src)98     int ExecuteSubcase(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const std::string& src) {
99         cl_int error;
100 
101         clProgramWrapper program, preCompiledLibrary, library, finalProgram;
102         clKernelWrapper kernel;
103 
104         const char *srcPtr = src.c_str();
105 
106         if (!_libraryCode.empty()) {
107             program = clCreateProgramWithSource(context, 1, &srcPtr, NULL, &error);
108             test_error(error, "clCreateProgramWithSource failed");
109 
110             error = clCompileProgram(program, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL);
111 
112             if (error != CL_SUCCESS)
113                 PrintCompilationLog(program, deviceID);
114             test_error(error, "clCompileProgram failed");
115 
116             const char *srcPtrLibrary = _libraryCode.c_str();
117 
118             preCompiledLibrary = clCreateProgramWithSource(context, 1, &srcPtrLibrary, NULL, &error);
119             test_error(error, "clCreateProgramWithSource failed");
120 
121             error = clCompileProgram(preCompiledLibrary, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL);
122 
123             if (error != CL_SUCCESS)
124                 PrintCompilationLog(preCompiledLibrary, deviceID);
125             test_error(error, "clCompileProgram failed");
126 
127             library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &preCompiledLibrary, NULL, NULL, &error);
128             test_error(error, "clLinkProgram failed");
129 
130             cl_program objects[] = { program, library };
131             finalProgram = clLinkProgram(context, 1, &deviceID, "", 2, objects, NULL, NULL, &error);
132             test_error(error, "clLinkProgram failed");
133 
134             kernel = clCreateKernel(finalProgram, "testKernel", &error);
135             test_error(error, "clCreateKernel failed");
136         }
137 
138         else {
139             if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
140                 log_error("create_single_kernel_helper failed\n");
141                 return -1;
142             }
143         }
144 
145         size_t bufferSize = num_elements * sizeof(cl_uint);
146         clMemWrapper buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bufferSize, NULL, &error);
147         test_error(error, "clCreateBuffer failed");
148 
149         error = clSetKernelArg(kernel, 0, sizeof(buffer), &buffer);
150         test_error(error, "clSetKernelArg(0) failed");
151 
152         // Warning: the order below is very important as SVM buffer cannot be free'd before corresponding mem_object
153         CSVMWrapper svmWrapper;
154         clMemWrapper extraArg;
155         std::vector<cl_uint> extraArgData(num_elements);
156         for (cl_uint i = 0; i < (cl_uint)num_elements; i++)
157             extraArgData[i] = i;
158 
159         if (_extraKernelArgMemType != ARG_TYPE_NONE) {
160             if (_extraKernelArgMemType == ARG_TYPE_HOST_PTR) {
161                 extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, &extraArgData[0], &error);
162                 test_error(error, "clCreateBuffer failed");
163             }
164 
165             else {
166                 void *ptr = NULL;
167 
168                 switch (_extraKernelArgMemType) {
169                 case ARG_TYPE_COARSE_GRAINED_SVM:
170                     ptr = clSVMAlloc(context, CL_MEM_READ_WRITE, bufferSize, 0);
171                     break;
172                 case ARG_TYPE_FINE_GRAINED_BUFFER_SVM:
173                     ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_READ_WRITE, bufferSize, 0);
174                     break;
175                 case ARG_TYPE_FINE_GRAINED_SYSTEM_SVM:
176                     ptr = &extraArgData[0];
177                     break;
178                 case ARG_TYPE_ATOMICS_SVM:
179                     ptr = clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS | CL_MEM_READ_WRITE, bufferSize, 0);
180                     break;
181                 default:
182                     break;
183                 }
184 
185                 if(_extraKernelArgMemType != ARG_TYPE_HOST_LOCAL) {
186                   if (!ptr) {
187                     log_error("Allocation failed\n");
188                     return -1;
189                   }
190 
191                   if (_extraKernelArgMemType != ARG_TYPE_FINE_GRAINED_SYSTEM_SVM) {
192                   svmWrapper.Attach(context, ptr);
193                   }
194 
195                   if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
196                     error = clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, ptr, bufferSize, 0, NULL, NULL);
197                     test_error(error, "clEnqueueSVMMap failed");
198                   }
199 
200                   memcpy(ptr, &extraArgData[0], bufferSize);
201 
202                   if (_extraKernelArgMemType == ARG_TYPE_COARSE_GRAINED_SVM) {
203                     error = clEnqueueSVMUnmap(queue, ptr, 0, NULL, NULL);
204                     test_error(error, "clEnqueueSVMUnmap failed");
205                     clFinish(queue);
206                   }
207 
208                   extraArg = clCreateBuffer(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, bufferSize, ptr, &error);
209                   test_error(error, "clCreateBuffer from SVM buffer failed");
210                 }
211             }
212 
213             if(_extraKernelArgMemType == ARG_TYPE_HOST_LOCAL)
214               error = clSetKernelArg(kernel, 1, bufferSize, NULL);
215             else
216               error = clSetKernelArg(kernel, 1, sizeof(extraArg), &extraArg);
217 
218 
219             test_error(error, "clSetKernelArg(1) failed");
220         }
221 
222         size_t globalWorkGroupSize = num_elements;
223         size_t localWorkGroupSize = 0;
224         error = get_max_common_work_group_size(context, kernel, globalWorkGroupSize, &localWorkGroupSize);
225         test_error(error, "Unable to get common work group size");
226 
227         error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalWorkGroupSize, &localWorkGroupSize, 0, NULL, NULL);
228         test_error(error, "clEnqueueNDRangeKernel failed");
229 
230         // verify results
231         std::vector<cl_uint> results(num_elements);
232 
233         error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, bufferSize, &results[0], 0, NULL, NULL);
234         test_error(error, "clEnqueueReadBuffer failed");
235 
236         size_t passCount = std::count(results.begin(), results.end(), 1);
237         if (passCount != results.size()) {
238             std::vector<cl_uint>::iterator iter = std::find(results.begin(), results.end(), 0);
239             log_error("Verification on device failed at index %ld\n", std::distance(results.begin(), iter));
240             log_error("%ld out of %ld failed\n", (results.size()-passCount), results.size());
241             return -1;
242         }
243 
244         return CL_SUCCESS;
245     }
246 
Execute(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)247     int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
248         cl_int result = CL_SUCCESS;
249 
250         for (std::vector<std::string>::const_iterator it = _kernels.begin(); it != _kernels.end(); ++it) {
251             log_info("Executing subcase #%ld out of %ld\n", (it - _kernels.begin() + 1), _kernels.size());
252 
253             result |= ExecuteSubcase(deviceID, context, queue, num_elements, *it);
254         }
255 
256         return result;
257     }
258 
259 private:
260     const std::string _libraryCode;
261     const std::vector<std::string> _kernels;
262     const ExtraKernelArgMemType _extraKernelArgMemType;
263 };
264 
test_library_function(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)265 int test_library_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
266     const std::string LIBRARY_FUNCTION = common::CONFORMANCE_VERIFY_FENCE +
267         NL
268         NL "bool helperFunction(float *floatp, float val) {"
269         NL "    if (!isFenceValid(get_fence(floatp)))"
270         NL "        return false;"
271         NL
272         NL "    if (*floatp != val)"
273         NL "        return false;"
274         NL
275         NL "    return true;"
276         NL "}"
277         NL;
278 
279     const std::string KERNEL_FUNCTION =
280         NL
281         NL "extern bool helperFunction(float *floatp, float val);"
282         NL
283         NL "__global float gfloat = 1.0f;"
284         NL
285         NL "__kernel void testKernel(__global uint *results) {"
286         NL "    uint tid = get_global_id(0);"
287         NL
288         NL "    __global float *gfloatp = &gfloat;"
289         NL "    __local float lfloat;"
290         NL "    lfloat = 2.0f;"
291         NL "    __local float *lfloatp = &lfloat;"
292         NL "    float pfloat = 3.0f;"
293         NL "    __private float *pfloatp = &pfloat;"
294         NL
295         NL "    uint failures = 0;"
296         NL
297         NL "    failures += helperFunction(gfloatp, gfloat) ? 0 : 1;"
298         NL "    failures += helperFunction(lfloatp, lfloat) ? 0 : 1;"
299         NL "    failures += helperFunction(pfloatp, pfloat) ? 0 : 1;"
300         NL
301         NL "    results[tid] = failures == 0;"
302         NL "}"
303         NL;
304 
305     CAdvancedTest test(LIBRARY_FUNCTION, KERNEL_FUNCTION);
306 
307     return test.Execute(deviceID, context, queue, num_elements);
308 }
309 
test_generic_variable_volatile(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)310 int test_generic_variable_volatile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
311     std::vector<std::string> KERNEL_FUNCTIONS;
312 
313     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
314         NL
315         NL "bool helperFunction(float *floatp, float val) {"
316         NL "    if (!isFenceValid(get_fence(floatp)))"
317         NL "        return false;"
318         NL
319         NL "    if (*floatp != val)"
320         NL "        return false;"
321         NL
322         NL "    return true;"
323         NL "}"
324         NL
325         NL "__kernel void testKernel(__global uint *results) {"
326         NL "    uint tid = get_global_id(0);"
327         NL
328         NL "    static __global float val;"
329         NL "    val = 0.1f;"
330         NL "    float * volatile ptr = &val;"
331         NL
332         NL "    results[tid] = helperFunction(ptr, val);"
333         NL "}"
334         NL
335     );
336 
337     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
338         NL
339         NL "bool helperFunction(float *floatp, float val) {"
340         NL "    if (!isFenceValid(get_fence(floatp)))"
341         NL "        return false;"
342         NL
343         NL "    if (*floatp != val)"
344         NL "        return false;"
345         NL
346         NL "    return true;"
347         NL "}"
348         NL
349         NL "__kernel void testKernel(__global uint *results) {"
350         NL "    uint tid = get_global_id(0);"
351         NL
352         NL "    __local float val;"
353         NL "    val = 0.1f;"
354         NL "    float * ptr = &val;"
355         NL
356         NL "    results[tid] = helperFunction(ptr, val);"
357         NL "}"
358         NL
359     );
360 
361     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
362         NL
363         NL "bool helperFunction(float *floatp, float val) {"
364         NL "    if (!isFenceValid(get_fence(floatp)))"
365         NL "        return false;"
366         NL
367         NL "    if (*floatp != val)"
368         NL "        return false;"
369         NL
370         NL "    return true;"
371         NL "}"
372         NL
373         NL "__kernel void testKernel(__global uint *results) {"
374         NL "    uint tid = get_global_id(0);"
375         NL
376         NL "    __private float val;"
377         NL "    val = 0.1f;"
378         NL "    float * volatile ptr = &val;"
379         NL
380         NL "    results[tid] = helperFunction(ptr, val);"
381         NL "}"
382         NL
383     );
384 
385     CAdvancedTest test(KERNEL_FUNCTIONS);
386 
387     return test.Execute(deviceID, context, queue, num_elements);
388 }
389 
test_generic_variable_const(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)390 int test_generic_variable_const(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
391     std::vector<std::string> KERNEL_FUNCTIONS;
392 
393     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
394         NL
395         NL "bool helperFunction(const float *floatp, float val) {"
396         NL "    if (!isFenceValid(get_fence(floatp)))"
397         NL "        return false;"
398         NL
399         NL "    if (*floatp != val)"
400         NL "        return false;"
401         NL
402         NL "    return true;"
403         NL "}"
404         NL
405         NL "__kernel void testKernel(__global uint *results) {"
406         NL "    uint tid = get_global_id(0);"
407         NL
408         NL "    const __private float val = 0.1f;"
409         NL "    const float * ptr = &val;"
410         NL
411         NL "    results[tid] = helperFunction(ptr, val);"
412         NL "}"
413         NL
414     );
415 
416     KERNEL_FUNCTIONS.push_back(common::CONFORMANCE_VERIFY_FENCE +
417         NL
418         NL "bool helperFunction(const float *floatp, float val) {"
419         NL "    if (!isFenceValid(get_fence(floatp)))"
420         NL "        return false;"
421         NL
422         NL "    if (*floatp != val)"
423         NL "        return false;"
424         NL
425         NL "    return true;"
426         NL "}"
427         NL
428         NL "__kernel void testKernel(__global uint *results) {"
429         NL "    uint tid = get_global_id(0);"
430         NL
431         NL "    const static __global float val = 0.1f;"
432         NL "    const float * ptr = &val;"
433         NL
434         NL "    results[tid] = helperFunction(ptr, val);"
435         NL "}"
436         NL
437     );
438 
439     CAdvancedTest test(KERNEL_FUNCTIONS);
440 
441     return test.Execute(deviceID, context, queue, num_elements);
442 }
443 
test_generic_variable_gentype(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)444 int test_generic_variable_gentype(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
445     const std::string KERNEL_FUNCTION_TEMPLATE = common::CONFORMANCE_VERIFY_FENCE +
446         NL
447         NL "%s"
448         NL
449         NL "bool helperFunction(const %s *%sp, %s val) {"
450         NL "    if (!isFenceValid(get_fence(%sp)))"
451         NL "        return false;"
452         NL
453         NL "    return %s(*%sp == val);"
454         NL "}"
455         NL
456         NL "__kernel void testKernel(__global uint *results) {"
457         NL "    uint tid = get_global_id(0);"
458         NL
459         NL "    %s %s val = (%s)1;"
460         NL "    %s * ptr = &val;"
461         NL
462         NL "    results[tid] = helperFunction(ptr, val);"
463         NL "}"
464         NL;
465 /* Qualcomm fix: 12502  Gen Addr Space - Fix kernel for generic variable gentype (half) test
466    const std::string KERNEL_FUNCTION_TEMPLATE_HALF = common::CONFORMANCE_VERIFY_FENCE */
467     const std::string vector_sizes[] = { "", "2", "3", "4", "8", "16" };
468     const std::string gentype_base[] = { "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
469     const std::string gentype_others[] = { "bool", "size_t", "ptrdiff_t", "intptr_t", "uintptr_t" };
470 
471     const std::string address_spaces[] = { "static __global", "__private" };
472 
473     const std::string vector_cmp = "all";
474 
475     std::vector<std::string> KERNEL_FUNCTIONS;
476 
477     // Add base types plus theirs vector variants
478     for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
479         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
480             for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
481                 char temp_kernel[1024];
482                 const std::string fulltype = gentype_base[i] + vector_sizes[j];
483                 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
484                     "",
485                     fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
486                     (j > 0 ? vector_cmp.c_str() : ""),
487                     fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
488                     fulltype.c_str());
489 
490                 KERNEL_FUNCTIONS.push_back(temp_kernel);
491             }
492         }
493     }
494 
495     const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
496 
497     // Add double floating types if they are supported
498     if (is_extension_available(deviceID, "cl_khr_fp64")) {
499         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
500             for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
501                 char temp_kernel[1024];
502                 const std::string fulltype = std::string("double") + vector_sizes[j];
503                 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
504                     cl_khr_fp64_pragma.c_str(),
505                     fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
506                     (j > 0 ? vector_cmp.c_str() : ""),
507                     fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
508                     fulltype.c_str());
509 
510                 KERNEL_FUNCTIONS.push_back(temp_kernel);
511             }
512         }
513     }
514 /* Qualcomm fix: 12502  Gen Addr Space - Fix kernel for generic variable gentype (half) test */
515     const std::string cl_khr_fp16_pragma = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
516 
517     // Add half floating types if they are supported
518     if (is_extension_available(deviceID, "cl_khr_fp16")) {
519         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
520             for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
521                 char temp_kernel[1024];
522                 const std::string fulltype = std::string("half") + vector_sizes[j];
523                 sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
524                     cl_khr_fp16_pragma.c_str(),
525                     fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
526                     (j > 0 ? vector_cmp.c_str() : ""),
527                     fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
528                     fulltype.c_str());
529 /* Qualcomm fix: end */
530                 KERNEL_FUNCTIONS.push_back(temp_kernel);
531             }
532         }
533     }
534 
535     // Add other types that do not have vector variants
536     for (size_t i = 0; i < sizeof(gentype_others) / sizeof(gentype_others[0]); i++) {
537         for (size_t k = 0; k < sizeof(address_spaces) / sizeof(address_spaces[0]); k++) {
538             char temp_kernel[1024];
539             const std::string fulltype = gentype_others[i];
540             sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(),
541                 "",
542                 fulltype.c_str(), fulltype.c_str(), fulltype.c_str(), fulltype.c_str(),
543                 "",
544                 fulltype.c_str(), address_spaces[k].c_str(), fulltype.c_str(), fulltype.c_str(),
545                 fulltype.c_str());
546 
547             KERNEL_FUNCTIONS.push_back(temp_kernel);
548         }
549     }
550 
551     CAdvancedTest test(KERNEL_FUNCTIONS);
552 
553     return test.Execute(deviceID, context, queue, num_elements);
554 }
555 
create_math_kernels(std::vector<std::string> & KERNEL_FUNCTIONS)556 void create_math_kernels(std::vector<std::string>& KERNEL_FUNCTIONS) {
557     const std::string KERNEL_FUNCTION_TEMPLATE =
558         NL
559         NL "__kernel void testKernel(__global uint *results) {"
560         NL "    uint tid = get_global_id(0);"
561         NL
562         NL "    const %s param1 = %s;"
563         NL "    %s param2_generic;"
564         NL "    %s param2_reference;"
565         NL "    %s * ptr = &param2_generic;"
566         NL "    %s return_value_generic;"
567         NL "    %s return_value_reference;"
568         NL
569         NL "    return_value_generic = %s(param1, ptr);"
570         NL "    return_value_reference = %s(param1, &param2_reference);"
571         NL
572         NL "    results[tid] = (%s(*ptr == param2_reference) && %s(return_value_generic == return_value_reference));"
573         NL "}"
574         NL;
575 
576     typedef struct {
577         std::string bulitin_name;
578         std::string base_gentype;
579         std::string pointer_gentype;
580         std::string first_param_value;
581         std::string compare_fn;
582     } BuiltinDescriptor;
583 
584     BuiltinDescriptor builtins[] = {
585         { "fract", "float", "float", "133.55f", "" },
586         { "frexp", "float2", "int2", "(float2)(24.12f, 99999.7f)", "all" },
587         { "frexp", "float", "int", "1234.5f", "" },
588         { "lgamma_r", "float2", "int2", "(float2)(1000.0f, 9999.5f)", "all" },
589         { "lgamma_r", "float", "int", "1000.0f", "" },
590         { "modf", "float", "float", "1234.56789f", "" },
591         { "sincos", "float", "float", "3.141592f", "" }
592     };
593 
594     for (size_t i = 0; i < sizeof(builtins) / sizeof(builtins[0]); i++) {
595         char temp_kernel[1024];
596         sprintf(temp_kernel, KERNEL_FUNCTION_TEMPLATE.c_str(), builtins[i].base_gentype.c_str(), builtins[i].first_param_value.c_str(),
597             builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].pointer_gentype.c_str(), builtins[i].base_gentype.c_str(),
598             builtins[i].base_gentype.c_str(), builtins[i].bulitin_name.c_str(), builtins[i].bulitin_name.c_str(),
599             builtins[i].compare_fn.c_str(), builtins[i].compare_fn.c_str());
600 
601         KERNEL_FUNCTIONS.push_back(temp_kernel);
602     }
603 
604     // add special case for remquo (3 params)
605     KERNEL_FUNCTIONS.push_back(
606         NL
607         NL "__kernel void testKernel(__global uint *results) {"
608         NL "    uint tid = get_global_id(0);"
609         NL
610         NL "    const float param1 = 1234.56789f;"
611         NL "    const float param2 = 123.456789f;"
612         NL "    int param3_generic;"
613         NL "    int param3_reference;"
614         NL "    int * ptr = &param3_generic;"
615         NL "    float return_value_generic;"
616         NL "    float return_value_reference;"
617         NL
618         NL "    return_value_generic = remquo(param1, param2, ptr);"
619         NL "    return_value_reference = remquo(param1, param2, &param3_reference);"
620         NL
621         NL "    results[tid] = (*ptr == param3_reference && return_value_generic == return_value_reference);"
622         NL "}"
623         NL
624     );
625 }
626 
get_default_data_for_type(const std::string & type)627 std::string get_default_data_for_type(const std::string& type) {
628     std::string result;
629 
630     if (type == "float") {
631         for (int i = 0; i < 10; i++) {
632             for (int j = 0; j < 10; j++) {
633                 char temp[10];
634                 sprintf(temp, "%d.%df, ", i, j);
635                 result += std::string(temp);
636             }
637         }
638     }
639 
640     else if (type == "double") {
641         for (int i = 0; i < 10; i++) {
642             for (int j = 0; j < 10; j++) {
643                 char temp[10];
644                 sprintf(temp, "%d.%d, ", i, j);
645                 result += std::string(temp);
646             }
647         }
648     }
649 
650     else {
651         for (int i = 0; i < 100; i++) {
652             char temp[10];
653             sprintf(temp, "%d, ", i);
654             result += std::string(temp);
655         }
656     }
657 
658     return result;
659 }
660 
create_vload_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)661 void create_vload_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
662     const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
663         NL
664         NL "%s"
665         NL "__global %s data[] = { %s };"
666         NL
667         NL "__kernel void testKernel(__global uint *results) {"
668         NL "    uint tid = get_global_id(0);"
669         NL
670         NL "    // Testing: %s"
671         NL "    const %s * ptr = data;"
672         NL "    %s%s result_generic = vload%s(2, ptr);"
673         NL "    %s%s result_reference = vload%s(2, data);"
674         NL
675         NL "    results[tid] = all(result_generic == result_reference);"
676         NL "}"
677         NL;
678 
679     const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
680         NL
681         NL "%s"
682         NL "__constant %s to_copy_from[] = { %s };"
683         NL
684         NL "__kernel void testKernel(__global uint *results) {"
685         NL "    uint tid = get_global_id(0);"
686         NL
687         NL "    __local %s data[100];"
688         NL "    for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
689         NL "        data[i] = to_copy_from[i];"
690         NL
691         NL "    const %s * ptr = data;"
692         NL "    %s%s result_generic = vload%s(2, ptr);"
693         NL "    %s%s result_reference = vload%s(2, data);"
694         NL
695         NL "    results[tid] = all(result_generic == result_reference);"
696         NL "}"
697         NL;
698 
699     const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
700         NL
701         NL "%s"
702         NL "__kernel void testKernel(__global uint *results) {"
703         NL "    uint tid = get_global_id(0);"
704         NL
705         NL "    %s data[] = { %s };"
706         NL "    // Testing: %s"
707         NL "    const %s * ptr = data;"
708         NL "    %s%s result_generic = vload%s(2, ptr);"
709         NL "    %s%s result_reference = vload%s(2, data);"
710         NL
711         NL "    results[tid] = all(result_generic == result_reference);"
712         NL "}"
713         NL;
714 
715     const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
716     const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
717     const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
718 
719     const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
720 
721     for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
722         const char *pragma_str = "";
723 
724         if (i == 0) {
725             if (!is_extension_available(deviceID, "cl_khr_fp64"))
726                 continue;
727             else
728                 pragma_str = cl_khr_fp64_pragma.c_str();
729         }
730 
731         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
732             for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
733                 char temp_kernel[4098];
734                 sprintf(temp_kernel, kernel_variants[k].c_str(),
735                     pragma_str,
736                     gentype_base[i].c_str(),
737                     get_default_data_for_type(gentype_base[i]).c_str(),
738                     gentype_base[i].c_str(),
739                     gentype_base[i].c_str(),
740                     gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str(),
741                     gentype_base[i].c_str(), vector_sizes[j].c_str(), vector_sizes[j].c_str()
742                 );
743 
744                 KERNEL_FUNCTIONS.push_back(temp_kernel);
745             }
746         }
747     }
748 }
749 
create_vstore_kernels(std::vector<std::string> & KERNEL_FUNCTIONS,cl_device_id deviceID)750 void create_vstore_kernels(std::vector<std::string>& KERNEL_FUNCTIONS, cl_device_id deviceID) {
751     const std::string KERNEL_FUNCTION_TEMPLATE_GLOBAL =
752         NL
753         NL "%s"
754         NL "__global %s data_generic[] = { %s };"
755         NL "__global %s data_reference[] = { %s };"
756         NL
757         NL "__kernel void testKernel(__global uint *results) {"
758         NL "    uint tid = get_global_id(0);"
759         NL
760         NL "    %s%s input = (%s%s)(1);"
761         NL "    %s * ptr = data_generic;"
762         NL
763         NL "    vstore%s(input, 2, ptr);"
764         NL "    vstore%s(input, 2, data_reference);"
765         NL
766         NL "    bool result = true;"
767         NL "    for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
768         NL "        if (data_generic[i] != data_reference[i])"
769         NL "            result = false;"
770         NL
771         NL "    results[tid] = result;"
772         NL "}"
773         NL;
774 
775     const std::string KERNEL_FUNCTION_TEMPLATE_LOCAL =
776         NL
777         NL "%s"
778         NL "__constant %s to_copy_from[] = { %s };"
779         NL
780         NL "__kernel void testKernel(__global uint *results) {"
781         NL "    uint tid = get_global_id(0);"
782         NL
783         NL "    __local %s data_generic[100];"
784         NL "    for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
785         NL "        data_generic[i] = to_copy_from[i];"
786         NL
787         NL "    __local %s data_reference[100];"
788         NL "    for (int i = 0; i < sizeof(to_copy_from) / sizeof(to_copy_from[0]); i++)"
789         NL "        data_reference[i] = to_copy_from[i];"
790         NL
791         NL "    %s%s input = (%s%s)(1);"
792         NL "    %s * ptr = data_generic;"
793         NL
794         NL "    vstore%s(input, 2, ptr);"
795         NL "    vstore%s(input, 2, data_reference);"
796         NL
797         NL "    work_group_barrier(CLK_LOCAL_MEM_FENCE);"
798         NL
799         NL "    bool result = true;"
800         NL "    for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
801         NL "        if (data_generic[i] != data_reference[i])"
802         NL "            result = false;"
803         NL
804         NL "    results[tid] = result;"
805         NL "}"
806         NL;
807 
808     const std::string KERNEL_FUNCTION_TEMPLATE_PRIVATE =
809         NL
810         NL "%s"
811         NL "__kernel void testKernel(__global uint *results) {"
812         NL "    uint tid = get_global_id(0);"
813         NL
814         NL "    __private %s data_generic[] = { %s };"
815         NL "    __private %s data_reference[] = { %s };"
816         NL
817         NL "    %s%s input = (%s%s)(1);"
818         NL "    %s * ptr = data_generic;"
819         NL
820         NL "    vstore%s(input, 2, ptr);"
821         NL "    vstore%s(input, 2, data_reference);"
822         NL
823         NL "    bool result = true;"
824         NL "    for (int i = 0; i < sizeof(data_generic) / sizeof(data_generic[0]); i++)"
825         NL "        if (data_generic[i] != data_reference[i])"
826         NL "            result = false;"
827         NL
828         NL "    results[tid] = result;"
829         NL "}"
830         NL;
831 
832     const std::string vector_sizes[] = { "2", "3", "4", "8", "16" };
833     const std::string gentype_base[] = { "double", "float", "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
834     const std::string kernel_variants[] = { KERNEL_FUNCTION_TEMPLATE_GLOBAL, KERNEL_FUNCTION_TEMPLATE_LOCAL, KERNEL_FUNCTION_TEMPLATE_PRIVATE };
835 
836     const std::string cl_khr_fp64_pragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
837 
838     for (size_t i = 0; i < sizeof(gentype_base) / sizeof(gentype_base[0]); i++) {
839         const char *pragma_str = "";
840         if (i == 0) {
841             if (!is_extension_available(deviceID, "cl_khr_fp64"))
842                 continue;
843             else
844                 pragma_str = cl_khr_fp64_pragma.c_str();
845         }
846 
847 
848         for (size_t j = 0; j < sizeof(vector_sizes) / sizeof(vector_sizes[0]); j++) {
849             for (size_t k = 0; k < sizeof(kernel_variants) / sizeof(kernel_variants[0]); k++) {
850                 char temp_kernel[4098];
851 
852                 switch (k) {
853                     case 0: // global template
854                     case 2: // private template
855                         sprintf(temp_kernel, kernel_variants[k].c_str(),
856                             pragma_str,
857                             gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
858                             gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
859                             gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
860                             gentype_base[i].c_str(),
861                             vector_sizes[j].c_str(),
862                             vector_sizes[j].c_str()
863                         );
864                         break;
865 
866                     case 1: // local template
867                         sprintf(temp_kernel, kernel_variants[k].c_str(),
868                             pragma_str,
869                             gentype_base[i].c_str(), get_default_data_for_type(gentype_base[i]).c_str(),
870                             gentype_base[i].c_str(),
871                             gentype_base[i].c_str(),
872                             gentype_base[i].c_str(), vector_sizes[j].c_str(), gentype_base[i].c_str(), vector_sizes[j].c_str(),
873                             gentype_base[i].c_str(),
874                             vector_sizes[j].c_str(),
875                             vector_sizes[j].c_str()
876                         );
877                         break;
878                 }
879 
880                 KERNEL_FUNCTIONS.push_back(temp_kernel);
881             }
882         }
883     }
884 }
885 
test_builtin_functions(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)886 int test_builtin_functions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
887     std::vector<std::string> KERNEL_FUNCTIONS;
888 
889     create_math_kernels(KERNEL_FUNCTIONS);
890     create_vload_kernels(KERNEL_FUNCTIONS, deviceID);
891     create_vstore_kernels(KERNEL_FUNCTIONS, deviceID);
892 
893     CAdvancedTest test(KERNEL_FUNCTIONS);
894 
895     return test.Execute(deviceID, context, queue, num_elements);
896 }
897 
test_generic_advanced_casting(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)898 int test_generic_advanced_casting(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
899     std::vector<std::string> KERNEL_FUNCTIONS;
900 
901     KERNEL_FUNCTIONS.push_back(
902         NL
903         NL "__global char arr[16] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 };"
904         NL
905         NL "__kernel void testKernel(__global uint *results) {"
906         NL "    uint tid = get_global_id(0);"
907         NL
908         NL "    const int * volatile ptr = (const int *)arr;"
909         NL
910         NL "    results[tid] = (ptr[0] == 0x00000000) && (ptr[1] == 0x01010101) && (ptr[2] == 0x02020202) && (ptr[3] == 0x03030303);"
911         NL "}"
912         NL
913     );
914 
915     KERNEL_FUNCTIONS.push_back(
916         NL
917         NL "__kernel void testKernel(__global uint *results) {"
918         NL "    uint tid = get_global_id(0);"
919         NL
920         NL "    __local int i;"
921         NL "    i = 0x11112222;"
922         NL "    short *ptr = (short *)&i;"
923         NL "    local int *lptr = (local int *)ptr;"
924         NL
925         NL "    results[tid] = (lptr == &i) && (*lptr == i);"
926         NL "}"
927         NL
928     );
929 
930     KERNEL_FUNCTIONS.push_back(
931         NL
932         NL "__kernel void testKernel(__global uint *results) {"
933         NL "    uint tid = get_global_id(0);"
934         NL
935         NL "    int i = 0x11112222;"
936         NL
937         NL "    void *ptr = &i;"
938         NL "    int copy = *((int *)ptr);"
939         NL
940         NL "    results[tid] = (copy == i);"
941         NL "}"
942         NL
943     );
944 
945     CAdvancedTest test(KERNEL_FUNCTIONS);
946 
947     return test.Execute(deviceID, context, queue, num_elements);
948 }
949 
test_generic_ptr_to_host_mem_svm(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)950 int test_generic_ptr_to_host_mem_svm(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
951     cl_int result = CL_SUCCESS;
952 
953     /* Test SVM capabilities and select matching tests */
954     cl_device_svm_capabilities caps;
955     auto version = get_device_cl_version(deviceID);
956     auto expected_min_version = Version(2, 0);
957 
958     cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, NULL);
959     test_error(error, "clGetDeviceInfo(CL_DEVICE_SVM_CAPABILITIES) failed");
960 
961     if ((version < expected_min_version)
962         || (version >= Version(3, 0) && caps == 0))
963         return TEST_SKIPPED_ITSELF;
964 
965     if (caps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) {
966         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_COARSE_GRAINED_SVM);
967         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
968     }
969 
970     if (caps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
971         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_BUFFER_SVM);
972         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
973     }
974 
975     if (caps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) {
976         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_FINE_GRAINED_SYSTEM_SVM);
977         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
978     }
979 
980     if (caps & CL_DEVICE_SVM_ATOMICS) {
981         CAdvancedTest test_global_svm_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_ATOMICS_SVM);
982         result |= test_global_svm_ptr.Execute(deviceID, context, queue, num_elements);
983     }
984 
985     return result;
986 }
987 
test_generic_ptr_to_host_mem(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)988 int test_generic_ptr_to_host_mem(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
989     cl_int result = CL_SUCCESS;
990 
991     CAdvancedTest test_global_ptr(common::GLOBAL_KERNEL_FUNCTION, ARG_TYPE_HOST_PTR);
992     result |= test_global_ptr.Execute(deviceID, context, queue, num_elements);
993 
994     CAdvancedTest test_local_ptr(common::LOCAL_KERNEL_FUNCTION, ARG_TYPE_HOST_LOCAL);
995     result |= test_local_ptr.Execute(deviceID, context, queue, num_elements / 64);
996 
997     return result;
998 }
999