1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24 #include "harness/conversions.h"
25 #include "harness/ThreadPool.h"
26
27 #define NUM_TESTS 23
28
29 #define LONG_MATH_SHIFT_SIZE 26
30 #define QUICK_MATH_SHIFT_SIZE 16
31
32 static const char *kernel_code =
33 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
34 "{\n"
35 " int tid = get_global_id(0);\n"
36 "\n"
37 " dst[tid] = srcA[tid] %s srcB[tid];\n"
38 "}\n";
39
40 static const char *kernel_code_V3 =
41 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
42 "{\n"
43 " int tid = get_global_id(0);\n"
44 "\n"
45 " vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n"
46 "}\n";
47
48 static const char *kernel_code_V3_scalar_vector =
49 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
50 "{\n"
51 " int tid = get_global_id(0);\n"
52 "\n"
53 " vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n"
54 "}\n";
55
56 static const char *kernel_code_V3_vector_scalar =
57 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
58 "{\n"
59 " int tid = get_global_id(0);\n"
60 "\n"
61 " vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n"
62 "}\n";
63
64
65 // Separate kernel here because it does not fit the pattern
66 static const char *not_kernel_code =
67 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
68 "{\n"
69 " int tid = get_global_id(0);\n"
70 "\n"
71 " dst[tid] = %ssrcA[tid];\n"
72 "}\n";
73
74 static const char *not_kernel_code_V3 =
75 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
76 "{\n"
77 " int tid = get_global_id(0);\n"
78 "\n"
79 " vstore3( %s vload3( tid, srcA ), tid, dst );\n"
80 "}\n";
81
82 static const char *kernel_code_scalar_shift =
83 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
84 "{\n"
85 " int tid = get_global_id(0);\n"
86 "\n"
87 " dst[tid] = srcA[tid] %s srcB[tid]%s;\n"
88 "}\n";
89
90 static const char *kernel_code_scalar_shift_V3 =
91 "__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
92 "{\n"
93 " int tid = get_global_id(0);\n"
94 "\n"
95 " vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n"
96 "}\n";
97
98 static const char *kernel_code_question_colon =
99 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
100 "{\n"
101 " int tid = get_global_id(0);\n"
102 "\n"
103 " dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n"
104 "}\n";
105
106 static const char *kernel_code_question_colon_V3 =
107 "__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
108 "{\n"
109 " int tid = get_global_id(0);\n"
110 "\n"
111 " vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n"
112 "}\n";
113
114
115
116
117 // External verification and data generation functions
118 extern const char *tests[];
119 extern const char *test_names[];
120 extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n);
121 extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ;
122 extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n);
123 extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ;
124 extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n);
125 extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ;
126 extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n);
127 extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ;
128 extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n);
129 extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ;
130 extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n);
131 extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ;
132 extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n);
133 extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ;
134 extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n);
135 extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ;
136
137 // Supported type list
138 const ExplicitType types[] = {
139 kChar,
140 kUChar,
141 kShort,
142 kUShort,
143 kInt,
144 kUInt,
145 kLong,
146 kULong,
147 };
148
149 enum TestStyle
150 {
151 kDontCare=0,
152 kBothVectors,
153 kInputAScalar,
154 kInputBScalar,
155 kVectorScalarScalar, // for the ?: operator only; indicates vector ? scalar : scalar.
156 kInputCAlsoScalar = 0x80 // Or'ed flag to indicate that the selector for the ?: operator is also scalar
157 };
158
159 typedef struct _perThreadData
160 {
161 cl_mem m_streams[3];
162 cl_int *m_input_ptr[2], *m_output_ptr;
163 size_t m_type_size;
164 cl_program m_program[NUM_TESTS];
165 cl_kernel m_kernel[NUM_TESTS];
166 } perThreadData;
167
168
perThreadDataNew()169 perThreadData * perThreadDataNew()
170 {
171 perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData));
172
173
174 memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS);
175 memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS);
176
177 pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL;
178 pThis->m_output_ptr = NULL;
179
180 return pThis;
181 }
182
183
perThreadDataDestroy(perThreadData * pThis)184 void perThreadDataDestroy(perThreadData * pThis)
185 {
186 int i;
187 // cleanup
188 clReleaseMemObject(pThis->m_streams[0]);
189 clReleaseMemObject(pThis->m_streams[1]);
190 clReleaseMemObject(pThis->m_streams[2]);
191 for (i=0; i<NUM_TESTS; i++)
192 {
193 if (pThis->m_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]);
194 if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]);
195 }
196 free(pThis->m_input_ptr[0]);
197 free(pThis->m_input_ptr[1]);
198 free(pThis->m_output_ptr);
199
200 free(pThis);
201 }
202
203
perThreadDataInit(perThreadData * pThis,ExplicitType type,int num_elements,int vectorSize,int inputAVecSize,int inputBVecSize,cl_context context,int start_test_ID,int end_test_ID,int testID)204 cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type,
205 int num_elements, int vectorSize,
206 int inputAVecSize, int inputBVecSize,
207 cl_context context, int start_test_ID,
208 int end_test_ID, int testID)
209 {
210 int i;
211 const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
212
213 const char *type_name = get_explicit_type_name(type);
214 pThis->m_type_size = get_explicit_type_size(type);
215 int err;
216 // Used for the && and || tests where the vector case returns a signed value
217 const char *signed_type_name;
218 switch (type) {
219 case kChar:
220 case kUChar:
221 signed_type_name = get_explicit_type_name(kChar);
222 break;
223 case kShort:
224 case kUShort:
225 signed_type_name = get_explicit_type_name(kShort);
226 break;
227 case kInt:
228 case kUInt:
229 signed_type_name = get_explicit_type_name(kInt);
230 break;
231 case kLong:
232 case kULong:
233 signed_type_name = get_explicit_type_name(kLong);
234 break;
235 default:
236 log_error("Invalid type.\n");
237 return -1;
238 break;
239 }
240
241 pThis->m_input_ptr[0] =
242 (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
243 pThis->m_input_ptr[1] =
244 (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
245 pThis->m_output_ptr =
246 (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
247 pThis->m_streams[0] = clCreateBuffer(
248 context, CL_MEM_READ_WRITE,
249 pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
250
251 test_error(err, "clCreateBuffer failed");
252
253 pThis->m_streams[1] = clCreateBuffer(
254 context, CL_MEM_READ_WRITE,
255 pThis->m_type_size * num_elements * inputBVecSize, NULL, &err);
256
257 test_error(err, "clCreateBuffer failed");
258
259 pThis->m_streams[2] = clCreateBuffer(
260 context, CL_MEM_READ_WRITE,
261 pThis->m_type_size * num_elements * vectorSize, NULL, &err);
262
263 test_error(err, "clCreateBuffer failed");
264
265 const char *vectorString = sizeNames[ vectorSize ];
266 const char *inputAVectorString = sizeNames[ inputAVecSize ];
267 const char *inputBVectorString = sizeNames[ inputBVecSize ];
268
269 if (testID == -1)
270 {
271 log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize));
272 }
273
274 char programString[4096];
275 const char *ptr;
276
277
278 const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3;
279
280 for (i=start_test_ID; i<end_test_ID; i++) {
281 switch (i) {
282 case 10:
283 case 11:
284 sprintf(programString, vectorSize == 3 ? kernel_code_scalar_shift_V3 : kernel_code_scalar_shift, type_name, inputAVectorString, type_name, inputBVectorString,
285 type_name, vectorString, tests[i], ((vectorSize == 1) ? "":".s0"));
286 break;
287 case 12:
288 sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
289 type_name, vectorString, tests[i]);
290 break;
291 case 13:
292 sprintf(programString, vectorSize == 3 ? kernel_code_question_colon_V3 : kernel_code_question_colon,
293 type_name, inputAVectorString, type_name, inputBVectorString,
294 type_name, vectorString, ((vectorSize == 1) ? "":".s0"), ((vectorSize == 1) ? "":".s0")) ;
295 break;
296 case 14:
297 case 15:
298 case 16:
299 case 17:
300 case 18:
301 case 19:
302 case 20:
303 case 21:
304 // Need an unsigned result here for vector sizes > 1
305 sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
306 ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
307 break;
308 case 22:
309 // Need an unsigned result here for vector sizes > 1
310 sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
311 ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
312 break;
313 default:
314 sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
315 type_name, vectorString, tests[i]);
316 break;
317 }
318
319 //printf("kernel: %s\n", programString);
320 ptr = programString;
321 err = create_single_kernel_helper( context,
322 &(pThis->m_program[ i ]),
323 &(pThis->m_kernel[ i ]), 1,
324 &ptr, "test" );
325 test_error( err, "Unable to create test kernel" );
326 err = clSetKernelArg(pThis->m_kernel[i], 0,
327 sizeof pThis->m_streams[0],
328 &(pThis->m_streams[0]) );
329 err |= clSetKernelArg(pThis->m_kernel[i], 1,
330 sizeof pThis->m_streams[1],
331 &(pThis->m_streams[1]) );
332 err |= clSetKernelArg(pThis->m_kernel[i], 2,
333 sizeof pThis->m_streams[2],
334 &(pThis->m_streams[2]) );
335 test_error(err, "clSetKernelArgs failed");
336 }
337
338 return CL_SUCCESS;
339 }
340
341 typedef struct _globalThreadData
342 {
343 cl_device_id m_deviceID;
344 cl_context m_context;
345 // cl_command_queue m_queue;
346 int m_num_elements;
347 int m_threadcount;
348 int m_vectorSize;
349 int m_num_runs_shift;
350 TestStyle m_style;
351 ExplicitType m_type;
352 MTdata * m_pRandData;
353 uint64_t m_offset;
354 int m_testID;
355 perThreadData **m_arrPerThreadData;
356 } globalThreadData;
357
358
359
globalThreadDataNew(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,int num_runs_shift,ExplicitType type,int testID,int threadcount)360 globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context,
361 cl_command_queue queue, int num_elements,
362 int vectorSize, TestStyle style, int num_runs_shift,
363 ExplicitType type, int testID,
364 int threadcount)
365 {
366 int i;
367 globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData));
368 pThis->m_deviceID = deviceID;
369 pThis->m_context = context;
370 // pThis->m_queue = queue;
371 pThis->m_num_elements = num_elements;
372 pThis->m_num_runs_shift = num_runs_shift;
373 pThis->m_vectorSize = vectorSize;
374 pThis->m_style = style;
375 pThis->m_type = type;
376 pThis->m_offset = (uint64_t)0;
377 pThis->m_testID = testID;
378 pThis->m_arrPerThreadData = NULL;
379 pThis->m_threadcount = threadcount;
380
381 pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata));
382 pThis->m_arrPerThreadData = (perThreadData **)
383 malloc(threadcount*sizeof(perThreadData *));
384 for(i=0; i < threadcount; ++i)
385 {
386 pThis->m_pRandData[i] = init_genrand(i+1);
387 pThis->m_arrPerThreadData[i] = NULL;
388 }
389
390 return pThis;
391 }
392
globalThreadDataDestroy(globalThreadData * pThis)393 void globalThreadDataDestroy(globalThreadData * pThis)
394 {
395 int i;
396
397 for(i=0; i < pThis->m_threadcount; ++i)
398 {
399 free_mtdata(pThis->m_pRandData[i]);
400 if(pThis->m_arrPerThreadData[i] != NULL)
401 {
402 perThreadDataDestroy(pThis->m_arrPerThreadData[i]);
403 }
404 }
405 free(pThis->m_arrPerThreadData);
406 free(pThis->m_pRandData);
407 free(pThis);
408 }
409
410 int
411 test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx,
412 perThreadData ** ppThreadData);
413
414
test_integer_ops_do_thread(cl_uint job_id,cl_uint thread_id,void * userInfo)415 cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo )
416 {
417 cl_int error; cl_int result;
418 globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo;
419 cl_command_queue queue;
420
421 #if THREAD_DEBUG
422 log_error("Thread %x (job %x) about to create command queue\n",
423 thread_id, job_id);
424 #endif
425
426 queue = clCreateCommandQueue (threadInfoGlobal->m_context,
427 threadInfoGlobal->m_deviceID,0,
428 &error);
429
430 if(error != CL_SUCCESS)
431 {
432 log_error("Thread %x (job %x) could not create command queue\n",
433 thread_id, job_id);
434 return error; // should we clean up the queue too?
435 }
436
437 #if THREAD_DEBUG
438 log_error("Thread %x (job %x) created command queue\n",
439 thread_id, job_id);
440 #endif
441
442 result = test_integer_ops( threadInfoGlobal->m_deviceID,
443 threadInfoGlobal->m_context,
444 queue,
445 threadInfoGlobal->m_num_elements,
446 threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style,
447 threadInfoGlobal->m_num_runs_shift,
448 threadInfoGlobal->m_type, threadInfoGlobal->m_testID,
449 threadInfoGlobal->m_pRandData[thread_id],
450 threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id,
451 threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1),
452 &(threadInfoGlobal->m_arrPerThreadData[thread_id])
453 );
454
455 if(result != 0)
456 {
457 log_error("Thread %x (job %x) failed test_integer_ops with result %x\n",
458 thread_id, job_id, result);
459 // return error;
460 }
461
462
463 error = clReleaseCommandQueue(queue);
464 if(error != CL_SUCCESS)
465 {
466 log_error("Thread %x (job %x) could not release command queue\n",
467 thread_id, job_id);
468 return error;
469 }
470 return result;
471 }
472
473 int
test_integer_ops_threaded(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,int num_runs_shift,ExplicitType type,int testID)474 test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID)
475 {
476 globalThreadData * pThreadInfo = NULL;
477 cl_int result=0;
478 cl_uint threadcount = GetThreadCount();
479
480 // Check to see if we are using single threaded mode on other than a 1.0 device
481 if (getenv( "CL_TEST_SINGLE_THREADED" )) {
482
483 char device_version[1024] = { 0 };
484 result = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
485 if(result != CL_SUCCESS)
486 {
487 log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
488 return result;
489 }
490
491 if (strcmp("OpenCL 1.0 ",device_version)) {
492 log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
493 }
494 }
495
496 // This test will run threadcount threads concurrently; each thread will execute test_integer_ops()
497 // which will allocate 2 OpenCL buffers on the device; each buffer has size num_elements * type_size * vectorSize.
498 // We need to make sure that the total device memory allocated by all threads does not exceed the maximum
499 // memory on the device. If it does, we decrease num_elements until all threads combined will not
500 // over-subscribe device memory.
501 cl_ulong maxDeviceGlobalMem;
502 result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
503 if(result != CL_SUCCESS)
504 {
505 log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
506 return result;
507 }
508
509 if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
510 maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
511 }
512
513 // Let's not take all device memory - reduce by 75%
514 maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
515 // Now reduce num_elements so that the total device memory usage does not exceed 75% of global device memory.
516 size_t type_size = get_explicit_type_size(type);
517 while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize > maxDeviceGlobalMem)
518 {
519 num_elements >>= 1;
520 }
521
522 uint64_t startIndx = (uint64_t)0;
523 uint64_t endIndx = (1ULL<<num_runs_shift);
524 uint64_t jobcount = (endIndx-startIndx)/num_elements;
525
526 if(jobcount==0)
527 {
528 jobcount = 1;
529 }
530
531 pThreadInfo = globalThreadDataNew(deviceID, context, queue, num_elements,
532 vectorSize, style, num_runs_shift,
533 type, testID, threadcount);
534
535
536 pThreadInfo->m_offset = startIndx;
537
538 #if THREAD_DEBUG
539 log_error("Launching %llx jobs\n",
540 jobcount);
541 #endif
542
543 result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo);
544
545 if(result != 0)
546 {
547 // cleanup ??
548 log_error("ThreadPool_Do return non-success value %d\n", result);
549
550 }
551 globalThreadDataDestroy(pThreadInfo);
552 return result;
553 }
554
555
556
557 int
test_integer_ops(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,int num_runs_shift,ExplicitType type,int testID,MTdata randDataIn,uint64_t startIndx,uint64_t endIndx,perThreadData ** ppThreadData)558 test_integer_ops(cl_device_id deviceID, cl_context context,
559 cl_command_queue queue, int num_elements,
560 int vectorSize, TestStyle style, int num_runs_shift,
561 ExplicitType type, int testID, MTdata randDataIn,
562 uint64_t startIndx, uint64_t endIndx,
563 perThreadData ** ppThreadData)
564 {
565 size_t threads[1];
566 int err;
567 int i;
568 int inputAVecSize, inputBVecSize;
569
570
571
572 inputAVecSize = inputBVecSize = vectorSize;
573 if( style == kInputAScalar )
574 inputAVecSize = 1;
575 else if( style == kInputBScalar )
576 inputBVecSize = 1;
577
578 /*
579 if( inputAVecSize != inputBVecSize )
580 log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n",
581 test_names[testID],
582 get_explicit_type_name(type), vectorSize,
583 ( inputAVecSize == 1 ) ? "scalar" : "vector",
584 ( inputBVecSize == 1 ) ? "scalar" : "vector",
585 startIndx, endIndx, (1ULL<<num_runs_shift) );
586 else
587 log_info("Testing \"%s\" on %s%d (range %llx - %llx of 0-%llx)\n",
588 test_names[testID],
589 get_explicit_type_name(type), vectorSize,
590 startIndx, endIndx, (1ULL<<num_runs_shift));
591 */
592
593
594 // Figure out which sub-test to run, or all of them
595 int start_test_ID = 0;
596 int end_test_ID = NUM_TESTS;
597 if (testID != -1) {
598 start_test_ID = testID;
599 end_test_ID = testID+1;
600 }
601 if (testID > NUM_TESTS) {
602 log_error("Invalid test ID: %d\n", testID);
603 return -1;
604 }
605
606 if(*ppThreadData == NULL)
607 {
608 *ppThreadData = perThreadDataNew();
609 err = perThreadDataInit(*ppThreadData,
610 type, num_elements, vectorSize,
611 inputAVecSize, inputBVecSize,
612 context, start_test_ID,
613 end_test_ID, testID);
614 test_error(err, "failed to init per thread data\n");
615 }
616
617 perThreadData * pThreadData = *ppThreadData;
618
619
620
621 threads[0] = (size_t)num_elements;
622 int error_count = 0;
623 for (i=start_test_ID; i<end_test_ID; i++)
624 {
625 uint64_t indx;
626
627
628 if(startIndx >= endIndx)
629 {
630 startIndx = (uint64_t)0;
631 endIndx = (1ULL<<num_runs_shift);
632 }
633 for (indx=startIndx; indx < endIndx; indx+=num_elements)
634 {
635
636 switch (type) {
637 case kChar:
638 init_char_data(indx, num_elements * vectorSize, (cl_char**)(pThreadData->m_input_ptr), randDataIn);
639 break;
640 case kUChar:
641 init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn);
642 break;
643 case kShort:
644 init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn);
645 break;
646 case kUShort:
647 init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn);
648 break;
649 case kInt:
650 init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn);
651 break;
652 case kUInt:
653 init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn);
654 break;
655 case kLong:
656 init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn);
657 break;
658 case kULong:
659 init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn);
660 break;
661 default:
662 err = 1;
663 log_error("Invalid type.\n");
664 break;
665 }
666
667
668 err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL);
669 test_error(err, "clEnqueueWriteBuffer failed");
670 err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL );
671 test_error(err, "clEnqueueWriteBuffer failed");
672
673 err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
674 test_error(err, "clEnqueueNDRangeKernel failed");
675
676 err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL );
677 test_error(err, "clEnqueueReadBuffer failed");
678
679 // log_info("Performing verification\n");
680
681 // If one of the inputs are scalar, we need to extend the input values to vectors
682 // to accommodate the verify functions
683 if( vectorSize > 1 )
684 {
685 char * p = NULL;
686 if( style == kInputAScalar )
687 p = (char *)pThreadData->m_input_ptr[ 0 ];
688 else if( style == kInputBScalar )
689 p = (char *)pThreadData->m_input_ptr[ 1 ];
690 if( p != NULL )
691 {
692 for( int element = num_elements - 1; element >= 0; element-- )
693 {
694 for( int vec = ( element == 0 ) ? 1 : 0; vec < vectorSize; vec++ )
695 memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size );
696 }
697 }
698 }
699
700 switch (type) {
701 case kChar:
702 err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize);
703 break;
704 case kUChar:
705 err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize);
706 break;
707 case kShort:
708 err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize);
709 break;
710 case kUShort:
711 err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize);
712 break;
713 case kInt:
714 err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize);
715 break;
716 case kUInt:
717 err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize);
718 break;
719 case kLong:
720 err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize);
721 break;
722 case kULong:
723 err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize);
724 break;
725 default:
726 err = 1;
727 log_error("Invalid type.\n");
728 break;
729 }
730
731 if (err) {
732 #if 0
733 log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, inputBVecSize, num_elements );
734 cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0];
735 log_error( "from 18:\n" );
736 for( int q = 18; q < 64; q++ )
737 {
738 log_error( "%02x ", inP[ q ] );
739 }
740 log_error( "\n" );
741 inP = (cl_char *)pThreadData->m_input_ptr[1];
742 for( int q = 18; q < 64; q++ )
743 {
744 log_error( "%02x ", inP[ q ] );
745 }
746 log_error( "\n" );
747 inP = (cl_char *)pThreadData->m_output_ptr;
748 for( int q = 18; q < 64; q++ )
749 {
750 log_error( "%02x ", inP[ q ] );
751 }
752 log_error( "\n" );
753 log_error( "from 36:\n" );
754 inP = (cl_char *)pThreadData->m_input_ptr[0];
755 for( int q = 36; q < 64; q++ )
756 {
757 log_error( "%02x ", inP[ q ] );
758 }
759 log_error( "\n" );
760 inP = (cl_char *)pThreadData->m_input_ptr[1];
761 for( int q = 36; q < 64; q++ )
762 {
763 log_error( "%02x ", inP[ q ] );
764 }
765 log_error( "\n" );
766 inP = (cl_char *)pThreadData->m_output_ptr;
767 for( int q = 36; q < 64; q++ )
768 {
769 log_error( "%02x ", inP[ q ] );
770 }
771 log_error( "\n" );
772 #endif
773 error_count++;
774 break;
775 }
776 }
777
778 /*
779
780 const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
781
782 if (err) {
783 log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n",
784 get_explicit_type_name(type), sizeNames[vectorSize],
785 test_names[i],
786 startIndx, endIndx,
787 (1ULL<<num_runs_shift));
788 } else {
789 log_info("\t\t%s%s test %s passed (range %llx - %llx of 0-%llx)\n",
790 get_explicit_type_name(type), sizeNames[vectorSize],
791 test_names[i],
792 startIndx, endIndx,
793 (1ULL<<num_runs_shift));
794 }
795 */
796 }
797
798
799
800 return error_count;
801 }
802
803
804
805
806
807
808
809
810
811 // Run all the vector sizes for a given test
run_specific_test(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num,int testID)812 int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) {
813 int errors = 0;
814 errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/1, 1, kBothVectors, num, type, testID);
815 errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/2, 2, kBothVectors, num, type, testID);
816 errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/3, 3, kBothVectors, num, type, testID);
817 errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/4, 4, kBothVectors, num, type, testID);
818 errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/8, 8, kBothVectors, num, type, testID);
819 errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/16, 16, kBothVectors, num, type, testID);
820 return errors;
821 }
822
823 // Run multiple tests for a given type
run_multiple_tests(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num,int * tests,int total_tests)824 int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
825 int errors = 0;
826
827 if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) {
828 log_info("Detected CL_WIMPY_MODE env\n");
829 log_info("Skipping long test\n");
830 return 0;
831 }
832
833 int i;
834 for (i=0; i<total_tests; i++)
835 {
836 int localErrors;
837 log_info("Testing \"%s\" ", test_names[tests[i]]); fflush( stdout );
838 localErrors = run_specific_test(deviceID, context, queue, num_elements, type, num, tests[i]);
839 if( localErrors )
840 log_info( "FAILED\n" );
841 else
842 log_info( "passed\n" );
843
844 errors += localErrors;
845 }
846
847 return errors;
848 }
849
850 // Run all the math tests for a given type
run_test_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)851 int run_test_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
852 int tests[] = {0, 1, 2, 3, 4};
853 return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
854 }
855
856 // Run all the logic tests for a given type
run_test_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)857 int run_test_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
858 int tests[] = {5, 6, 7, 12, 14, 15, 22};
859 return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
860 }
861
862 // Run all the shifting tests for a given type
run_test_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)863 int run_test_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
864 int tests[] = {8, 9, 10, 11};
865 return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
866 }
867
868 // Run all the comparison tests for a given type
run_test_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)869 int run_test_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
870 int tests[] = {13, 16, 17, 18, 19, 20, 21};
871 return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
872 }
873
874 // Run all tests for a given type
run_test(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)875 int run_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
876 int errors = 0;
877 errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 1, kBothVectors, num, type, -1);
878 errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 2, kBothVectors, num, type, -1);
879 errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 3, kBothVectors, num, type, -1);
880 errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 4, kBothVectors, num, type, -1);
881 errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 8, kBothVectors, num, type, -1);
882 errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 16, kBothVectors, num, type, -1);
883 return errors;
884 }
885
886
887 // -----------------
888 // Long tests
889 // -----------------
test_long_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)890 int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
891 if (!gHasLong)
892 {
893 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
894 return CL_SUCCESS;
895 }
896 return run_test_math(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
897 }
test_long_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)898 int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
899 if (!gHasLong)
900 {
901 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
902 return CL_SUCCESS;
903 }
904 return run_test_logic(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
905 }
test_long_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)906 int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
907 if (!gHasLong)
908 {
909 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
910 return CL_SUCCESS;
911 }
912 return run_test_shift(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
913 }
test_long_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)914 int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
915 if (!gHasLong)
916 {
917 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
918 return CL_SUCCESS;
919 }
920 return run_test_compare(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
921 }
test_quick_long_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)922 int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
923 if (!gHasLong)
924 {
925 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
926 return CL_SUCCESS;
927 }
928 return run_test_math(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
929 }
test_quick_long_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)930 int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
931 if (!gHasLong)
932 {
933 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
934 return CL_SUCCESS;
935 }
936 return run_test_logic(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
937 }
test_quick_long_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)938 int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
939 if (!gHasLong)
940 {
941 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
942 return CL_SUCCESS;
943 }
944 return run_test_shift(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
945 }
test_quick_long_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)946 int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
947 if (!gHasLong)
948 {
949 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
950 return CL_SUCCESS;
951 }
952 return run_test_compare(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
953 }
954
955
956 // -----------------
957 // ULong tests
958 // -----------------
test_ulong_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)959 int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
960 if (!gHasLong)
961 {
962 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
963 return CL_SUCCESS;
964 }
965 return run_test_math(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
966 }
test_ulong_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)967 int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
968 if (!gHasLong)
969 {
970 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
971 return CL_SUCCESS;
972 }
973 return run_test_logic(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
974 }
test_ulong_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)975 int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
976 if (!gHasLong)
977 {
978 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
979 return CL_SUCCESS;
980 }
981 return run_test_shift(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
982 }
test_ulong_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)983 int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
984 if (!gHasLong)
985 {
986 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
987 return CL_SUCCESS;
988 }
989 return run_test_compare(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
990 }
test_quick_ulong_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)991 int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
992 if (!gHasLong)
993 {
994 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
995 return CL_SUCCESS;
996 }
997 return run_test_math(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
998 }
test_quick_ulong_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)999 int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1000 if (!gHasLong)
1001 {
1002 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1003 return CL_SUCCESS;
1004 }
1005 return run_test_logic(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1006 }
test_quick_ulong_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1007 int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1008 if (!gHasLong)
1009 {
1010 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1011 return CL_SUCCESS;
1012 }
1013 return run_test_shift(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1014 }
test_quick_ulong_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1015 int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1016 if (!gHasLong)
1017 {
1018 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1019 return CL_SUCCESS;
1020 }
1021 return run_test_compare(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1022 }
1023
1024
1025 // -----------------
1026 // Int tests
1027 // -----------------
test_int_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1028 int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1029 return run_test_math(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1030 }
test_int_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1031 int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1032 return run_test_logic(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1033 }
test_int_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1034 int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1035 return run_test_shift(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1036 }
test_int_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1037 int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1038 return run_test_compare(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1039 }
test_quick_int_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1040 int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1041 return run_test_math(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1042 }
test_quick_int_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1043 int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1044 return run_test_logic(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1045 }
test_quick_int_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1046 int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1047 return run_test_shift(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1048 }
test_quick_int_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1049 int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1050 return run_test_compare(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1051 }
1052
1053
1054 // -----------------
1055 // UInt tests
1056 // -----------------
test_uint_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1057 int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1058 return run_test_math(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1059 }
test_uint_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1060 int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1061 return run_test_logic(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1062 }
test_uint_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1063 int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1064 return run_test_shift(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1065 }
test_uint_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1066 int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1067 return run_test_compare(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1068 }
test_quick_uint_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1069 int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1070 return run_test_math(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1071 }
test_quick_uint_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1072 int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1073 return run_test_logic(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1074 }
test_quick_uint_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1075 int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1076 return run_test_shift(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1077 }
test_quick_uint_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1078 int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1079 return run_test_compare(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1080 }
1081
1082
1083 // -----------------
1084 // Short tests
1085 // -----------------
test_short_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1086 int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1087 return run_test_math(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1088 }
test_short_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1089 int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1090 return run_test_logic(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1091 }
test_short_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1092 int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1093 return run_test_shift(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1094 }
test_short_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1095 int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1096 return run_test_compare(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1097 }
test_quick_short_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1098 int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1099 return run_test_math(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1100 }
test_quick_short_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1101 int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1102 return run_test_logic(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1103 }
test_quick_short_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1104 int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1105 return run_test_shift(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1106 }
test_quick_short_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1107 int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1108 return run_test_compare(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1109 }
1110
1111
1112 // -----------------
1113 // UShort tests
1114 // -----------------
test_ushort_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1115 int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1116 return run_test_math(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1117 }
test_ushort_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1118 int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1119 return run_test_logic(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1120 }
test_ushort_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1121 int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1122 return run_test_shift(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1123 }
test_ushort_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1124 int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1125 return run_test_compare(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1126 }
test_quick_ushort_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1127 int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1128 return run_test_math(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1129 }
test_quick_ushort_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1130 int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1131 return run_test_logic(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1132 }
test_quick_ushort_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1133 int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1134 return run_test_shift(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1135 }
test_quick_ushort_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1136 int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1137 return run_test_compare(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1138 }
1139
1140
1141 // -----------------
1142 // Char tests
1143 // -----------------
test_char_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1144 int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1145 return run_test_math(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1146 }
test_char_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1147 int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1148 return run_test_logic(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1149 }
test_char_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1150 int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1151 return run_test_shift(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1152 }
test_char_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1153 int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1154 return run_test_compare(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1155 }
test_quick_char_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1156 int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1157 return run_test_math(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1158 }
test_quick_char_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1159 int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1160 return run_test_logic(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1161 }
test_quick_char_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1162 int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1163 return run_test_shift(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1164 }
test_quick_char_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1165 int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1166 return run_test_compare(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1167 }
1168
1169
1170 // -----------------
1171 // UChar tests
1172 // -----------------
test_uchar_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1173 int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1174 return run_test_math(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1175 }
test_uchar_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1176 int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1177 return run_test_logic(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1178 }
test_uchar_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1179 int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1180 return run_test_shift(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1181 }
test_uchar_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1182 int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1183 return run_test_compare(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1184 }
test_quick_uchar_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1185 int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1186 return run_test_math(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1187 }
test_quick_uchar_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1188 int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1189 return run_test_logic(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1190 }
test_quick_uchar_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1191 int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1192 return run_test_shift(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1193 }
test_quick_uchar_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1194 int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1195 return run_test_compare(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1196 }
1197
1198
1199
1200 // These are kept for debugging if you want to run all the tests together.
1201
test_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1202 int test_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1203 if (!gHasLong)
1204 {
1205 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1206 return CL_SUCCESS;
1207 }
1208 return run_test(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
1209 }
1210
test_quick_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1211 int test_quick_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1212 if (!gHasLong)
1213 {
1214 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1215 return CL_SUCCESS;
1216 }
1217 return run_test(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
1218 }
1219
test_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1220 int test_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1221 if (!gHasLong)
1222 {
1223 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1224 return CL_SUCCESS;
1225 }
1226 return run_test(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
1227 }
1228
test_quick_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1229 int test_quick_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1230 if (!gHasLong)
1231 {
1232 log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1233 return CL_SUCCESS;
1234 }
1235 return run_test(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1236 }
1237
test_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1238 int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1239 return run_test(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1240 }
1241
test_quick_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1242 int test_quick_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1243 return run_test(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1244 }
1245
test_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1246 int test_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1247 return run_test(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1248 }
1249
test_quick_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1250 int test_quick_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1251 return run_test(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1252 }
1253
test_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1254 int test_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1255 return run_test(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1256 }
1257
test_quick_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1258 int test_quick_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1259 return run_test(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1260 }
1261
test_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1262 int test_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1263 return run_test(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1264 }
1265
test_quick_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1266 int test_quick_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1267 return run_test(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1268 }
1269
test_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1270 int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1271 return run_test(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1272 }
1273
test_quick_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1274 int test_quick_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1275 return run_test(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1276 }
1277
test_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1278 int test_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1279 return run_test(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1280 }
1281
test_quick_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1282 int test_quick_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1283 return run_test(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1284 }
1285
1286 // Prototype for below
1287 int test_question_colon_op(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
1288 int vectorSize, TestStyle style, ExplicitType type );
1289
1290 // Run all the vector sizes for a given test in scalar-vector and vector-scalar modes
run_test_sizes(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num,int testID)1291 int run_test_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID)
1292 {
1293 int sizes[] = { 2, 3, 4, 8, 16, 0 };
1294 int errors = 0;
1295
1296 for( int i = 0; sizes[ i ] != 0; i++ )
1297 {
1298 if( testID == 13 )
1299 {
1300 errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, type );
1301 errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, type );
1302 errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kVectorScalarScalar, type );
1303
1304 errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kBothVectors | kInputCAlsoScalar), type );
1305 errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputAScalar | kInputCAlsoScalar), type );
1306 errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputBScalar | kInputCAlsoScalar), type );
1307 }
1308 else
1309 {
1310 errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, num, type, testID);
1311 errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, num, type, testID);
1312 }
1313 }
1314 return errors;
1315 }
1316
1317 // Run all the tests for scalar-vector and vector-scalar for a given type
run_vector_scalar_tests(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)1318 int run_vector_scalar_tests( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num )
1319 {
1320 int errors = 0;
1321 size_t i;
1322
1323 // Shift operators:
1324 // a) cannot take scalars as first parameter and vectors as second
1325 // b) have the vector >> scalar case tested by tests 10 and 11
1326 // so they get skipped entirely
1327
1328 int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7,
1329 13, 14, 15, 16, 17, 18, 19, 20, 21 };
1330 for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++)
1331 {
1332 errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]);
1333 }
1334 return errors;
1335 }
1336
test_vector_scalar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1337 int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1338 {
1339 int errors = 0;
1340 int numTypes = sizeof( types ) / sizeof( types[ 0 ] );
1341
1342 for( int t = 0; t < numTypes; t++ )
1343 {
1344 if ((types[ t ] == kLong || types[ t ] == kULong) && !gHasLong)
1345 continue;
1346
1347 errors += run_vector_scalar_tests( deviceID, context, queue, num_elements, types[ t ], 1 );
1348 break;
1349 }
1350
1351 return errors;
1352 }
1353
generate_random_bool_data(size_t count,MTdata d,cl_char * outData,size_t outDataSize)1354 void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize )
1355 {
1356 cl_uint bits = genrand_int32(d);
1357 cl_uint bitsLeft = 32;
1358
1359 memset( outData, 0, outDataSize * count );
1360
1361 for( size_t i = 0; i < count; i++ )
1362 {
1363 if( 0 == bitsLeft)
1364 {
1365 bits = genrand_int32(d);
1366 bitsLeft = 32;
1367 }
1368
1369 // Note: we will be setting just any bit non-zero for the type, so we can easily skip past
1370 // and just write bytes (assuming the entire output buffer is already zeroed, which we did)
1371 *outData = ( bits & 1 ) ? 0xff : 0;
1372
1373 bits >>= 1; bitsLeft -= 1;
1374
1375 outData += outDataSize;
1376 }
1377 }
1378
1379 static const char *kernel_question_colon_full =
1380 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n"
1381 "{\n"
1382 " int tid = get_global_id(0);\n"
1383 "\n"
1384 " %s%s valA = %ssrcA%s"
1385 " %s%s valB = %ssrcB%s"
1386 " %s%s valC = %ssrcC%s"
1387 " %s%s destVal = valC ? valA : valB;\n"
1388 " %s"
1389 "}\n";
1390
1391 static const char *kernel_qc_load_plain_prefix = "";
1392 static const char *kernel_qc_load_plain_suffix = "[ tid ];\n";
1393
1394 static const char *kernel_qc_load_vec3_prefix = "vload3( tid, ";
1395 static const char *kernel_qc_load_vec3_suffix = ");\n";
1396
1397 static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n";
1398 static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n";
1399
test_question_colon_op(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,ExplicitType type)1400 int test_question_colon_op(cl_device_id deviceID, cl_context context,
1401 cl_command_queue queue, int num_elements,
1402 int vectorSize, TestStyle style, ExplicitType type )
1403 {
1404 cl_mem streams[4];
1405 cl_int *input_ptr[3], *output_ptr;
1406 cl_program program;
1407 cl_kernel kernel;
1408 size_t threads[1];
1409 int err;
1410 int inputAVecSize, inputBVecSize, inputCVecSize;
1411 const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
1412 // Identical to sizeNames but with a blank for 3, since we use vload/store there
1413 const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
1414 MTdata s_randStates;
1415
1416 inputAVecSize = inputBVecSize = inputCVecSize = vectorSize;
1417 if( style & kInputCAlsoScalar )
1418 {
1419 style = (TestStyle)( style & ~kInputCAlsoScalar );
1420 inputCVecSize = 1;
1421 }
1422 if( style == kInputAScalar )
1423 inputAVecSize = 1;
1424 else if( style == kInputBScalar )
1425 inputBVecSize = 1;
1426 else if( style == kVectorScalarScalar )
1427 inputAVecSize = inputBVecSize = 1;
1428
1429 log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n",
1430 get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector",
1431 ( inputAVecSize == 1 ) ? "scalar" : "vector",
1432 ( inputBVecSize == 1 ) ? "scalar" : "vector" );
1433
1434
1435 const char *type_name = get_explicit_type_name(type);
1436 size_t type_size = get_explicit_type_size(type);
1437
1438 // Create and initialize I/O buffers
1439
1440 input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize);
1441 input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize);
1442 input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize);
1443 output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize);
1444
1445 s_randStates = init_genrand( gRandomSeed );
1446
1447 generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] );
1448 generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
1449 generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
1450
1451 streams[0] = clCreateBuffer(
1452 context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
1453 type_size * num_elements * inputAVecSize, input_ptr[0], &err);
1454 test_error(err, "clCreateBuffer failed");
1455 streams[1] = clCreateBuffer(
1456 context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
1457 type_size * num_elements * inputBVecSize, input_ptr[1], &err);
1458 test_error(err, "clCreateBuffer failed");
1459 streams[2] = clCreateBuffer(
1460 context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
1461 type_size * num_elements * inputCVecSize, input_ptr[2], &err);
1462 test_error(err, "clCreateBuffer failed");
1463 streams[3] =
1464 clCreateBuffer(context, CL_MEM_WRITE_ONLY,
1465 type_size * num_elements * vectorSize, NULL, &err);
1466 test_error(err, "clCreateBuffer failed");
1467
1468 const char *vectorString = sizeNames[ vectorSize ];
1469 const char *inputAVectorString = sizeNames[ inputAVecSize ];
1470 const char *inputBVectorString = sizeNames[ inputBVecSize ];
1471 const char *inputCVectorString = sizeNames[ inputCVecSize ];
1472
1473 char programString[4096];
1474 const char *ptr;
1475
1476 sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ],
1477 type_name, paramSizeNames[ inputBVecSize ],
1478 type_name, paramSizeNames[ inputCVecSize ],
1479 type_name, paramSizeNames[ vectorSize ],
1480 // Loads
1481 type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
1482 type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
1483 type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
1484 // Dest type
1485 type_name, vectorString,
1486 // Store
1487 ( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain );
1488
1489 ptr = programString;
1490 err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" );
1491 test_error( err, "Unable to create test kernel" );
1492
1493 err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] );
1494 err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] );
1495 err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] );
1496 err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] );
1497 test_error(err, "clSetKernelArgs failed");
1498
1499 // Run
1500 threads[0] = (size_t)num_elements;
1501
1502 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
1503 test_error(err, "clEnqueueNDRangeKernel failed");
1504
1505 // Read and verify results
1506 err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL );
1507 test_error(err, "clEnqueueReadBuffer failed");
1508
1509 // log_info("Performing verification\n");
1510 int error_count = 0;
1511
1512 char *inputAPtr = (char *)input_ptr[ 0 ];
1513 char *inputBPtr = (char *)input_ptr[ 1 ];
1514 cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ];
1515 char *actualPtr = (char *)output_ptr;
1516
1517 for( int i = 0; i < num_elements; i++ )
1518 {
1519 for( int j = 0; j < vectorSize; j++ )
1520 {
1521 char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr;
1522 if( memcmp( expectedPtr, actualPtr, type_size ) != 0 )
1523 {
1524 #if 0
1525 char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ];
1526 print_type_to_string( type, inputAPtr, inputAStr );
1527 print_type_to_string( type, inputBPtr, inputBStr );
1528 print_type_to_string( type, expectedPtr, expectedStr );
1529 print_type_to_string( type, actualPtr, actualStr );
1530 log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n",
1531 type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" );
1532 #endif
1533 error_count++;
1534 }
1535 // Advance for each element member. Note if any of the vec sizes are 1, they don't advance here
1536 inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size;
1537 inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size;
1538 inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size;
1539 actualPtr += ( vectorSize == 1 ) ? 0 : type_size;
1540 }
1541 // Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct
1542 inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0;
1543 inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0;
1544 inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0;
1545 actualPtr += ( vectorSize == 1 ) ? type_size : 0;
1546 }
1547
1548 // cleanup
1549 clReleaseMemObject(streams[0]);
1550 clReleaseMemObject(streams[1]);
1551 clReleaseMemObject(streams[2]);
1552 clReleaseMemObject(streams[3]);
1553 clReleaseKernel(kernel);
1554 clReleaseProgram(program);
1555 free(input_ptr[0]);
1556 free(input_ptr[1]);
1557 free(input_ptr[2]);
1558 free(output_ptr);
1559 free_mtdata( s_randStates );
1560
1561 return error_count;
1562 }
1563