1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <string.h>
19 #include <stdio.h>
20
21 #if !defined(_WIN32)
22 #include <libgen.h>
23 #include <sys/param.h>
24 #endif
25
26 #include "mingw_compat.h"
27 #if defined (__MINGW32__)
28 #include <sys/param.h>
29 #endif
30
31 #include <time.h>
32 #include "errorHelpers.h"
33 #include "harness/compat.h"
34 #include "harness/mt19937.h"
35 #include "harness/kernelHelpers.h"
36 #include "harness/rounding_mode.h"
37 #include "harness/fpcontrol.h"
38 #include "harness/testHarness.h"
39 #include "harness/parseParameters.h"
40 #if defined( __APPLE__ )
41 #include <sys/sysctl.h>
42 #endif
43 #if defined( __linux__ )
44 #include <unistd.h>
45 #include <sys/syscall.h>
46 #include <linux/sysctl.h>
47 #endif
48
49 #if defined (_WIN32)
50 #include <string.h>
51 #endif
52
53 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
54 #include <emmintrin.h>
55 #endif
56
57 #if defined(__PPC__)
58 // Global varaiable used to hold the FPU control register state. The FPSCR register can not
59 // be used because not all Power implementations retain or observed the NI (non-IEEE
60 // mode) bit.
61 __thread fpu_control_t fpu_control = 0;
62 #endif
63
64 #ifndef MAXPATHLEN
65 #define MAXPATHLEN 2048
66 #endif
67
68 char appName[ MAXPATHLEN ] = "";
69 cl_context gContext = NULL;
70 cl_command_queue gQueue = NULL;
71 cl_program gProgram[5] = { NULL, NULL, NULL, NULL, NULL };
72 cl_program gProgram_double[5] = { NULL, NULL, NULL, NULL, NULL };
73 int gForceFTZ = 0;
74 int gSeed = 0;
75 int gSeedSpecified = 0;
76 int gHasDouble = 0;
77 MTdata gMTdata = NULL;
78 int gSkipNanInf = 0;
79 int gIgnoreZeroSign = 0;
80
81 cl_mem bufA = NULL;
82 cl_mem bufB = NULL;
83 cl_mem bufC = NULL;
84 cl_mem bufD = NULL;
85 cl_mem bufE = NULL;
86 cl_mem bufC_double = NULL;
87 cl_mem bufD_double = NULL;
88 float *buf1, *buf2, *buf3, *buf4, *buf5, *buf6;
89 float *correct[8];
90 int *skipTest[8];
91
92 double *buf3_double, *buf4_double, *buf5_double, *buf6_double;
93 double *correct_double[8];
94
95 static const char **gArgList;
96 static size_t gArgCount;
97
98 #define BUFFER_SIZE (1024*1024)
99
100
101 static int ParseArgs( int argc, const char **argv );
102 static void PrintUsage( void );
103 test_status InitCL( cl_device_id device );
104 static void ReleaseCL( void );
105 static int RunTest( int testNumber );
106 static int RunTest_Double( int testNumber );
107
108 #if defined(__ANDROID__)
109 #define nanf( X ) strtof( "NAN", ( char ** ) NULL )
110 #define nan( X ) strtod( "NAN", ( char ** ) NULL )
111 #endif
112
113 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
114 // defeat x87 on MSVC
sse_add(float x,float y)115 float sse_add(float x, float y)
116 {
117 volatile float a = x;
118 volatile float b = y;
119
120 // defeat x87
121 __m128 va = _mm_set_ss( (float) a );
122 __m128 vb = _mm_set_ss( (float) b );
123 va = _mm_add_ss( va, vb );
124 _mm_store_ss( (float*) &a, va );
125 return a;
126 }
127
sse_add_sd(double x,double y)128 double sse_add_sd(double x, double y)
129 {
130 volatile double a = x;
131 volatile double b = y;
132
133 // defeat x87
134 __m128d va = _mm_set_sd( (double) a );
135 __m128d vb = _mm_set_sd( (double) b );
136 va = _mm_add_sd( va, vb );
137 _mm_store_sd( (double*) &a, va );
138 return a;
139 }
140
sse_sub(float x,float y)141 float sse_sub(float x, float y)
142 {
143 volatile float a = x;
144 volatile float b = y;
145
146 // defeat x87
147 __m128 va = _mm_set_ss( (float) a );
148 __m128 vb = _mm_set_ss( (float) b );
149 va = _mm_sub_ss( va, vb );
150 _mm_store_ss( (float*) &a, va );
151 return a;
152 }
153
sse_sub_sd(double x,double y)154 double sse_sub_sd(double x, double y)
155 {
156 volatile double a = x;
157 volatile double b = y;
158
159 // defeat x87
160 __m128d va = _mm_set_sd( (double) a );
161 __m128d vb = _mm_set_sd( (double) b );
162 va = _mm_sub_sd( va, vb );
163 _mm_store_sd( (double*) &a, va );
164 return a;
165 }
166
sse_mul(float x,float y)167 float sse_mul(float x, float y)
168 {
169 volatile float a = x;
170 volatile float b = y;
171
172 // defeat x87
173 __m128 va = _mm_set_ss( (float) a );
174 __m128 vb = _mm_set_ss( (float) b );
175 va = _mm_mul_ss( va, vb );
176 _mm_store_ss( (float*) &a, va );
177 return a;
178 }
179
sse_mul_sd(double x,double y)180 double sse_mul_sd(double x, double y)
181 {
182 volatile double a = x;
183 volatile double b = y;
184
185 // defeat x87
186 __m128d va = _mm_set_sd( (double) a );
187 __m128d vb = _mm_set_sd( (double) b );
188 va = _mm_mul_sd( va, vb );
189 _mm_store_sd( (double*) &a, va );
190 return a;
191 }
192 #endif
193
194 #ifdef __PPC__
ppc_mul(float a,float b)195 float ppc_mul(float a, float b)
196 {
197 float p;
198
199 if (gForceFTZ) {
200 // Flush input a to zero if it is sub-normal
201 if (fabsf(a) < FLT_MIN) {
202 a = copysignf(0.0, a);
203 }
204 // Flush input b to zero if it is sub-normal
205 if (fabsf(b) < FLT_MIN) {
206 b = copysignf(0.0, b);
207 }
208 // Perform multiply
209 p = a * b;
210 // Flush the product if it is a sub-normal
211 if (fabs((double)a * (double)b) < FLT_MIN) {
212 p = copysignf(0.0, p);
213 }
214 } else {
215 p = a * b;
216 }
217 return p;
218 }
219 #endif
220
test_contractions_float_0(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)221 int test_contractions_float_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
222 {
223 return RunTest(0);
224 }
225
test_contractions_float_1(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)226 int test_contractions_float_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
227 {
228 return RunTest(1);
229 }
230
test_contractions_float_2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)231 int test_contractions_float_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
232 {
233 return RunTest(2);
234 }
235
test_contractions_float_3(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)236 int test_contractions_float_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
237 {
238 return RunTest(3);
239 }
240
test_contractions_float_4(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)241 int test_contractions_float_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
242 {
243 return RunTest(4);
244 }
245
test_contractions_float_5(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)246 int test_contractions_float_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
247 {
248 return RunTest(5);
249 }
250
test_contractions_float_6(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)251 int test_contractions_float_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
252 {
253 return RunTest(6);
254 }
255
test_contractions_float_7(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)256 int test_contractions_float_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
257 {
258 return RunTest(7);
259 }
260
test_contractions_double_0(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)261 int test_contractions_double_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
262 {
263 return RunTest_Double(0);
264 }
265
test_contractions_double_1(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)266 int test_contractions_double_1(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
267 {
268 return RunTest_Double(1);
269 }
270
test_contractions_double_2(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)271 int test_contractions_double_2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
272 {
273 return RunTest_Double(2);
274 }
275
test_contractions_double_3(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)276 int test_contractions_double_3(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
277 {
278 return RunTest_Double(3);
279 }
280
test_contractions_double_4(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)281 int test_contractions_double_4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
282 {
283 return RunTest_Double(4);
284 }
285
test_contractions_double_5(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)286 int test_contractions_double_5(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
287 {
288 return RunTest_Double(5);
289 }
290
test_contractions_double_6(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)291 int test_contractions_double_6(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
292 {
293 return RunTest_Double(6);
294 }
295
test_contractions_double_7(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)296 int test_contractions_double_7(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
297 {
298 return RunTest_Double(7);
299 }
300
301 test_definition test_list[] = {
302 ADD_TEST( contractions_float_0 ),
303 ADD_TEST( contractions_float_1 ),
304 ADD_TEST( contractions_float_2 ),
305 ADD_TEST( contractions_float_3 ),
306 ADD_TEST( contractions_float_4 ),
307 ADD_TEST( contractions_float_5 ),
308 ADD_TEST( contractions_float_6 ),
309 ADD_TEST( contractions_float_7 ),
310 ADD_TEST( contractions_double_0 ),
311 ADD_TEST( contractions_double_1 ),
312 ADD_TEST( contractions_double_2 ),
313 ADD_TEST( contractions_double_3 ),
314 ADD_TEST( contractions_double_4 ),
315 ADD_TEST( contractions_double_5 ),
316 ADD_TEST( contractions_double_6 ),
317 ADD_TEST( contractions_double_7 ),
318 };
319
320 const int test_num = ARRAY_SIZE( test_list );
321
main(int argc,const char ** argv)322 int main( int argc, const char **argv )
323 {
324 argc = parseCustomParam(argc, argv);
325 if (argc == -1)
326 {
327 return -1;
328 }
329
330 int error = ParseArgs( argc, argv );
331
332 if( !error )
333 {
334 error = runTestHarnessWithCheck( gArgCount, gArgList, test_num, test_list, true, 0, InitCL );
335 }
336
337 if( gQueue )
338 {
339 int flush_error = clFinish( gQueue );
340 if( flush_error )
341 log_error( "clFinish failed: %d\n", flush_error );
342 }
343
344 ReleaseCL();
345 free( gArgList );
346
347 return error;
348 }
349
350
351
ParseArgs(int argc,const char ** argv)352 static int ParseArgs( int argc, const char **argv )
353 {
354 gArgList = (const char **)calloc( argc, sizeof( char*) );
355
356 if( NULL == gArgList )
357 {
358 vlog_error( "Failed to allocate memory for argList\n" );
359 return 1;
360 }
361
362 gArgList[0] = argv[0];
363 gArgCount = 1;
364
365 int length_of_seed = 0;
366
367 { // Extract the app name
368 strncpy( appName, argv[0], MAXPATHLEN );
369
370 #if (defined( __APPLE__ ) || defined(__linux__) || defined(__MINGW32__))
371 char baseName[MAXPATHLEN];
372 char *base = NULL;
373 strncpy( baseName, argv[0], MAXPATHLEN );
374 base = basename( baseName );
375 if( NULL != base )
376 {
377 strncpy( appName, base, sizeof( appName ) );
378 appName[ sizeof( appName ) -1 ] = '\0';
379 }
380 #elif defined (_WIN32)
381 char fname[_MAX_FNAME + _MAX_EXT + 1];
382 char ext[_MAX_EXT];
383
384 errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0,
385 fname, _MAX_FNAME, ext, _MAX_EXT );
386 if (err == 0) { // no error
387 strcat (fname, ext); //just cat them, size of frame can keep both
388 strncpy (appName, fname, sizeof(appName));
389 appName[ sizeof( appName ) -1 ] = '\0';
390 }
391 #endif
392 }
393
394 for( int i = 1; i < argc; i++ )
395 {
396 const char *arg = argv[i];
397 if( NULL == arg )
398 break;
399
400 if( arg[0] == '-' )
401 {
402 while( arg[1] != '\0' )
403 {
404 arg++;
405 switch( *arg )
406 {
407 case 'h':
408 PrintUsage();
409 return -1;
410
411 case 's':
412 arg++;
413 gSeed = atoi( arg );
414 while (arg[length_of_seed] >='0' && arg[length_of_seed]<='9')
415 length_of_seed++;
416 gSeedSpecified = 1;
417 arg+=length_of_seed-1;
418 break;
419
420 case 'z':
421 gForceFTZ ^= 1;
422 break;
423
424 default:
425 vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
426 PrintUsage();
427 return -1;
428 }
429 }
430 }
431 else
432 {
433 gArgList[gArgCount] = arg;
434 gArgCount++;
435 }
436 }
437 vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ );
438
439 PrintArch();
440
441 return 0;
442 }
443
PrintUsage(void)444 static void PrintUsage( void )
445 {
446 vlog( "%s [-z]: <optional: test names>\n", appName );
447 vlog( "\tOptions:\n" );
448 vlog( "\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by device capabilities by default.)\n" );
449 vlog( "\t\t-sNUMBER set random seed.\n");
450 vlog( "\n" );
451 vlog( "\tTest names:\n" );
452 for( int i = 0; i < test_num; i++ )
453 {
454 vlog( "\t\t%s\n", test_list[i].name );
455 }
456 }
457
458 const char *sizeNames[] = { "float", "float2", "float4", "float8", "float16" };
459 const char *sizeNames_double[] = { "double", "double2", "double4", "double8", "double16" };
460
InitCL(cl_device_id device)461 test_status InitCL( cl_device_id device )
462 {
463 int error;
464 uint32_t i, j;
465 int *bufSkip = NULL;
466 int isRTZ = 0;
467 RoundingMode oldRoundMode = kDefaultRoundingMode;
468
469 cl_device_fp_config floatCapabilities = 0;
470 if( (error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities, NULL)))
471 floatCapabilities = 0;
472 if(0 == (CL_FP_DENORM & floatCapabilities) )
473 gForceFTZ ^= 1;
474
475 // check for cl_khr_fp64
476 gHasDouble = is_extension_available(device, "cl_khr_fp64" );
477
478 if(0 == (CL_FP_INF_NAN & floatCapabilities) )
479 gSkipNanInf = 1;
480
481 // Embedded devices that flush to zero are allowed to have an undefined sign.
482 if (gIsEmbedded && gForceFTZ)
483 gIgnoreZeroSign = 1;
484
485 gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
486 if( NULL == gContext || error )
487 {
488 vlog_error( "clCreateDeviceGroup failed. %d\n", error );
489 return TEST_FAIL;
490 }
491
492 gQueue = clCreateCommandQueue( gContext, device, 0, &error );
493 if( NULL == gQueue || error )
494 {
495 vlog_error( "clCreateContext failed. %d\n", error );
496 return TEST_FAIL;
497 }
498
499 // setup input buffers
500 bufA = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
501 bufB = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
502 bufC = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
503 bufD = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
504 bufE = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
505
506 if( bufA == NULL ||
507 bufB == NULL ||
508 bufC == NULL ||
509 bufD == NULL ||
510 bufE == NULL )
511 {
512 vlog_error( "clCreateArray failed for input\n" );
513 return TEST_FAIL;
514 }
515
516 if( gHasDouble )
517 {
518 bufC_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
519 bufD_double = clCreateBuffer( gContext, CL_MEM_READ_WRITE, BUFFER_SIZE, NULL, NULL );
520 if( bufC_double == NULL ||
521 bufD_double == NULL )
522 {
523 vlog_error( "clCreateArray failed for input DP\n" );
524 return TEST_FAIL;
525 }
526 }
527
528 const char *kernels[] = {
529 "", "#pragma OPENCL FP_CONTRACT OFF\n"
530 "__kernel void kernel1( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
531 "{\n"
532 " int i = get_global_id(0);\n"
533 " out[i] = a[i] * b[i] + c[i];\n"
534 "}\n"
535 "\n"
536 "__kernel void kernel2( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
537 "{\n"
538 " int i = get_global_id(0);\n"
539 " out[i] = a[i] * b[i] - c[i];\n"
540 "}\n"
541 "\n"
542 "__kernel void kernel3( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
543 "{\n"
544 " int i = get_global_id(0);\n"
545 " out[i] = c[i] + a[i] * b[i];\n"
546 "}\n"
547 "\n"
548 "__kernel void kernel4( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
549 "{\n"
550 " int i = get_global_id(0);\n"
551 " out[i] = c[i] - a[i] * b[i];\n"
552 "}\n"
553 "\n"
554 "__kernel void kernel5( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
555 "{\n"
556 " int i = get_global_id(0);\n"
557 " out[i] = -(a[i] * b[i] + c[i]);\n"
558 "}\n"
559 "\n"
560 "__kernel void kernel6( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
561 "{\n"
562 " int i = get_global_id(0);\n"
563 " out[i] = -(a[i] * b[i] - c[i]);\n"
564 "}\n"
565 "\n"
566 "__kernel void kernel7( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
567 "{\n"
568 " int i = get_global_id(0);\n"
569 " out[i] = -(c[i] + a[i] * b[i]);\n"
570 "}\n"
571 "\n"
572 "__kernel void kernel8( __global ", NULL, " *out, const __global ", NULL, " *a, const __global ", NULL, " *b, const __global ", NULL, " *c )\n"
573 "{\n"
574 " int i = get_global_id(0);\n"
575 " out[i] = -(c[i] - a[i] * b[i]);\n"
576 "}\n"
577 "\n" };
578
579 for (i = 0; i < sizeof(sizeNames) / sizeof(sizeNames[0]); i++)
580 {
581 size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
582 kernels[0] = "";
583
584 for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames[i];
585 error = create_single_kernel_helper(gContext, &gProgram[i], nullptr,
586 strCount, kernels, nullptr);
587 if (CL_SUCCESS != error || nullptr == gProgram[i])
588 {
589 log_error("Error: Unable to create test program! (%s) (in %s:%d)\n",
590 IGetErrorString(error), __FILE__, __LINE__);
591 return TEST_FAIL;
592 }
593 }
594
595 if (gHasDouble)
596 {
597 kernels[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
598 for (i = 0; i < sizeof(sizeNames_double) / sizeof(sizeNames_double[0]);
599 i++)
600 {
601 size_t strCount = sizeof(kernels) / sizeof(kernels[0]);
602
603 for (j = 2; j < strCount; j += 2) kernels[j] = sizeNames_double[i];
604 error = create_single_kernel_helper(gContext, &gProgram_double[i],
605 nullptr, strCount, kernels,
606 nullptr);
607 if (CL_SUCCESS != error || nullptr == gProgram_double[i])
608 {
609 log_error(
610 "Error: Unable to create test program! (%s) (in %s:%d)\n",
611 IGetErrorString(error), __FILE__, __LINE__);
612 return TEST_FAIL;
613 }
614 }
615 }
616
617 if( 0 == gSeedSpecified )
618 {
619 time_t currentTime = time( NULL );
620 struct tm *t = localtime(¤tTime);
621 gSeed = t->tm_sec + 60 * ( t->tm_min + 60 * (t->tm_hour + 24 * (t->tm_yday + 365 * t->tm_year)));
622 gSeed = (uint32_t) (((uint64_t) gSeed * (uint64_t) gSeed ) >> 16);
623 }
624 gMTdata = init_genrand( gSeed );
625
626
627 // Init bufA and bufB
628 {
629 buf1 = (float *)malloc( BUFFER_SIZE );
630 buf2 = (float *)malloc( BUFFER_SIZE );
631 buf3 = (float *)malloc( BUFFER_SIZE );
632 buf4 = (float *)malloc( BUFFER_SIZE );
633 buf5 = (float *)malloc( BUFFER_SIZE );
634 buf6 = (float *)malloc( BUFFER_SIZE );
635
636 bufSkip = (int *)malloc( BUFFER_SIZE );
637
638 if( NULL == buf1 || NULL == buf2 || NULL == buf3 || NULL == buf4 || NULL == buf5 || NULL == buf6 || NULL == bufSkip)
639 {
640 vlog_error( "Out of memory initializing buffers\n" );
641 return TEST_FAIL;
642 }
643 for( i = 0; i < sizeof( correct ) / sizeof( correct[0] ); i++ )
644 {
645 correct[i] = (float *)malloc( BUFFER_SIZE );
646 skipTest[i] = (int *)malloc( BUFFER_SIZE );
647 if(( NULL == correct[i] ) || ( NULL == skipTest[i]))
648 {
649 vlog_error( "Out of memory initializing buffers 2\n" );
650 return TEST_FAIL;
651 }
652 }
653
654 for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ )
655 ((uint32_t*) buf1)[i] = genrand_int32( gMTdata );
656
657 if( (error = clEnqueueWriteBuffer(gQueue, bufA, CL_FALSE, 0, BUFFER_SIZE, buf1, 0, NULL, NULL) ))
658 {
659 vlog_error( "Failure %d at clEnqueueWriteBuffer1\n", error );
660 return TEST_FAIL;
661 }
662
663 for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ )
664 ((uint32_t*) buf2)[i] = genrand_int32( gMTdata );
665
666 if( (error = clEnqueueWriteBuffer(gQueue, bufB, CL_FALSE, 0, BUFFER_SIZE, buf2, 0, NULL, NULL) ))
667 {
668 vlog_error( "Failure %d at clEnqueueWriteBuffer2\n", error );
669 return TEST_FAIL;
670 }
671
672 void *ftzInfo = NULL;
673 if( gForceFTZ )
674 ftzInfo = FlushToZero();
675 if ((CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) && gIsEmbedded) {
676 oldRoundMode = set_round(kRoundTowardZero, kfloat);
677 isRTZ = 1;
678 }
679 float *f = (float*) buf1;
680 float *f2 = (float*) buf2;
681 float *f3 = (float*) buf3;
682 float *f4 = (float*) buf4;
683 for( i = 0; i < BUFFER_SIZE / sizeof(float); i++ )
684 {
685 float q = f[i];
686 float q2 = f2[i];
687
688 feclearexcept(FE_OVERFLOW);
689 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
690 // VS2005 might use x87 for straight multiplies, and we can't
691 // turn that off
692 f3[i] = sse_mul(q, q2);
693 f4[i] = sse_mul(-q, q2);
694 #elif defined(__PPC__)
695 // None of the current generation PPC processors support HW
696 // FTZ, emulate it in sw.
697 f3[i] = ppc_mul(q, q2);
698 f4[i] = ppc_mul(-q, q2);
699 #else
700 f3[i] = q * q2;
701 f4[i] = -q * q2;
702 #endif
703 // Skip test if the device doesn't support infinities and NaN AND the result overflows
704 // or either input is an infinity of NaN
705 bufSkip[i] = (gSkipNanInf && ((FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW))) ||
706 (fabsf(q) == FLT_MAX) || (q != q) ||
707 (fabsf(q2) == FLT_MAX) || (q2 != q2)));
708 }
709
710 if( gForceFTZ )
711 UnFlushToZero(ftzInfo);
712
713 if (isRTZ)
714 (void)set_round(oldRoundMode, kfloat);
715
716
717 if( (error = clEnqueueWriteBuffer(gQueue, bufC, CL_FALSE, 0, BUFFER_SIZE, buf3, 0, NULL, NULL) ))
718 {
719 vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error );
720 return TEST_FAIL;
721 }
722 if( (error = clEnqueueWriteBuffer(gQueue, bufD, CL_FALSE, 0, BUFFER_SIZE, buf4, 0, NULL, NULL) ))
723 {
724 vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error );
725 return TEST_FAIL;
726 }
727
728 // Fill the buffers with NaN
729 float *f5 = (float*) buf5;
730 float nan_val = nanf("");
731 for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ )
732 f5[i] = nan_val;
733
734 // calculate reference results
735 for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ )
736 {
737 for ( j=0; j<8; j++)
738 {
739 feclearexcept(FE_OVERFLOW);
740 switch (j)
741 {
742 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
743 // VS2005 might use x87 for straight add/sub, and we can't
744 // turn that off
745 case 0:
746 correct[0][i] = sse_add(buf3[i],buf4[i]); break;
747 case 1:
748 correct[1][i] = sse_sub(buf3[i],buf3[i]); break;
749 case 2:
750 correct[2][i] = sse_add(buf4[i],buf3[i]); break;
751 case 3:
752 correct[3][i] = sse_sub(buf3[i],buf3[i]); break;
753 case 4:
754 correct[4][i] = -sse_add(buf3[i],buf4[i]); break;
755 case 5:
756 correct[5][i] = -sse_sub(buf3[i],buf3[i]); break;
757 case 6:
758 correct[6][i] = -sse_add(buf4[i],buf3[i]); break;
759 case 7:
760 correct[7][i] = -sse_sub(buf3[i],buf3[i]); break;
761 #else
762 case 0:
763 correct[0][i] = buf3[i] + buf4[i]; break;
764 case 1:
765 correct[1][i] = buf3[i] - buf3[i]; break;
766 case 2:
767 correct[2][i] = buf4[i] + buf3[i]; break;
768 case 3:
769 correct[3][i] = buf3[i] - buf3[i]; break;
770 case 4:
771 correct[4][i] = -(buf3[i] + buf4[i]); break;
772 case 5:
773 correct[5][i] = -(buf3[i] - buf3[i]); break;
774 case 6:
775 correct[6][i] = -(buf4[i] + buf3[i]); break;
776 case 7:
777 correct[7][i] = -(buf3[i] - buf3[i]); break;
778 #endif
779 }
780 // Further skip test inputs if the device doesn support infinities AND NaNs
781 // resulting sum overflows
782 skipTest[j][i] = (bufSkip[i] ||
783 (gSkipNanInf && (FE_OVERFLOW == (FE_OVERFLOW & fetestexcept(FE_OVERFLOW)))));
784
785 #if defined(__PPC__)
786 // Since the current Power processors don't emulate flush to zero in HW,
787 // it must be emulated in SW instead.
788 if (gForceFTZ)
789 {
790 if ((fabsf(correct[j][i]) < FLT_MIN) && (correct[j][i] != 0.0f))
791 correct[j][i] = copysignf(0.0f, correct[j][i]);
792 }
793 #endif
794 }
795 }
796 if( gHasDouble )
797 {
798 // Spec requires correct non-flushed results
799 // for doubles. We disable FTZ if this is default on
800 // the platform (like ARM) for reference result computation
801 // It is no-op if platform default is not FTZ (e.g. x86)
802 FPU_mode_type oldMode;
803 DisableFTZ( &oldMode );
804
805 buf3_double = (double *)malloc( BUFFER_SIZE );
806 buf4_double = (double *)malloc( BUFFER_SIZE );
807 buf5_double = (double *)malloc( BUFFER_SIZE );
808 buf6_double = (double *)malloc( BUFFER_SIZE );
809 if( NULL == buf3_double || NULL == buf4_double || NULL == buf5_double || NULL == buf6_double )
810 {
811 vlog_error( "Out of memory initializing DP buffers\n" );
812 return TEST_FAIL;
813 }
814 for( i = 0; i < sizeof( correct_double ) / sizeof( correct_double[0] ); i++ )
815 {
816 correct_double[i] = (double *)malloc( BUFFER_SIZE );
817 if( NULL == correct_double[i] )
818 {
819 vlog_error( "Out of memory initializing DP buffers 2\n" );
820 return TEST_FAIL;
821 }
822 }
823
824
825 double *f = (double*) buf1;
826 double *f2 = (double*) buf2;
827 double *f3 = (double*) buf3_double;
828 double *f4 = (double*) buf4_double;
829 for( i = 0; i < BUFFER_SIZE / sizeof(double); i++ )
830 {
831 double q = f[i];
832 double q2 = f2[i];
833 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
834 // VS2005 might use x87 for straight multiplies, and we can't
835 // turn that off
836 f3[i] = sse_mul_sd(q, q2);
837 f4[i] = sse_mul_sd(-q, q2);
838 #else
839 f3[i] = q * q2;
840 f4[i] = -q * q2;
841 #endif
842 }
843
844 if( (error = clEnqueueWriteBuffer(gQueue, bufC_double, CL_FALSE, 0, BUFFER_SIZE, buf3_double, 0, NULL, NULL) ))
845 {
846 vlog_error( "Failure %d at clEnqueueWriteBuffer3\n", error );
847 return TEST_FAIL;
848 }
849 if( (error = clEnqueueWriteBuffer(gQueue, bufD_double, CL_FALSE, 0, BUFFER_SIZE, buf4_double, 0, NULL, NULL) ))
850 {
851 vlog_error( "Failure %d at clEnqueueWriteBuffer4\n", error );
852 return TEST_FAIL;
853 }
854
855 // Fill the buffers with NaN
856 double *f5 = (double*) buf5_double;
857 double nan_val = nanf("");
858 for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ )
859 f5[i] = nan_val;
860
861 // calculate reference results
862 for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ )
863 {
864 #if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
865 // VS2005 might use x87 for straight add/sub, and we can't
866 // turn that off
867 correct_double[0][i] = sse_add_sd(buf3_double[i],buf4_double[i]);
868 correct_double[1][i] = sse_sub_sd(buf3_double[i],buf3_double[i]);
869 correct_double[2][i] = sse_add_sd(buf4_double[i],buf3_double[i]);
870 correct_double[3][i] = sse_sub_sd(buf3_double[i],buf3_double[i]);
871 correct_double[4][i] = -sse_add_sd(buf3_double[i],buf4_double[i]);
872 correct_double[5][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]);
873 correct_double[6][i] = -sse_add_sd(buf4_double[i],buf3_double[i]);
874 correct_double[7][i] = -sse_sub_sd(buf3_double[i],buf3_double[i]);
875 #else
876 correct_double[0][i] = buf3_double[i] + buf4_double[i];
877 correct_double[1][i] = buf3_double[i] - buf3_double[i];
878 correct_double[2][i] = buf4_double[i] + buf3_double[i];
879 correct_double[3][i] = buf3_double[i] - buf3_double[i];
880 correct_double[4][i] = -(buf3_double[i] + buf4_double[i]);
881 correct_double[5][i] = -(buf3_double[i] - buf3_double[i]);
882 correct_double[6][i] = -(buf4_double[i] + buf3_double[i]);
883 correct_double[7][i] = -(buf3_double[i] - buf3_double[i]);
884 #endif
885 }
886
887 // Restore previous FP state since we modified it for
888 // reference result computation (see DisableFTZ call above)
889 RestoreFPState(&oldMode);
890 }
891 }
892
893 char c[1000];
894 static const char *no_yes[] = { "NO", "YES" };
895 vlog( "\nCompute Device info:\n" );
896 clGetDeviceInfo( device, CL_DEVICE_NAME, sizeof(c), (void *)&c, NULL);
897 vlog( "\tDevice Name: %s\n", c );
898 clGetDeviceInfo( device, CL_DEVICE_VENDOR, sizeof(c), (void *)&c, NULL);
899 vlog( "\tVendor: %s\n", c );
900 clGetDeviceInfo( device, CL_DEVICE_VERSION, sizeof(c), (void *)&c, NULL);
901 vlog( "\tDevice Version: %s\n", c );
902 clGetDeviceInfo( device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
903 vlog( "\tCL C Version: %s\n", c );
904 clGetDeviceInfo( device, CL_DRIVER_VERSION, sizeof(c), (void *)&c, NULL);
905 vlog( "\tDriver Version: %s\n", c );
906 vlog( "\tSubnormal values supported? %s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)] );
907 vlog( "\tTesting with FTZ mode ON? %s\n", no_yes[0 != gForceFTZ] );
908 vlog( "\tTesting Doubles? %s\n", no_yes[0 != gHasDouble] );
909 vlog( "\tRandom Number seed: 0x%8.8x\n", gSeed );
910 vlog( "\n\n" );
911
912 return TEST_PASS;
913 }
914
ReleaseCL(void)915 static void ReleaseCL( void )
916 {
917 clReleaseMemObject(bufA);
918 clReleaseMemObject(bufB);
919 clReleaseMemObject(bufC);
920 clReleaseMemObject(bufD);
921 clReleaseMemObject(bufE);
922 clReleaseProgram(gProgram[0]);
923 clReleaseProgram(gProgram[1]);
924 clReleaseProgram(gProgram[2]);
925 clReleaseProgram(gProgram[3]);
926 clReleaseProgram(gProgram[4]);
927 if( gHasDouble )
928 {
929 clReleaseMemObject(bufC_double);
930 clReleaseMemObject(bufD_double);
931 clReleaseProgram(gProgram_double[0]);
932 clReleaseProgram(gProgram_double[1]);
933 clReleaseProgram(gProgram_double[2]);
934 clReleaseProgram(gProgram_double[3]);
935 clReleaseProgram(gProgram_double[4]);
936 }
937 clReleaseCommandQueue(gQueue);
938 clReleaseContext(gContext);
939 }
940
941
RunTest(int testNumber)942 static int RunTest( int testNumber )
943 {
944 size_t i;
945 int error = 0;
946 cl_mem args[4];
947 float *c;
948 const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4",
949 "kernel5", "kernel6", "kernel7", "kernel8" };
950 switch( testNumber )
951 {
952 case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break; // a * b + c
953 case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
954 case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break;
955 case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
956 case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break;
957 case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
958 case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD; c = buf4; break;
959 case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC; c = buf3; break;
960 default:
961 vlog_error( "Unknown test case %d passed to RunTest\n", testNumber );
962 return -1;
963 }
964
965
966 int vectorSize;
967 for( vectorSize = 0; vectorSize < 5; vectorSize++ )
968 {
969 cl_kernel k = clCreateKernel( gProgram[ vectorSize ], kernelName[ testNumber ], &error );
970 if( NULL == k || error )
971 {
972 vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize );
973 return -2;
974 }
975
976 // set the kernel args
977 for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ )
978 if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) ))
979 {
980 vlog_error( "Error %d setting kernel arg # %ld\n", error, i );
981 return error;
982 }
983
984 // write NaNs to the result array
985 if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf5, 0, NULL, NULL) ))
986 {
987 vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber );
988 return error;
989 }
990
991 // execute the kernel
992 size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_float ) * (1<<vectorSize)), 0, 0 };
993 if( ((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, gDim, NULL, 0, NULL, NULL) )))
994 {
995 vlog_error( "Got Error # %d trying to execture kernel\n", error );
996 return error;
997 }
998
999 // read the data back
1000 if( (error = clEnqueueReadBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf6, 0, NULL, NULL ) ))
1001 {
1002 vlog_error( "Failure %d at clReadArray %d\n", error, testNumber );
1003 return error;
1004 }
1005
1006 // verify results
1007 float *test = (float*) buf6;
1008 float *a = (float*) buf1;
1009 float *b = (float*) buf2;
1010 for( i = 0; i < BUFFER_SIZE / sizeof( float ); i++ )
1011 {
1012 if( isnan(test[i]) && isnan(correct[testNumber][i] ) )
1013 continue;
1014
1015 if( skipTest[testNumber][i] )
1016 continue;
1017
1018 // sign of zero must be correct
1019 if(( ((uint32_t*) test)[i] != ((uint32_t*) correct[testNumber])[i] ) &&
1020 !(gIgnoreZeroSign && (test[i] == 0.0f) && (correct[testNumber][i] == 0.0f)) )
1021 {
1022 switch( testNumber )
1023 {
1024 // Zeros for these should be positive
1025 case 0: vlog_error( "%ld) Error for %s %s: %a * %a + %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1026 a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1027 case 1: vlog_error( "%ld) Error for %s %s: %a * %a - %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1028 a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1029 case 2: vlog_error( "%ld) Error for %s %s: %a + %a * %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1030 c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1031 case 3: vlog_error( "%ld) Error for %s %s: %a - %a * %a = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1032 c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1033
1034 // Zeros for these should be negative
1035 case 4: vlog_error( "%ld) Error for %s %s: -(%a * %a + %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1036 a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1037 case 5: vlog_error( "%ld) Error for %s %s: -(%a * %a - %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1038 a[i], b[i], c[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1039 case 6: vlog_error( "%ld) Error for %s %s: -(%a + %a * %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1040 c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1041 case 7: vlog_error( "%ld) Error for %s %s: -(%a - %a * %a) = *%a vs. %a\n", i, sizeNames[ vectorSize], kernelName[ testNumber ],
1042 c[i], a[i], b[i], correct[testNumber][i], test[i] ); clReleaseKernel(k); return -1;
1043 default:
1044 vlog_error( "error: Unknown test number!\n" );
1045 clReleaseKernel(k);
1046 return -2;
1047 }
1048 }
1049 }
1050
1051 clReleaseKernel(k);
1052 }
1053
1054 return error;
1055 }
1056
RunTest_Double(int testNumber)1057 static int RunTest_Double( int testNumber )
1058 {
1059 if( !gHasDouble )
1060 {
1061 vlog("Double is not supported, test not run.\n");
1062 return 0;
1063 }
1064
1065 size_t i;
1066 int error = 0;
1067 cl_mem args[4];
1068 double *c;
1069 const char *kernelName[] = { "kernel1", "kernel2", "kernel3", "kernel4",
1070 "kernel5", "kernel6", "kernel7", "kernel8" };
1071
1072 switch( testNumber )
1073 {
1074 case 0: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break; // a * b + c
1075 case 1: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1076 case 2: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break;
1077 case 3: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1078 case 4: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break;
1079 case 5: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1080 case 6: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufD_double; c = buf4_double; break;
1081 case 7: args[0] = bufE; args[1] = bufA; args[2] = bufB; args[3] = bufC_double; c = buf3_double; break;
1082 default:
1083 vlog_error( "Unknown test case %d passed to RunTest\n", testNumber );
1084 return -1;
1085 }
1086
1087 int vectorSize;
1088 for( vectorSize = 0; vectorSize < 5; vectorSize++ )
1089 {
1090 cl_kernel k = clCreateKernel( gProgram_double[ vectorSize ], kernelName[ testNumber ], &error );
1091 if( NULL == k || error )
1092 {
1093 vlog_error( "%d) Unable to find kernel \"%s\" for vector size: %d\n", error, kernelName[ testNumber ], 1 << vectorSize );
1094 return -2;
1095 }
1096
1097 // set the kernel args
1098 for( i = 0; i < sizeof(args ) / sizeof( args[0]); i++ )
1099 if( (error = clSetKernelArg(k, i, sizeof( cl_mem ), args + i) ))
1100 {
1101 vlog_error( "Error %d setting kernel arg # %ld\n", error, i );
1102 return error;
1103 }
1104
1105 // write NaNs to the result array
1106 if( (error = clEnqueueWriteBuffer(gQueue, bufE, CL_FALSE, 0, BUFFER_SIZE, buf5_double, 0, NULL, NULL) ))
1107 {
1108 vlog_error( "Failure %d at clWriteArray %d\n", error, testNumber );
1109 return error;
1110 }
1111
1112 // execute the kernel
1113 size_t gDim[3] = { BUFFER_SIZE / (sizeof( cl_double ) * (1<<vectorSize)), 0, 0 };
1114 if( ((error = clEnqueueNDRangeKernel(gQueue, k, 1, NULL, gDim, NULL, 0, NULL, NULL) )))
1115 {
1116 vlog_error( "Got Error # %d trying to execture kernel\n", error );
1117 return error;
1118 }
1119
1120 // read the data back
1121 if( (error = clEnqueueReadBuffer(gQueue, bufE, CL_TRUE, 0, BUFFER_SIZE, buf6_double, 0, NULL, NULL ) ))
1122 {
1123 vlog_error( "Failure %d at clReadArray %d\n", error, testNumber );
1124 return error;
1125 }
1126
1127 // verify results
1128 double *test = (double*) buf6_double;
1129 double *a = (double*) buf1;
1130 double *b = (double*) buf2;
1131 for( i = 0; i < BUFFER_SIZE / sizeof( double ); i++ )
1132 {
1133 if( isnan(test[i]) && isnan(correct_double[testNumber][i] ) )
1134 continue;
1135
1136 // sign of zero must be correct
1137 if( ((uint64_t*) test)[i] != ((uint64_t*) correct_double[testNumber])[i] )
1138 {
1139 switch( testNumber )
1140 {
1141 // Zeros for these should be positive
1142 case 0: vlog_error( "%ld) Error for %s %s: %a * %a + %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1143 a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1144 case 1: vlog_error( "%ld) Error for %s %s: %a * %a - %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1145 a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1146 case 2: vlog_error( "%ld) Error for %s %s: %a + %a * %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1147 c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1148 case 3: vlog_error( "%ld) Error for %s %s: %a - %a * %a = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1149 c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1150
1151 // Zeros for these should be negative
1152 case 4: vlog_error( "%ld) Error for %s %s: -(%a * %a + %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1153 a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1154 case 5: vlog_error( "%ld) Error for %s %s: -(%a * %a - %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1155 a[i], b[i], c[i], correct[testNumber][i], test[i] ); return -1;
1156 case 6: vlog_error( "%ld) Error for %s %s: -(%a + %a * %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1157 c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1158 case 7: vlog_error( "%ld) Error for %s %s: -(%a - %a * %a) = *%a vs. %a\n", i, sizeNames_double[ vectorSize], kernelName[ testNumber ],
1159 c[i], a[i], b[i], correct[testNumber][i], test[i] ); return -1;
1160 default:
1161 vlog_error( "error: Unknown test number!\n" );
1162 return -2;
1163 }
1164 }
1165 }
1166
1167 clReleaseKernel(k);
1168 }
1169
1170 return error;
1171 }
1172