1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
26
27
28 static const char *step_kernel_code =
29 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
30 "{\n"
31 " int tid = get_global_id(0);\n"
32 "\n"
33 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
34 "}\n";
35
36 static const char *step2_kernel_code =
37 "__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n"
38 "{\n"
39 " int tid = get_global_id(0);\n"
40 "\n"
41 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
42 "}\n";
43
44 static const char *step4_kernel_code =
45 "__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n"
46 "{\n"
47 " int tid = get_global_id(0);\n"
48 "\n"
49 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
50 "}\n";
51
52 static const char *step8_kernel_code =
53 "__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n"
54 "{\n"
55 " int tid = get_global_id(0);\n"
56 "\n"
57 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
58 "}\n";
59
60 static const char *step16_kernel_code =
61 "__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n"
62 "{\n"
63 " int tid = get_global_id(0);\n"
64 "\n"
65 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
66 "}\n";
67
68 static const char *step3_kernel_code =
69 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
70 "{\n"
71 " int tid = get_global_id(0);\n"
72 "\n"
73 " vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
74 "}\n";
75
76
77 static int
verify_step(cl_float * inptrA,cl_float * inptrB,cl_float * outptr,int n,int veclen)78 verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen)
79 {
80 float r;
81 int i, j;
82
83 for (i=0; i<n; ) {
84 int ii = i/veclen;
85 for (j=0; j<veclen && i<n; ++j, ++i) {
86 r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
87 if (r != outptr[i])
88 {
89 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
90 return -1;
91 }
92 }
93 }
94
95 return 0;
96 }
97
test_stepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)98 int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
99 {
100 cl_mem streams[3];
101 cl_float *input_ptr[2], *output_ptr, *p;
102 cl_program program[kTotalVecCount];
103 cl_kernel kernel[kTotalVecCount];
104 size_t threads[1];
105 int num_elements;
106 int err;
107 int i;
108 MTdata d;
109 num_elements = n_elems * 16;
110
111 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
113 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
114 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
115 sizeof(cl_float) * num_elements, NULL, NULL);
116 if (!streams[0])
117 {
118 log_error("clCreateBuffer failed\n");
119 return -1;
120 }
121 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
122 sizeof(cl_float) * num_elements, NULL, NULL);
123 if (!streams[1])
124 {
125 log_error("clCreateBuffer failed\n");
126 return -1;
127 }
128 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
129 sizeof(cl_float) * num_elements, NULL, NULL);
130 if (!streams[2])
131 {
132 log_error("clCreateBuffer failed\n");
133 return -1;
134 }
135
136 p = input_ptr[0];
137 d = init_genrand( gRandomSeed );
138 for (i=0; i<num_elements; i++)
139 {
140 p[i] = get_random_float(-0x40000000, 0x40000000, d);
141 }
142 p = input_ptr[1];
143 for (i=0; i<num_elements; i++)
144 {
145 p[i] = get_random_float(-0x40000000, 0x40000000, d);
146 }
147 free_mtdata(d); d = NULL;
148
149 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
150 if (err != CL_SUCCESS)
151 {
152 log_error("clWriteArray failed\n");
153 return -1;
154 }
155 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
156 if (err != CL_SUCCESS)
157 {
158 log_error("clWriteArray failed\n");
159 return -1;
160 }
161
162 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
163 if (err)
164 return -1;
165 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
166 if (err)
167 return -1;
168 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
169 if (err)
170 return -1;
171 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
172 if (err)
173 return -1;
174 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
175 if (err)
176 return -1;
177 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
178 if (err)
179 return -1;
180
181 for (i=0; i <kTotalVecCount; i++)
182 {
183 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
184 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
185 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
186 if (err != CL_SUCCESS)
187 {
188 log_error("clSetKernelArgs failed\n");
189 return -1;
190 }
191 }
192
193 threads[0] = (size_t)n_elems;
194 for (i=0; i<kTotalVecCount; i++)
195 {
196 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
197 if (err != CL_SUCCESS)
198 {
199 log_error("clEnqueueNDRangeKernel failed\n");
200 return -1;
201 }
202
203 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
204 if (err != CL_SUCCESS)
205 {
206 log_error("clEnqueueReadBuffer failed\n");
207 return -1;
208 }
209
210 switch (i)
211 {
212 case 0:
213 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
214 if (err)
215 log_error("STEP float test failed\n");
216 else
217 log_info("STEP float test passed\n");
218 break;
219
220 case 1:
221 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
222 if (err)
223 log_error("STEP float2 test failed\n");
224 else
225 log_info("STEP float2 test passed\n");
226 break;
227
228 case 2:
229 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
230 if (err)
231 log_error("STEP float4 test failed\n");
232 else
233 log_info("STEP float4 test passed\n");
234 break;
235
236 case 3:
237 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
238 if (err)
239 log_error("STEP float8 test failed\n");
240 else
241 log_info("STEP float8 test passed\n");
242 break;
243
244 case 4:
245 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
246 if (err)
247 log_error("STEP float16 test failed\n");
248 else
249 log_info("STEP float16 test passed\n");
250 break;
251
252 case 5:
253 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
254 if (err)
255 log_error("STEP float3 test failed\n");
256 else
257 log_info("STEP float3 test passed\n");
258 break;
259 }
260
261 if (err)
262 break;
263 }
264
265 clReleaseMemObject(streams[0]);
266 clReleaseMemObject(streams[1]);
267 clReleaseMemObject(streams[2]);
268 for (i=0; i<kTotalVecCount; i++)
269 {
270 clReleaseKernel(kernel[i]);
271 clReleaseProgram(program[i]);
272 }
273 free(input_ptr[0]);
274 free(input_ptr[1]);
275 free(output_ptr);
276
277 if(err)
278 return err;
279
280 if( ! is_extension_available( device, "cl_khr_fp64" ))
281 {
282 log_info( "Device does not support cl_khr_fp64. Skipping double precision tests.\n" );
283 return 0;
284 }
285
286 return test_stepf_double( device, context, queue, n_elems);
287 }
288
289 #pragma mark -
290
291 static const char *step_kernel_code_double =
292 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
293 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
294 "{\n"
295 " int tid = get_global_id(0);\n"
296 "\n"
297 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
298 "}\n";
299
300 static const char *step2_kernel_code_double =
301 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
302 "__kernel void test_step2_double(__global double *srcA, __global double2 *srcB, __global double2 *dst)\n"
303 "{\n"
304 " int tid = get_global_id(0);\n"
305 "\n"
306 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
307 "}\n";
308
309 static const char *step4_kernel_code_double =
310 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
311 "__kernel void test_step4_double(__global double *srcA, __global double4 *srcB, __global double4 *dst)\n"
312 "{\n"
313 " int tid = get_global_id(0);\n"
314 "\n"
315 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
316 "}\n";
317
318 static const char *step8_kernel_code_double =
319 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
320 "__kernel void test_step8_double(__global double *srcA, __global double8 *srcB, __global double8 *dst)\n"
321 "{\n"
322 " int tid = get_global_id(0);\n"
323 "\n"
324 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
325 "}\n";
326
327 static const char *step16_kernel_code_double =
328 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
329 "__kernel void test_step16_double(__global double *srcA, __global double16 *srcB, __global double16 *dst)\n"
330 "{\n"
331 " int tid = get_global_id(0);\n"
332 "\n"
333 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
334 "}\n";
335
336 static const char *step3_kernel_code_double =
337 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
338 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
339 "{\n"
340 " int tid = get_global_id(0);\n"
341 "\n"
342 " vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
343 "}\n";
344
345
346 static int
verify_step_double(cl_double * inptrA,cl_double * inptrB,cl_double * outptr,int n,int veclen)347 verify_step_double(cl_double *inptrA, cl_double *inptrB, cl_double *outptr, int n, int veclen)
348 {
349 double r;
350 int i, j;
351
352 for (i=0; i<n; ) {
353 int ii = i/veclen;
354 for (j=0; j<veclen && i<n; ++j, ++i) {
355 r = (inptrB[i] < inptrA[ii]) ? 0.0 : 1.0;
356 if (r != outptr[i])
357 {
358 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
359 return -1;
360 }
361 }
362 }
363
364 return 0;
365 }
366
test_stepf_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)367 int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
368 {
369 cl_mem streams[3];
370 cl_double *input_ptr[2], *output_ptr, *p;
371 cl_program program[kTotalVecCount];
372 cl_kernel kernel[kTotalVecCount];
373 size_t threads[1];
374 int num_elements;
375 int err;
376 int i;
377 MTdata d;
378 num_elements = n_elems * 16;
379
380 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
381 input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
382 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
383 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
384 sizeof(cl_double) * num_elements, NULL, NULL);
385 if (!streams[0])
386 {
387 log_error("clCreateBuffer failed\n");
388 return -1;
389 }
390 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
391 sizeof(cl_double) * num_elements, NULL, NULL);
392 if (!streams[1])
393 {
394 log_error("clCreateBuffer failed\n");
395 return -1;
396 }
397 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
398 sizeof(cl_double) * num_elements, NULL, NULL);
399 if (!streams[2])
400 {
401 log_error("clCreateBuffer failed\n");
402 return -1;
403 }
404
405 p = input_ptr[0];
406 d = init_genrand( gRandomSeed );
407 for (i=0; i<num_elements; i++)
408 p[i] = get_random_double(-0x40000000, 0x40000000, d);
409
410 p = input_ptr[1];
411 for (i=0; i<num_elements; i++)
412 p[i] = get_random_double(-0x40000000, 0x40000000, d);
413
414 free_mtdata(d); d = NULL;
415
416 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
417 if (err != CL_SUCCESS)
418 {
419 log_error("clWriteArray failed\n");
420 return -1;
421 }
422 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
423 if (err != CL_SUCCESS)
424 {
425 log_error("clWriteArray failed\n");
426 return -1;
427 }
428
429 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
430 if (err)
431 return -1;
432 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
433 if (err)
434 return -1;
435 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
436 if (err)
437 return -1;
438 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
439 if (err)
440 return -1;
441 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
442 if (err)
443 return -1;
444 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
445 if (err)
446 return -1;
447
448 for (i=0; i <kTotalVecCount; i++)
449 {
450 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
451 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
452 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
453 if (err != CL_SUCCESS)
454 {
455 log_error("clSetKernelArgs failed\n");
456 return -1;
457 }
458 }
459
460 threads[0] = (size_t)n_elems;
461 for (i=0; i<kTotalVecCount; i++)
462 {
463 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
464 if (err != CL_SUCCESS)
465 {
466 log_error("clEnqueueNDRangeKernel failed\n");
467 return -1;
468 }
469
470 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
471 if (err != CL_SUCCESS)
472 {
473 log_error("clEnqueueReadBuffer failed\n");
474 return -1;
475 }
476
477 switch (i)
478 {
479 case 0:
480 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
481 if (err)
482 log_error("STEP double test failed\n");
483 else
484 log_info("STEP double test passed\n");
485 break;
486
487 case 1:
488 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
489 if (err)
490 log_error("STEP double2 test failed\n");
491 else
492 log_info("STEP double2 test passed\n");
493 break;
494
495 case 2:
496 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
497 if (err)
498 log_error("STEP double4 test failed\n");
499 else
500 log_info("STEP double4 test passed\n");
501 break;
502
503 case 3:
504 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
505 if (err)
506 log_error("STEP double8 test failed\n");
507 else
508 log_info("STEP double8 test passed\n");
509 break;
510
511 case 4:
512 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
513 if (err)
514 log_error("STEP double16 test failed\n");
515 else
516 log_info("STEP double16 test passed\n");
517 break;
518
519 case 5:
520 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
521 if (err)
522 log_error("STEP double3 test failed\n");
523 else
524 log_info("STEP double3 test passed\n");
525 break;
526 }
527
528 if (err)
529 break;
530 }
531
532 clReleaseMemObject(streams[0]);
533 clReleaseMemObject(streams[1]);
534 clReleaseMemObject(streams[2]);
535 for (i=0; i<kTotalVecCount; i++)
536 {
537 clReleaseKernel(kernel[i]);
538 clReleaseProgram(program[i]);
539 }
540 free(input_ptr[0]);
541 free(input_ptr[1]);
542 free(output_ptr);
543
544 return err;
545 }
546
547