1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
26
27
28 static const char *step_kernel_code =
29 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
30 "{\n"
31 " int tid = get_global_id(0);\n"
32 "\n"
33 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
34 "}\n";
35
36 static const char *step2_kernel_code =
37 "__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n"
38 "{\n"
39 " int tid = get_global_id(0);\n"
40 "\n"
41 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
42 "}\n";
43
44 static const char *step4_kernel_code =
45 "__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n"
46 "{\n"
47 " int tid = get_global_id(0);\n"
48 "\n"
49 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
50 "}\n";
51
52 static const char *step8_kernel_code =
53 "__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n"
54 "{\n"
55 " int tid = get_global_id(0);\n"
56 "\n"
57 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
58 "}\n";
59
60 static const char *step16_kernel_code =
61 "__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n"
62 "{\n"
63 " int tid = get_global_id(0);\n"
64 "\n"
65 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
66 "}\n";
67
68 static const char *step3_kernel_code =
69 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
70 "{\n"
71 " int tid = get_global_id(0);\n"
72 "\n"
73 " vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
74 "}\n";
75
76
77 static int
verify_step(cl_float * inptrA,cl_float * inptrB,cl_float * outptr,int n,int veclen)78 verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen)
79 {
80 float r;
81 int i, j;
82
83 for (i=0; i<n; ) {
84 int ii = i/veclen;
85 for (j=0; j<veclen && i<n; ++j, ++i) {
86 r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
87 if (r != outptr[i])
88 {
89 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
90 return -1;
91 }
92 }
93 }
94
95 return 0;
96 }
97
test_stepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)98 int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
99 {
100 cl_mem streams[3];
101 cl_float *input_ptr[2], *output_ptr, *p;
102 cl_program program[kTotalVecCount];
103 cl_kernel kernel[kTotalVecCount];
104 size_t threads[1];
105 int num_elements;
106 int err;
107 int i;
108 MTdata d;
109 num_elements = n_elems * 16;
110
111 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
113 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
114 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
115 if (!streams[0])
116 {
117 log_error("clCreateBuffer failed\n");
118 return -1;
119 }
120 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
121 if (!streams[1])
122 {
123 log_error("clCreateBuffer failed\n");
124 return -1;
125 }
126 streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
127 if (!streams[2])
128 {
129 log_error("clCreateBuffer failed\n");
130 return -1;
131 }
132
133 p = input_ptr[0];
134 d = init_genrand( gRandomSeed );
135 for (i=0; i<num_elements; i++)
136 {
137 p[i] = get_random_float(-0x40000000, 0x40000000, d);
138 }
139 p = input_ptr[1];
140 for (i=0; i<num_elements; i++)
141 {
142 p[i] = get_random_float(-0x40000000, 0x40000000, d);
143 }
144 free_mtdata(d); d = NULL;
145
146 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
147 if (err != CL_SUCCESS)
148 {
149 log_error("clWriteArray failed\n");
150 return -1;
151 }
152 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
153 if (err != CL_SUCCESS)
154 {
155 log_error("clWriteArray failed\n");
156 return -1;
157 }
158
159 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
160 if (err)
161 return -1;
162 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
163 if (err)
164 return -1;
165 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
166 if (err)
167 return -1;
168 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
169 if (err)
170 return -1;
171 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
172 if (err)
173 return -1;
174 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
175 if (err)
176 return -1;
177
178 for (i=0; i <kTotalVecCount; i++)
179 {
180 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
181 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
182 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
183 if (err != CL_SUCCESS)
184 {
185 log_error("clSetKernelArgs failed\n");
186 return -1;
187 }
188 }
189
190 threads[0] = (size_t)n_elems;
191 for (i=0; i<kTotalVecCount; i++)
192 {
193 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
194 if (err != CL_SUCCESS)
195 {
196 log_error("clEnqueueNDRangeKernel failed\n");
197 return -1;
198 }
199
200 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
201 if (err != CL_SUCCESS)
202 {
203 log_error("clEnqueueReadBuffer failed\n");
204 return -1;
205 }
206
207 switch (i)
208 {
209 case 0:
210 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
211 if (err)
212 log_error("STEP float test failed\n");
213 else
214 log_info("STEP float test passed\n");
215 break;
216
217 case 1:
218 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
219 if (err)
220 log_error("STEP float2 test failed\n");
221 else
222 log_info("STEP float2 test passed\n");
223 break;
224
225 case 2:
226 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
227 if (err)
228 log_error("STEP float4 test failed\n");
229 else
230 log_info("STEP float4 test passed\n");
231 break;
232
233 case 3:
234 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
235 if (err)
236 log_error("STEP float8 test failed\n");
237 else
238 log_info("STEP float8 test passed\n");
239 break;
240
241 case 4:
242 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
243 if (err)
244 log_error("STEP float16 test failed\n");
245 else
246 log_info("STEP float16 test passed\n");
247 break;
248
249 case 5:
250 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
251 if (err)
252 log_error("STEP float3 test failed\n");
253 else
254 log_info("STEP float3 test passed\n");
255 break;
256 }
257
258 if (err)
259 break;
260 }
261
262 clReleaseMemObject(streams[0]);
263 clReleaseMemObject(streams[1]);
264 clReleaseMemObject(streams[2]);
265 for (i=0; i<kTotalVecCount; i++)
266 {
267 clReleaseKernel(kernel[i]);
268 clReleaseProgram(program[i]);
269 }
270 free(input_ptr[0]);
271 free(input_ptr[1]);
272 free(output_ptr);
273
274 if(err)
275 return err;
276
277 if( ! is_extension_available( device, "cl_khr_fp64" ))
278 {
279 log_info( "Device does not support cl_khr_fp64. Skipping double precision tests.\n" );
280 return 0;
281 }
282
283 return test_stepf_double( device, context, queue, n_elems);
284 }
285
286 #pragma mark -
287
288 static const char *step_kernel_code_double =
289 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
290 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
291 "{\n"
292 " int tid = get_global_id(0);\n"
293 "\n"
294 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
295 "}\n";
296
297 static const char *step2_kernel_code_double =
298 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
299 "__kernel void test_step2_double(__global double *srcA, __global double2 *srcB, __global double2 *dst)\n"
300 "{\n"
301 " int tid = get_global_id(0);\n"
302 "\n"
303 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
304 "}\n";
305
306 static const char *step4_kernel_code_double =
307 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
308 "__kernel void test_step4_double(__global double *srcA, __global double4 *srcB, __global double4 *dst)\n"
309 "{\n"
310 " int tid = get_global_id(0);\n"
311 "\n"
312 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
313 "}\n";
314
315 static const char *step8_kernel_code_double =
316 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
317 "__kernel void test_step8_double(__global double *srcA, __global double8 *srcB, __global double8 *dst)\n"
318 "{\n"
319 " int tid = get_global_id(0);\n"
320 "\n"
321 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
322 "}\n";
323
324 static const char *step16_kernel_code_double =
325 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
326 "__kernel void test_step16_double(__global double *srcA, __global double16 *srcB, __global double16 *dst)\n"
327 "{\n"
328 " int tid = get_global_id(0);\n"
329 "\n"
330 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
331 "}\n";
332
333 static const char *step3_kernel_code_double =
334 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
335 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
336 "{\n"
337 " int tid = get_global_id(0);\n"
338 "\n"
339 " vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
340 "}\n";
341
342
343 static int
verify_step_double(cl_double * inptrA,cl_double * inptrB,cl_double * outptr,int n,int veclen)344 verify_step_double(cl_double *inptrA, cl_double *inptrB, cl_double *outptr, int n, int veclen)
345 {
346 double r;
347 int i, j;
348
349 for (i=0; i<n; ) {
350 int ii = i/veclen;
351 for (j=0; j<veclen && i<n; ++j, ++i) {
352 r = (inptrB[i] < inptrA[ii]) ? 0.0 : 1.0;
353 if (r != outptr[i])
354 {
355 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
356 return -1;
357 }
358 }
359 }
360
361 return 0;
362 }
363
test_stepf_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)364 int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
365 {
366 cl_mem streams[3];
367 cl_double *input_ptr[2], *output_ptr, *p;
368 cl_program program[kTotalVecCount];
369 cl_kernel kernel[kTotalVecCount];
370 size_t threads[1];
371 int num_elements;
372 int err;
373 int i;
374 MTdata d;
375 num_elements = n_elems * 16;
376
377 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
378 input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
379 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
380 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
381 if (!streams[0])
382 {
383 log_error("clCreateBuffer failed\n");
384 return -1;
385 }
386 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
387 if (!streams[1])
388 {
389 log_error("clCreateBuffer failed\n");
390 return -1;
391 }
392 streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
393 if (!streams[2])
394 {
395 log_error("clCreateBuffer failed\n");
396 return -1;
397 }
398
399 p = input_ptr[0];
400 d = init_genrand( gRandomSeed );
401 for (i=0; i<num_elements; i++)
402 p[i] = get_random_double(-0x40000000, 0x40000000, d);
403
404 p = input_ptr[1];
405 for (i=0; i<num_elements; i++)
406 p[i] = get_random_double(-0x40000000, 0x40000000, d);
407
408 free_mtdata(d); d = NULL;
409
410 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
411 if (err != CL_SUCCESS)
412 {
413 log_error("clWriteArray failed\n");
414 return -1;
415 }
416 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
417 if (err != CL_SUCCESS)
418 {
419 log_error("clWriteArray failed\n");
420 return -1;
421 }
422
423 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
424 if (err)
425 return -1;
426 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
427 if (err)
428 return -1;
429 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
430 if (err)
431 return -1;
432 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
433 if (err)
434 return -1;
435 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
436 if (err)
437 return -1;
438 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
439 if (err)
440 return -1;
441
442 for (i=0; i <kTotalVecCount; i++)
443 {
444 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
445 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
446 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
447 if (err != CL_SUCCESS)
448 {
449 log_error("clSetKernelArgs failed\n");
450 return -1;
451 }
452 }
453
454 threads[0] = (size_t)n_elems;
455 for (i=0; i<kTotalVecCount; i++)
456 {
457 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
458 if (err != CL_SUCCESS)
459 {
460 log_error("clEnqueueNDRangeKernel failed\n");
461 return -1;
462 }
463
464 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
465 if (err != CL_SUCCESS)
466 {
467 log_error("clEnqueueReadBuffer failed\n");
468 return -1;
469 }
470
471 switch (i)
472 {
473 case 0:
474 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
475 if (err)
476 log_error("STEP double test failed\n");
477 else
478 log_info("STEP double test passed\n");
479 break;
480
481 case 1:
482 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
483 if (err)
484 log_error("STEP double2 test failed\n");
485 else
486 log_info("STEP double2 test passed\n");
487 break;
488
489 case 2:
490 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
491 if (err)
492 log_error("STEP double4 test failed\n");
493 else
494 log_info("STEP double4 test passed\n");
495 break;
496
497 case 3:
498 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
499 if (err)
500 log_error("STEP double8 test failed\n");
501 else
502 log_info("STEP double8 test passed\n");
503 break;
504
505 case 4:
506 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
507 if (err)
508 log_error("STEP double16 test failed\n");
509 else
510 log_info("STEP double16 test passed\n");
511 break;
512
513 case 5:
514 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
515 if (err)
516 log_error("STEP double3 test failed\n");
517 else
518 log_info("STEP double3 test passed\n");
519 break;
520 }
521
522 if (err)
523 break;
524 }
525
526 clReleaseMemObject(streams[0]);
527 clReleaseMemObject(streams[1]);
528 clReleaseMemObject(streams[2]);
529 for (i=0; i<kTotalVecCount; i++)
530 {
531 clReleaseKernel(kernel[i]);
532 clReleaseProgram(program[i]);
533 }
534 free(input_ptr[0]);
535 free(input_ptr[1]);
536 free(output_ptr);
537
538 return err;
539 }
540
541