1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 static int
26 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
27
28
29 const char *step_kernel_code =
30 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
31 "{\n"
32 " int tid = get_global_id(0);\n"
33 "\n"
34 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
35 "}\n";
36
37 const char *step2_kernel_code =
38 "__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
39 "{\n"
40 " int tid = get_global_id(0);\n"
41 "\n"
42 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
43 "}\n";
44
45 const char *step4_kernel_code =
46 "__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
47 "{\n"
48 " int tid = get_global_id(0);\n"
49 "\n"
50 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
51 "}\n";
52
53 const char *step8_kernel_code =
54 "__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
55 "{\n"
56 " int tid = get_global_id(0);\n"
57 "\n"
58 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
59 "}\n";
60
61 const char *step16_kernel_code =
62 "__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
63 "{\n"
64 " int tid = get_global_id(0);\n"
65 "\n"
66 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
67 "}\n";
68
69 const char *step3_kernel_code =
70 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
71 "{\n"
72 " int tid = get_global_id(0);\n"
73 "\n"
74 " vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
75 "}\n";
76
77
78 int
verify_step(float * inptrA,float * inptrB,float * outptr,int n)79 verify_step(float *inptrA, float *inptrB, float *outptr, int n)
80 {
81 float r;
82 int i;
83
84 for (i=0; i<n; i++)
85 {
86 r = (inptrB[i] < inptrA[i]) ? 0.0f : 1.0f;
87 if (r != outptr[i])
88 return -1;
89 }
90
91 return 0;
92 }
93
94 int
test_step(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)95 test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
96 {
97 cl_mem streams[3];
98 cl_float *input_ptr[2], *output_ptr, *p;
99 cl_program program[kTotalVecCount];
100 cl_kernel kernel[kTotalVecCount];
101 void *values[3];
102 size_t threads[1];
103 int num_elements;
104 int err;
105 int i;
106 MTdata d;
107 num_elements = n_elems * 16;
108
109 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
110 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
111 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
113 sizeof(cl_float) * num_elements, NULL, NULL);
114 if (!streams[0])
115 {
116 log_error("clCreateBuffer failed\n");
117 return -1;
118 }
119 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
120 sizeof(cl_float) * num_elements, NULL, NULL);
121 if (!streams[1])
122 {
123 log_error("clCreateBuffer failed\n");
124 return -1;
125 }
126 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
127 sizeof(cl_float) * num_elements, NULL, NULL);
128 if (!streams[2])
129 {
130 log_error("clCreateBuffer failed\n");
131 return -1;
132 }
133
134 p = input_ptr[0];
135 d = init_genrand( gRandomSeed );
136 for (i=0; i<num_elements; i++)
137 {
138 p[i] = get_random_float(-0x40000000, 0x40000000, d);
139 }
140 p = input_ptr[1];
141 for (i=0; i<num_elements; i++)
142 {
143 p[i] = get_random_float(-0x40000000, 0x40000000, d);
144 }
145 free_mtdata(d); d = NULL;
146
147 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
148 if (err != CL_SUCCESS)
149 {
150 log_error("clWriteArray failed\n");
151 return -1;
152 }
153 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
154 if (err != CL_SUCCESS)
155 {
156 log_error("clWriteArray failed\n");
157 return -1;
158 }
159
160 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
161 if (err)
162 return -1;
163 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
164 if (err)
165 return -1;
166 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
167 if (err)
168 return -1;
169 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
170 if (err)
171 return -1;
172 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
173 if (err)
174 return -1;
175 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
176 if (err)
177 return -1;
178
179 values[0] = streams[0];
180 values[1] = streams[1];
181 values[2] = streams[2];
182 for (i=0; i <kTotalVecCount; i++)
183 {
184 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
185 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
186 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
187 if (err != CL_SUCCESS)
188 {
189 log_error("clSetKernelArgs failed\n");
190 return -1;
191 }
192 }
193
194 threads[0] = (size_t)n_elems;
195 for (i=0; i<kTotalVecCount; i++)
196 {
197 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
198 if (err != CL_SUCCESS)
199 {
200 log_error("clEnqueueNDRangeKernel failed\n");
201 return -1;
202 }
203
204 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
205 if (err != CL_SUCCESS)
206 {
207 log_error("clEnqueueReadBuffer failed\n");
208 return -1;
209 }
210
211 switch (i)
212 {
213 case 0:
214 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems);
215 if (err)
216 log_error("STEP float test failed\n");
217 else
218 log_info("STEP float test passed\n");
219 break;
220
221 case 1:
222 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
223 if (err)
224 log_error("STEP float2 test failed\n");
225 else
226 log_info("STEP float2 test passed\n");
227 break;
228
229 case 2:
230 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
231 if (err)
232 log_error("STEP float4 test failed\n");
233 else
234 log_info("STEP float4 test passed\n");
235 break;
236
237 case 3:
238 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
239 if (err)
240 log_error("STEP float8 test failed\n");
241 else
242 log_info("STEP float8 test passed\n");
243 break;
244
245 case 4:
246 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
247 if (err)
248 log_error("STEP float16 test failed\n");
249 else
250 log_info("STEP float16 test passed\n");
251 break;
252
253 case 5:
254 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
255 if (err)
256 log_error("STEP float3 test failed\n");
257 else
258 log_info("STEP float3 test passed\n");
259 break;
260 }
261
262 if (err)
263 break;
264 }
265
266 clReleaseMemObject(streams[0]);
267 clReleaseMemObject(streams[1]);
268 clReleaseMemObject(streams[2]);
269 for (i=0; i<kTotalVecCount; i++)
270 {
271 clReleaseKernel(kernel[i]);
272 clReleaseProgram(program[i]);
273 }
274 free(input_ptr[0]);
275 free(input_ptr[1]);
276 free(output_ptr);
277
278 if( err )
279 return err;
280
281 if( ! is_extension_available( device, "cl_khr_fp64" ))
282 return 0;
283
284 return test_step_double( device, context, queue, n_elems);
285 }
286
287
288 #pragma mark -
289
290 const char *step_kernel_code_double =
291 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
292 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
293 "{\n"
294 " int tid = get_global_id(0);\n"
295 "\n"
296 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
297 "}\n";
298
299 const char *step2_kernel_code_double =
300 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
301 "__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n"
302 "{\n"
303 " int tid = get_global_id(0);\n"
304 "\n"
305 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
306 "}\n";
307
308 const char *step4_kernel_code_double =
309 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
310 "__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n"
311 "{\n"
312 " int tid = get_global_id(0);\n"
313 "\n"
314 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
315 "}\n";
316
317 const char *step8_kernel_code_double =
318 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
319 "__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n"
320 "{\n"
321 " int tid = get_global_id(0);\n"
322 "\n"
323 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
324 "}\n";
325
326 const char *step16_kernel_code_double =
327 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
328 "__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n"
329 "{\n"
330 " int tid = get_global_id(0);\n"
331 "\n"
332 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
333 "}\n";
334
335 const char *step3_kernel_code_double =
336 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
337 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
338 "{\n"
339 " int tid = get_global_id(0);\n"
340 "\n"
341 " vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
342 "}\n";
343
344
345 int
verify_step_double(double * inptrA,double * inptrB,double * outptr,int n)346 verify_step_double(double *inptrA, double *inptrB, double *outptr, int n)
347 {
348 double r;
349 int i;
350
351 for (i=0; i<n; i++)
352 {
353 r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
354 if (r != outptr[i])
355 return -1;
356 }
357
358 return 0;
359 }
360
361 static int
test_step_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)362 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
363 {
364 cl_mem streams[3];
365 cl_double *input_ptr[2], *output_ptr, *p;
366 cl_program program[kTotalVecCount];
367 cl_kernel kernel[kTotalVecCount];
368 void *values[3];
369 size_t threads[1];
370 int num_elements;
371 int err;
372 int i;
373 MTdata d;
374 num_elements = n_elems * 16;
375
376 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
377 input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
378 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
379 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
380 sizeof(cl_double) * num_elements, NULL, NULL);
381 if (!streams[0])
382 {
383 log_error("clCreateBuffer failed\n");
384 return -1;
385 }
386 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
387 sizeof(cl_double) * num_elements, NULL, NULL);
388 if (!streams[1])
389 {
390 log_error("clCreateBuffer failed\n");
391 return -1;
392 }
393 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
394 sizeof(cl_double) * num_elements, NULL, NULL);
395 if (!streams[2])
396 {
397 log_error("clCreateBuffer failed\n");
398 return -1;
399 }
400
401 p = input_ptr[0];
402 d = init_genrand( gRandomSeed );
403 for (i=0; i<num_elements; i++)
404 {
405 p[i] = get_random_double(-0x40000000, 0x40000000, d);
406 }
407 p = input_ptr[1];
408 for (i=0; i<num_elements; i++)
409 {
410 p[i] = get_random_double(-0x40000000, 0x40000000, d);
411 }
412 free_mtdata(d); d = NULL;
413
414 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
415 if (err != CL_SUCCESS)
416 {
417 log_error("clWriteArray failed\n");
418 return -1;
419 }
420 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
421 if (err != CL_SUCCESS)
422 {
423 log_error("clWriteArray failed\n");
424 return -1;
425 }
426
427 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
428 if (err)
429 return -1;
430 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
431 if (err)
432 return -1;
433 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
434 if (err)
435 return -1;
436 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
437 if (err)
438 return -1;
439 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
440 if (err)
441 return -1;
442 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
443 if (err)
444 return -1;
445
446 values[0] = streams[0];
447 values[1] = streams[1];
448 values[2] = streams[2];
449 for (i=0; i < kTotalVecCount; i++)
450 {
451 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
452 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
453 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
454 if (err != CL_SUCCESS)
455 {
456 log_error("clSetKernelArgs failed\n");
457 return -1;
458 }
459 }
460
461 threads[0] = (size_t)n_elems;
462 for (i=0; i<kTotalVecCount; i++)
463 {
464 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
465 if (err != CL_SUCCESS)
466 {
467 log_error("clEnqueueNDRangeKernel failed\n");
468 return -1;
469 }
470
471 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
472 if (err != CL_SUCCESS)
473 {
474 log_error("clEnqueueReadBuffer failed\n");
475 return -1;
476 }
477
478 switch (i)
479 {
480 case 0:
481 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems);
482 if (err)
483 log_error("STEP double test failed\n");
484 else
485 log_info("STEP double test passed\n");
486 break;
487
488 case 1:
489 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
490 if (err)
491 log_error("STEP double2 test failed\n");
492 else
493 log_info("STEP double2 test passed\n");
494 break;
495
496 case 2:
497 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
498 if (err)
499 log_error("STEP double4 test failed\n");
500 else
501 log_info("STEP double4 test passed\n");
502 break;
503
504 case 3:
505 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
506 if (err)
507 log_error("STEP double8 test failed\n");
508 else
509 log_info("STEP double8 test passed\n");
510 break;
511
512 case 4:
513 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
514 if (err)
515 log_error("STEP double16 test failed\n");
516 else
517 log_info("STEP double16 test passed\n");
518 break;
519
520 case 5:
521 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
522 if (err)
523 log_error("STEP double3 test failed\n");
524 else
525 log_info("STEP double3 test passed\n");
526 break;
527 }
528
529 if (err)
530 break;
531 }
532
533 clReleaseMemObject(streams[0]);
534 clReleaseMemObject(streams[1]);
535 clReleaseMemObject(streams[2]);
536 for (i=0; i<kTotalVecCount; i++)
537 {
538 clReleaseKernel(kernel[i]);
539 clReleaseProgram(program[i]);
540 }
541 free(input_ptr[0]);
542 free(input_ptr[1]);
543 free(output_ptr);
544
545 return err;
546 }
547
548