1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 static int
26 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
27
28
29 const char *step_kernel_code =
30 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
31 "{\n"
32 " int tid = get_global_id(0);\n"
33 "\n"
34 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
35 "}\n";
36
37 const char *step2_kernel_code =
38 "__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
39 "{\n"
40 " int tid = get_global_id(0);\n"
41 "\n"
42 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
43 "}\n";
44
45 const char *step4_kernel_code =
46 "__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
47 "{\n"
48 " int tid = get_global_id(0);\n"
49 "\n"
50 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
51 "}\n";
52
53 const char *step8_kernel_code =
54 "__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
55 "{\n"
56 " int tid = get_global_id(0);\n"
57 "\n"
58 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
59 "}\n";
60
61 const char *step16_kernel_code =
62 "__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
63 "{\n"
64 " int tid = get_global_id(0);\n"
65 "\n"
66 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
67 "}\n";
68
69 const char *step3_kernel_code =
70 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
71 "{\n"
72 " int tid = get_global_id(0);\n"
73 "\n"
74 " vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
75 "}\n";
76
77
78 int
verify_step(float * inptrA,float * inptrB,float * outptr,int n)79 verify_step(float *inptrA, float *inptrB, float *outptr, int n)
80 {
81 float r;
82 int i;
83
84 for (i=0; i<n; i++)
85 {
86 r = (inptrB[i] < inptrA[i]) ? 0.0f : 1.0f;
87 if (r != outptr[i])
88 return -1;
89 }
90
91 return 0;
92 }
93
94 int
test_step(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)95 test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
96 {
97 cl_mem streams[3];
98 cl_float *input_ptr[2], *output_ptr, *p;
99 cl_program program[kTotalVecCount];
100 cl_kernel kernel[kTotalVecCount];
101 void *values[3];
102 size_t threads[1];
103 int num_elements;
104 int err;
105 int i;
106 MTdata d;
107 num_elements = n_elems * 16;
108
109 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
110 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
111 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
113 if (!streams[0])
114 {
115 log_error("clCreateBuffer failed\n");
116 return -1;
117 }
118 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
119 if (!streams[1])
120 {
121 log_error("clCreateBuffer failed\n");
122 return -1;
123 }
124 streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
125 if (!streams[2])
126 {
127 log_error("clCreateBuffer failed\n");
128 return -1;
129 }
130
131 p = input_ptr[0];
132 d = init_genrand( gRandomSeed );
133 for (i=0; i<num_elements; i++)
134 {
135 p[i] = get_random_float(-0x40000000, 0x40000000, d);
136 }
137 p = input_ptr[1];
138 for (i=0; i<num_elements; i++)
139 {
140 p[i] = get_random_float(-0x40000000, 0x40000000, d);
141 }
142 free_mtdata(d); d = NULL;
143
144 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
145 if (err != CL_SUCCESS)
146 {
147 log_error("clWriteArray failed\n");
148 return -1;
149 }
150 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
151 if (err != CL_SUCCESS)
152 {
153 log_error("clWriteArray failed\n");
154 return -1;
155 }
156
157 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
158 if (err)
159 return -1;
160 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
161 if (err)
162 return -1;
163 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
164 if (err)
165 return -1;
166 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
167 if (err)
168 return -1;
169 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
170 if (err)
171 return -1;
172 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
173 if (err)
174 return -1;
175
176 values[0] = streams[0];
177 values[1] = streams[1];
178 values[2] = streams[2];
179 for (i=0; i <kTotalVecCount; i++)
180 {
181 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
182 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
183 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
184 if (err != CL_SUCCESS)
185 {
186 log_error("clSetKernelArgs failed\n");
187 return -1;
188 }
189 }
190
191 threads[0] = (size_t)n_elems;
192 for (i=0; i<kTotalVecCount; i++)
193 {
194 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
195 if (err != CL_SUCCESS)
196 {
197 log_error("clEnqueueNDRangeKernel failed\n");
198 return -1;
199 }
200
201 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
202 if (err != CL_SUCCESS)
203 {
204 log_error("clEnqueueReadBuffer failed\n");
205 return -1;
206 }
207
208 switch (i)
209 {
210 case 0:
211 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems);
212 if (err)
213 log_error("STEP float test failed\n");
214 else
215 log_info("STEP float test passed\n");
216 break;
217
218 case 1:
219 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
220 if (err)
221 log_error("STEP float2 test failed\n");
222 else
223 log_info("STEP float2 test passed\n");
224 break;
225
226 case 2:
227 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
228 if (err)
229 log_error("STEP float4 test failed\n");
230 else
231 log_info("STEP float4 test passed\n");
232 break;
233
234 case 3:
235 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
236 if (err)
237 log_error("STEP float8 test failed\n");
238 else
239 log_info("STEP float8 test passed\n");
240 break;
241
242 case 4:
243 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
244 if (err)
245 log_error("STEP float16 test failed\n");
246 else
247 log_info("STEP float16 test passed\n");
248 break;
249
250 case 5:
251 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
252 if (err)
253 log_error("STEP float3 test failed\n");
254 else
255 log_info("STEP float3 test passed\n");
256 break;
257 }
258
259 if (err)
260 break;
261 }
262
263 clReleaseMemObject(streams[0]);
264 clReleaseMemObject(streams[1]);
265 clReleaseMemObject(streams[2]);
266 for (i=0; i<kTotalVecCount; i++)
267 {
268 clReleaseKernel(kernel[i]);
269 clReleaseProgram(program[i]);
270 }
271 free(input_ptr[0]);
272 free(input_ptr[1]);
273 free(output_ptr);
274
275 if( err )
276 return err;
277
278 if( ! is_extension_available( device, "cl_khr_fp64" ))
279 return 0;
280
281 return test_step_double( device, context, queue, n_elems);
282 }
283
284
285 #pragma mark -
286
287 const char *step_kernel_code_double =
288 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
289 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
290 "{\n"
291 " int tid = get_global_id(0);\n"
292 "\n"
293 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
294 "}\n";
295
296 const char *step2_kernel_code_double =
297 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
298 "__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n"
299 "{\n"
300 " int tid = get_global_id(0);\n"
301 "\n"
302 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
303 "}\n";
304
305 const char *step4_kernel_code_double =
306 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
307 "__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n"
308 "{\n"
309 " int tid = get_global_id(0);\n"
310 "\n"
311 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
312 "}\n";
313
314 const char *step8_kernel_code_double =
315 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
316 "__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n"
317 "{\n"
318 " int tid = get_global_id(0);\n"
319 "\n"
320 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
321 "}\n";
322
323 const char *step16_kernel_code_double =
324 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
325 "__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n"
326 "{\n"
327 " int tid = get_global_id(0);\n"
328 "\n"
329 " dst[tid] = step(srcA[tid], srcB[tid]);\n"
330 "}\n";
331
332 const char *step3_kernel_code_double =
333 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
334 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
335 "{\n"
336 " int tid = get_global_id(0);\n"
337 "\n"
338 " vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
339 "}\n";
340
341
342 int
verify_step_double(double * inptrA,double * inptrB,double * outptr,int n)343 verify_step_double(double *inptrA, double *inptrB, double *outptr, int n)
344 {
345 double r;
346 int i;
347
348 for (i=0; i<n; i++)
349 {
350 r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
351 if (r != outptr[i])
352 return -1;
353 }
354
355 return 0;
356 }
357
358 static int
test_step_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)359 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
360 {
361 cl_mem streams[3];
362 cl_double *input_ptr[2], *output_ptr, *p;
363 cl_program program[kTotalVecCount];
364 cl_kernel kernel[kTotalVecCount];
365 void *values[3];
366 size_t threads[1];
367 int num_elements;
368 int err;
369 int i;
370 MTdata d;
371 num_elements = n_elems * 16;
372
373 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
374 input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
375 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
376 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
377 if (!streams[0])
378 {
379 log_error("clCreateBuffer failed\n");
380 return -1;
381 }
382 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
383 if (!streams[1])
384 {
385 log_error("clCreateBuffer failed\n");
386 return -1;
387 }
388 streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
389 if (!streams[2])
390 {
391 log_error("clCreateBuffer failed\n");
392 return -1;
393 }
394
395 p = input_ptr[0];
396 d = init_genrand( gRandomSeed );
397 for (i=0; i<num_elements; i++)
398 {
399 p[i] = get_random_double(-0x40000000, 0x40000000, d);
400 }
401 p = input_ptr[1];
402 for (i=0; i<num_elements; i++)
403 {
404 p[i] = get_random_double(-0x40000000, 0x40000000, d);
405 }
406 free_mtdata(d); d = NULL;
407
408 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
409 if (err != CL_SUCCESS)
410 {
411 log_error("clWriteArray failed\n");
412 return -1;
413 }
414 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
415 if (err != CL_SUCCESS)
416 {
417 log_error("clWriteArray failed\n");
418 return -1;
419 }
420
421 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
422 if (err)
423 return -1;
424 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
425 if (err)
426 return -1;
427 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
428 if (err)
429 return -1;
430 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
431 if (err)
432 return -1;
433 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
434 if (err)
435 return -1;
436 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
437 if (err)
438 return -1;
439
440 values[0] = streams[0];
441 values[1] = streams[1];
442 values[2] = streams[2];
443 for (i=0; i < kTotalVecCount; i++)
444 {
445 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
446 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
447 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
448 if (err != CL_SUCCESS)
449 {
450 log_error("clSetKernelArgs failed\n");
451 return -1;
452 }
453 }
454
455 threads[0] = (size_t)n_elems;
456 for (i=0; i<kTotalVecCount; i++)
457 {
458 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
459 if (err != CL_SUCCESS)
460 {
461 log_error("clEnqueueNDRangeKernel failed\n");
462 return -1;
463 }
464
465 err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
466 if (err != CL_SUCCESS)
467 {
468 log_error("clEnqueueReadBuffer failed\n");
469 return -1;
470 }
471
472 switch (i)
473 {
474 case 0:
475 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems);
476 if (err)
477 log_error("STEP double test failed\n");
478 else
479 log_info("STEP double test passed\n");
480 break;
481
482 case 1:
483 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
484 if (err)
485 log_error("STEP double2 test failed\n");
486 else
487 log_info("STEP double2 test passed\n");
488 break;
489
490 case 2:
491 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
492 if (err)
493 log_error("STEP double4 test failed\n");
494 else
495 log_info("STEP double4 test passed\n");
496 break;
497
498 case 3:
499 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
500 if (err)
501 log_error("STEP double8 test failed\n");
502 else
503 log_info("STEP double8 test passed\n");
504 break;
505
506 case 4:
507 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
508 if (err)
509 log_error("STEP double16 test failed\n");
510 else
511 log_info("STEP double16 test passed\n");
512 break;
513
514 case 5:
515 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
516 if (err)
517 log_error("STEP double3 test failed\n");
518 else
519 log_info("STEP double3 test passed\n");
520 break;
521 }
522
523 if (err)
524 break;
525 }
526
527 clReleaseMemObject(streams[0]);
528 clReleaseMemObject(streams[1]);
529 clReleaseMemObject(streams[2]);
530 for (i=0; i<kTotalVecCount; i++)
531 {
532 clReleaseKernel(kernel[i]);
533 clReleaseProgram(program[i]);
534 }
535 free(input_ptr[0]);
536 free(input_ptr[1]);
537 free(output_ptr);
538
539 return err;
540 }
541
542