• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 static int
26 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
27 
28 
29 const char *step_kernel_code =
30 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
31 "{\n"
32 "    int  tid = get_global_id(0);\n"
33 "\n"
34 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
35 "}\n";
36 
37 const char *step2_kernel_code =
38 "__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
39 "{\n"
40 "    int  tid = get_global_id(0);\n"
41 "\n"
42 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
43 "}\n";
44 
45 const char *step4_kernel_code =
46 "__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
47 "{\n"
48 "    int  tid = get_global_id(0);\n"
49 "\n"
50 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
51 "}\n";
52 
53 const char *step8_kernel_code =
54 "__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
55 "{\n"
56 "    int  tid = get_global_id(0);\n"
57 "\n"
58 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
59 "}\n";
60 
61 const char *step16_kernel_code =
62 "__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
63 "{\n"
64 "    int  tid = get_global_id(0);\n"
65 "\n"
66 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
67 "}\n";
68 
69 const char *step3_kernel_code =
70 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
71 "{\n"
72 "    int  tid = get_global_id(0);\n"
73 "\n"
74 "    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
75 "}\n";
76 
77 
78 int
verify_step(float * inptrA,float * inptrB,float * outptr,int n)79 verify_step(float *inptrA, float *inptrB, float *outptr, int n)
80 {
81     float       r;
82     int         i;
83 
84     for (i=0; i<n; i++)
85     {
86         r = (inptrB[i] < inptrA[i]) ? 0.0f : 1.0f;
87         if (r != outptr[i])
88             return -1;
89     }
90 
91     return 0;
92 }
93 
94 int
test_step(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)95 test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
96 {
97     cl_mem      streams[3];
98     cl_float    *input_ptr[2], *output_ptr, *p;
99   cl_program  program[kTotalVecCount];
100   cl_kernel   kernel[kTotalVecCount];
101     void        *values[3];
102     size_t  threads[1];
103     int num_elements;
104     int err;
105     int i;
106     MTdata d;
107   num_elements = n_elems * 16;
108 
109     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
110     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
111     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
113                                 sizeof(cl_float) * num_elements, NULL, NULL);
114     if (!streams[0])
115     {
116         log_error("clCreateBuffer failed\n");
117         return -1;
118     }
119     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
120                                 sizeof(cl_float) * num_elements, NULL, NULL);
121     if (!streams[1])
122     {
123         log_error("clCreateBuffer failed\n");
124         return -1;
125     }
126     streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
127                                 sizeof(cl_float) * num_elements, NULL, NULL);
128     if (!streams[2])
129     {
130         log_error("clCreateBuffer failed\n");
131         return -1;
132     }
133 
134     p = input_ptr[0];
135     d = init_genrand( gRandomSeed );
136     for (i=0; i<num_elements; i++)
137     {
138         p[i] = get_random_float(-0x40000000, 0x40000000, d);
139     }
140     p = input_ptr[1];
141     for (i=0; i<num_elements; i++)
142     {
143         p[i] = get_random_float(-0x40000000, 0x40000000, d);
144     }
145     free_mtdata(d); d = NULL;
146 
147     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
148     if (err != CL_SUCCESS)
149     {
150         log_error("clWriteArray failed\n");
151         return -1;
152     }
153     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
154     if (err != CL_SUCCESS)
155     {
156         log_error("clWriteArray failed\n");
157         return -1;
158     }
159 
160     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
161     if (err)
162         return -1;
163     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
164     if (err)
165         return -1;
166     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
167     if (err)
168         return -1;
169   err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
170   if (err)
171     return -1;
172   err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
173   if (err)
174     return -1;
175   err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
176   if (err)
177     return -1;
178 
179     values[0] = streams[0];
180     values[1] = streams[1];
181     values[2] = streams[2];
182   for (i=0; i <kTotalVecCount; i++)
183     {
184         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
185         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
186         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
187         if (err != CL_SUCCESS)
188         {
189             log_error("clSetKernelArgs failed\n");
190             return -1;
191         }
192     }
193 
194     threads[0] = (size_t)n_elems;
195   for (i=0; i<kTotalVecCount; i++)
196     {
197         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
198         if (err != CL_SUCCESS)
199         {
200             log_error("clEnqueueNDRangeKernel failed\n");
201             return -1;
202         }
203 
204         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
205         if (err != CL_SUCCESS)
206         {
207             log_error("clEnqueueReadBuffer failed\n");
208             return -1;
209         }
210 
211         switch (i)
212         {
213             case 0:
214                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems);
215                 if (err)
216                     log_error("STEP float test failed\n");
217                 else
218                     log_info("STEP float test passed\n");
219                 break;
220 
221             case 1:
222                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
223                 if (err)
224                     log_error("STEP float2 test failed\n");
225                 else
226                     log_info("STEP float2 test passed\n");
227                 break;
228 
229             case 2:
230                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
231                 if (err)
232                     log_error("STEP float4 test failed\n");
233                 else
234                     log_info("STEP float4 test passed\n");
235                 break;
236 
237         case 3:
238         err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
239         if (err)
240           log_error("STEP float8 test failed\n");
241         else
242           log_info("STEP float8 test passed\n");
243         break;
244 
245         case 4:
246         err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
247         if (err)
248           log_error("STEP float16 test failed\n");
249         else
250           log_info("STEP float16 test passed\n");
251         break;
252 
253         case 5:
254         err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
255         if (err)
256           log_error("STEP float3 test failed\n");
257         else
258           log_info("STEP float3 test passed\n");
259         break;
260         }
261 
262         if (err)
263             break;
264     }
265 
266     clReleaseMemObject(streams[0]);
267     clReleaseMemObject(streams[1]);
268     clReleaseMemObject(streams[2]);
269   for (i=0; i<kTotalVecCount; i++)
270     {
271         clReleaseKernel(kernel[i]);
272         clReleaseProgram(program[i]);
273     }
274     free(input_ptr[0]);
275     free(input_ptr[1]);
276     free(output_ptr);
277 
278     if( err )
279         return err;
280 
281     if( ! is_extension_available( device, "cl_khr_fp64" ))
282         return 0;
283 
284     return test_step_double( device, context, queue, n_elems);
285 }
286 
287 
288 #pragma mark -
289 
290 const char *step_kernel_code_double =
291 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
292 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
293 "{\n"
294 "    int  tid = get_global_id(0);\n"
295 "\n"
296 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
297 "}\n";
298 
299 const char *step2_kernel_code_double =
300 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
301 "__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n"
302 "{\n"
303 "    int  tid = get_global_id(0);\n"
304 "\n"
305 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
306 "}\n";
307 
308 const char *step4_kernel_code_double =
309 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
310 "__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n"
311 "{\n"
312 "    int  tid = get_global_id(0);\n"
313 "\n"
314 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
315 "}\n";
316 
317 const char *step8_kernel_code_double =
318 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
319 "__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n"
320 "{\n"
321 "    int  tid = get_global_id(0);\n"
322 "\n"
323 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
324 "}\n";
325 
326 const char *step16_kernel_code_double =
327 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
328 "__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n"
329 "{\n"
330 "    int  tid = get_global_id(0);\n"
331 "\n"
332 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
333 "}\n";
334 
335 const char *step3_kernel_code_double =
336 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
337 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
338 "{\n"
339 "    int  tid = get_global_id(0);\n"
340 "\n"
341 "    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
342 "}\n";
343 
344 
345 int
verify_step_double(double * inptrA,double * inptrB,double * outptr,int n)346 verify_step_double(double *inptrA, double *inptrB, double *outptr, int n)
347 {
348     double       r;
349     int         i;
350 
351     for (i=0; i<n; i++)
352     {
353         r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
354         if (r != outptr[i])
355             return -1;
356     }
357 
358     return 0;
359 }
360 
361 static int
test_step_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)362 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
363 {
364     cl_mem      streams[3];
365     cl_double    *input_ptr[2], *output_ptr, *p;
366     cl_program  program[kTotalVecCount];
367     cl_kernel   kernel[kTotalVecCount];
368     void        *values[3];
369     size_t  threads[1];
370     int num_elements;
371     int err;
372     int i;
373     MTdata d;
374     num_elements = n_elems * 16;
375 
376     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
377     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
378     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
379     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
380                                 sizeof(cl_double) * num_elements, NULL, NULL);
381     if (!streams[0])
382     {
383         log_error("clCreateBuffer failed\n");
384         return -1;
385     }
386     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
387                                 sizeof(cl_double) * num_elements, NULL, NULL);
388     if (!streams[1])
389     {
390         log_error("clCreateBuffer failed\n");
391         return -1;
392     }
393     streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
394                                 sizeof(cl_double) * num_elements, NULL, NULL);
395     if (!streams[2])
396     {
397         log_error("clCreateBuffer failed\n");
398         return -1;
399     }
400 
401     p = input_ptr[0];
402     d = init_genrand( gRandomSeed );
403     for (i=0; i<num_elements; i++)
404     {
405         p[i] = get_random_double(-0x40000000, 0x40000000, d);
406     }
407     p = input_ptr[1];
408     for (i=0; i<num_elements; i++)
409     {
410         p[i] = get_random_double(-0x40000000, 0x40000000, d);
411     }
412     free_mtdata(d); d = NULL;
413 
414     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
415     if (err != CL_SUCCESS)
416     {
417         log_error("clWriteArray failed\n");
418         return -1;
419     }
420     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
421     if (err != CL_SUCCESS)
422     {
423         log_error("clWriteArray failed\n");
424         return -1;
425     }
426 
427     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
428     if (err)
429         return -1;
430     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
431     if (err)
432         return -1;
433     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
434     if (err)
435         return -1;
436     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
437     if (err)
438         return -1;
439     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
440     if (err)
441         return -1;
442     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
443     if (err)
444         return -1;
445 
446     values[0] = streams[0];
447     values[1] = streams[1];
448     values[2] = streams[2];
449     for (i=0; i < kTotalVecCount; i++)
450     {
451         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
452         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
453         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
454         if (err != CL_SUCCESS)
455         {
456             log_error("clSetKernelArgs failed\n");
457             return -1;
458         }
459     }
460 
461     threads[0] = (size_t)n_elems;
462     for (i=0; i<kTotalVecCount; i++)
463     {
464         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
465         if (err != CL_SUCCESS)
466         {
467             log_error("clEnqueueNDRangeKernel failed\n");
468             return -1;
469         }
470 
471         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
472         if (err != CL_SUCCESS)
473         {
474             log_error("clEnqueueReadBuffer failed\n");
475             return -1;
476         }
477 
478         switch (i)
479         {
480             case 0:
481                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems);
482                 if (err)
483                     log_error("STEP double test failed\n");
484                 else
485                     log_info("STEP double test passed\n");
486                 break;
487 
488             case 1:
489                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
490                 if (err)
491                     log_error("STEP double2 test failed\n");
492                 else
493                     log_info("STEP double2 test passed\n");
494                 break;
495 
496             case 2:
497                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
498                 if (err)
499                     log_error("STEP double4 test failed\n");
500                 else
501                     log_info("STEP double4 test passed\n");
502                 break;
503 
504         case 3:
505         err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
506         if (err)
507           log_error("STEP double8 test failed\n");
508         else
509           log_info("STEP double8 test passed\n");
510         break;
511 
512         case 4:
513         err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
514         if (err)
515           log_error("STEP double16 test failed\n");
516         else
517           log_info("STEP double16 test passed\n");
518         break;
519 
520         case 5:
521         err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
522         if (err)
523           log_error("STEP double3 test failed\n");
524         else
525           log_info("STEP double3 test passed\n");
526         break;
527         }
528 
529         if (err)
530             break;
531     }
532 
533     clReleaseMemObject(streams[0]);
534     clReleaseMemObject(streams[1]);
535     clReleaseMemObject(streams[2]);
536     for (i=0; i<kTotalVecCount; i++)
537     {
538         clReleaseKernel(kernel[i]);
539         clReleaseProgram(program[i]);
540     }
541     free(input_ptr[0]);
542     free(input_ptr[1]);
543     free(output_ptr);
544 
545     return err;
546 }
547 
548