• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
26 
27 
28 static const char *step_kernel_code =
29 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
30 "{\n"
31 "    int  tid = get_global_id(0);\n"
32 "\n"
33 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
34 "}\n";
35 
36 static const char *step2_kernel_code =
37 "__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n"
38 "{\n"
39 "    int  tid = get_global_id(0);\n"
40 "\n"
41 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
42 "}\n";
43 
44 static const char *step4_kernel_code =
45 "__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n"
46 "{\n"
47 "    int  tid = get_global_id(0);\n"
48 "\n"
49 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
50 "}\n";
51 
52 static const char *step8_kernel_code =
53 "__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n"
54 "{\n"
55 "    int  tid = get_global_id(0);\n"
56 "\n"
57 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
58 "}\n";
59 
60 static const char *step16_kernel_code =
61 "__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n"
62 "{\n"
63 "    int  tid = get_global_id(0);\n"
64 "\n"
65 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
66 "}\n";
67 
68 static const char *step3_kernel_code =
69 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
70 "{\n"
71 "    int  tid = get_global_id(0);\n"
72 "\n"
73 "    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
74 "}\n";
75 
76 
77 static int
verify_step(cl_float * inptrA,cl_float * inptrB,cl_float * outptr,int n,int veclen)78 verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen)
79 {
80     float       r;
81     int         i, j;
82 
83     for (i=0; i<n; ) {
84         int ii = i/veclen;
85         for (j=0; j<veclen && i<n; ++j, ++i) {
86             r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
87             if (r != outptr[i])
88             {
89                 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
90                 return -1;
91             }
92         }
93     }
94 
95     return 0;
96 }
97 
test_stepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)98 int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
99 {
100     cl_mem      streams[3];
101     cl_float    *input_ptr[2], *output_ptr, *p;
102     cl_program  program[kTotalVecCount];
103     cl_kernel   kernel[kTotalVecCount];
104     size_t  threads[1];
105     int num_elements;
106     int err;
107     int i;
108     MTdata d;
109     num_elements = n_elems * 16;
110 
111     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
113     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
114     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
115                                 sizeof(cl_float) * num_elements, NULL, NULL);
116     if (!streams[0])
117     {
118         log_error("clCreateBuffer failed\n");
119         return -1;
120     }
121     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
122                                 sizeof(cl_float) * num_elements, NULL, NULL);
123     if (!streams[1])
124     {
125         log_error("clCreateBuffer failed\n");
126         return -1;
127     }
128     streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
129                                 sizeof(cl_float) * num_elements, NULL, NULL);
130     if (!streams[2])
131     {
132         log_error("clCreateBuffer failed\n");
133         return -1;
134     }
135 
136     p = input_ptr[0];
137     d = init_genrand( gRandomSeed );
138     for (i=0; i<num_elements; i++)
139     {
140         p[i] = get_random_float(-0x40000000, 0x40000000, d);
141     }
142     p = input_ptr[1];
143     for (i=0; i<num_elements; i++)
144     {
145         p[i] = get_random_float(-0x40000000, 0x40000000, d);
146     }
147     free_mtdata(d);   d = NULL;
148 
149     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
150     if (err != CL_SUCCESS)
151     {
152         log_error("clWriteArray failed\n");
153         return -1;
154     }
155     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
156     if (err != CL_SUCCESS)
157     {
158         log_error("clWriteArray failed\n");
159         return -1;
160     }
161 
162     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
163     if (err)
164         return -1;
165     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
166     if (err)
167         return -1;
168     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
169     if (err)
170         return -1;
171     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
172     if (err)
173         return -1;
174     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
175     if (err)
176         return -1;
177     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
178     if (err)
179         return -1;
180 
181     for (i=0; i <kTotalVecCount; i++)
182     {
183         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
184         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
185         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
186         if (err != CL_SUCCESS)
187         {
188             log_error("clSetKernelArgs failed\n");
189             return -1;
190         }
191     }
192 
193     threads[0] = (size_t)n_elems;
194     for (i=0; i<kTotalVecCount; i++)
195     {
196         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
197         if (err != CL_SUCCESS)
198         {
199             log_error("clEnqueueNDRangeKernel failed\n");
200             return -1;
201         }
202 
203         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
204         if (err != CL_SUCCESS)
205         {
206             log_error("clEnqueueReadBuffer failed\n");
207             return -1;
208         }
209 
210         switch (i)
211         {
212             case 0:
213                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
214                 if (err)
215                     log_error("STEP float test failed\n");
216                 else
217                     log_info("STEP float test passed\n");
218                 break;
219 
220             case 1:
221                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
222                 if (err)
223                     log_error("STEP float2 test failed\n");
224                 else
225                     log_info("STEP float2 test passed\n");
226                 break;
227 
228             case 2:
229                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
230                 if (err)
231                     log_error("STEP float4 test failed\n");
232                 else
233                     log_info("STEP float4 test passed\n");
234                 break;
235 
236             case 3:
237                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
238                 if (err)
239                     log_error("STEP float8 test failed\n");
240                 else
241                     log_info("STEP float8 test passed\n");
242                 break;
243 
244             case 4:
245                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
246                 if (err)
247                     log_error("STEP float16 test failed\n");
248                 else
249                     log_info("STEP float16 test passed\n");
250                 break;
251 
252             case 5:
253                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
254                 if (err)
255                     log_error("STEP float3 test failed\n");
256                 else
257                     log_info("STEP float3 test passed\n");
258                 break;
259         }
260 
261         if (err)
262             break;
263     }
264 
265     clReleaseMemObject(streams[0]);
266     clReleaseMemObject(streams[1]);
267     clReleaseMemObject(streams[2]);
268     for (i=0; i<kTotalVecCount; i++)
269     {
270         clReleaseKernel(kernel[i]);
271         clReleaseProgram(program[i]);
272     }
273     free(input_ptr[0]);
274     free(input_ptr[1]);
275     free(output_ptr);
276 
277     if(err)
278         return err;
279 
280     if( ! is_extension_available( device, "cl_khr_fp64" ))
281     {
282         log_info( "Device does not support cl_khr_fp64.  Skipping double precision tests.\n" );
283         return 0;
284     }
285 
286     return test_stepf_double( device, context, queue, n_elems);
287 }
288 
289 #pragma mark -
290 
291 static const char *step_kernel_code_double =
292 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
293 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
294 "{\n"
295 "    int  tid = get_global_id(0);\n"
296 "\n"
297 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
298 "}\n";
299 
300 static const char *step2_kernel_code_double =
301 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
302 "__kernel void test_step2_double(__global double *srcA, __global double2 *srcB, __global double2 *dst)\n"
303 "{\n"
304 "    int  tid = get_global_id(0);\n"
305 "\n"
306 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
307 "}\n";
308 
309 static const char *step4_kernel_code_double =
310 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
311 "__kernel void test_step4_double(__global double *srcA, __global double4 *srcB, __global double4 *dst)\n"
312 "{\n"
313 "    int  tid = get_global_id(0);\n"
314 "\n"
315 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
316 "}\n";
317 
318 static const char *step8_kernel_code_double =
319 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
320 "__kernel void test_step8_double(__global double *srcA, __global double8 *srcB, __global double8 *dst)\n"
321 "{\n"
322 "    int  tid = get_global_id(0);\n"
323 "\n"
324 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
325 "}\n";
326 
327 static const char *step16_kernel_code_double =
328 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
329 "__kernel void test_step16_double(__global double *srcA, __global double16 *srcB, __global double16 *dst)\n"
330 "{\n"
331 "    int  tid = get_global_id(0);\n"
332 "\n"
333 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
334 "}\n";
335 
336 static const char *step3_kernel_code_double =
337 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
338 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
339 "{\n"
340 "    int  tid = get_global_id(0);\n"
341 "\n"
342 "    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
343 "}\n";
344 
345 
346 static int
verify_step_double(cl_double * inptrA,cl_double * inptrB,cl_double * outptr,int n,int veclen)347 verify_step_double(cl_double *inptrA, cl_double *inptrB, cl_double *outptr, int n, int veclen)
348 {
349     double r;
350     int    i, j;
351 
352     for (i=0; i<n; ) {
353         int ii = i/veclen;
354         for (j=0; j<veclen && i<n; ++j, ++i) {
355             r = (inptrB[i] < inptrA[ii]) ? 0.0 : 1.0;
356             if (r != outptr[i])
357             {
358                 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
359                 return -1;
360             }
361         }
362     }
363 
364     return 0;
365 }
366 
test_stepf_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)367 int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
368 {
369     cl_mem      streams[3];
370     cl_double  *input_ptr[2], *output_ptr, *p;
371     cl_program  program[kTotalVecCount];
372     cl_kernel   kernel[kTotalVecCount];
373     size_t  threads[1];
374     int num_elements;
375     int err;
376     int i;
377     MTdata    d;
378     num_elements = n_elems * 16;
379 
380     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
381     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
382     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
383     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
384                                 sizeof(cl_double) * num_elements, NULL, NULL);
385     if (!streams[0])
386     {
387         log_error("clCreateBuffer failed\n");
388         return -1;
389     }
390     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
391                                 sizeof(cl_double) * num_elements, NULL, NULL);
392     if (!streams[1])
393     {
394         log_error("clCreateBuffer failed\n");
395         return -1;
396     }
397     streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
398                                 sizeof(cl_double) * num_elements, NULL, NULL);
399     if (!streams[2])
400     {
401         log_error("clCreateBuffer failed\n");
402         return -1;
403     }
404 
405     p = input_ptr[0];
406     d = init_genrand( gRandomSeed );
407     for (i=0; i<num_elements; i++)
408         p[i] = get_random_double(-0x40000000, 0x40000000, d);
409 
410     p = input_ptr[1];
411     for (i=0; i<num_elements; i++)
412         p[i] = get_random_double(-0x40000000, 0x40000000, d);
413 
414     free_mtdata(d);   d = NULL;
415 
416     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
417     if (err != CL_SUCCESS)
418     {
419         log_error("clWriteArray failed\n");
420         return -1;
421     }
422     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
423     if (err != CL_SUCCESS)
424     {
425         log_error("clWriteArray failed\n");
426         return -1;
427     }
428 
429     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
430     if (err)
431         return -1;
432     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
433     if (err)
434         return -1;
435     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
436     if (err)
437         return -1;
438     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
439     if (err)
440         return -1;
441     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
442     if (err)
443         return -1;
444     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
445     if (err)
446         return -1;
447 
448     for (i=0; i <kTotalVecCount; i++)
449     {
450         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
451         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
452         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
453         if (err != CL_SUCCESS)
454         {
455             log_error("clSetKernelArgs failed\n");
456             return -1;
457         }
458     }
459 
460     threads[0] = (size_t)n_elems;
461     for (i=0; i<kTotalVecCount; i++)
462     {
463         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
464         if (err != CL_SUCCESS)
465         {
466             log_error("clEnqueueNDRangeKernel failed\n");
467             return -1;
468         }
469 
470         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
471         if (err != CL_SUCCESS)
472         {
473             log_error("clEnqueueReadBuffer failed\n");
474             return -1;
475         }
476 
477         switch (i)
478         {
479             case 0:
480                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
481                 if (err)
482                     log_error("STEP double test failed\n");
483                 else
484                     log_info("STEP double test passed\n");
485                 break;
486 
487             case 1:
488                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
489                 if (err)
490                     log_error("STEP double2 test failed\n");
491                 else
492                     log_info("STEP double2 test passed\n");
493                 break;
494 
495             case 2:
496                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
497                 if (err)
498                     log_error("STEP double4 test failed\n");
499                 else
500                     log_info("STEP double4 test passed\n");
501                 break;
502 
503             case 3:
504                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
505                 if (err)
506                     log_error("STEP double8 test failed\n");
507                 else
508                     log_info("STEP double8 test passed\n");
509                 break;
510 
511             case 4:
512                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
513                 if (err)
514                     log_error("STEP double16 test failed\n");
515                 else
516                     log_info("STEP double16 test passed\n");
517                 break;
518 
519             case 5:
520                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
521                 if (err)
522                     log_error("STEP double3 test failed\n");
523                 else
524                     log_info("STEP double3 test passed\n");
525                 break;
526         }
527 
528         if (err)
529             break;
530     }
531 
532     clReleaseMemObject(streams[0]);
533     clReleaseMemObject(streams[1]);
534     clReleaseMemObject(streams[2]);
535     for (i=0; i<kTotalVecCount; i++)
536     {
537         clReleaseKernel(kernel[i]);
538         clReleaseProgram(program[i]);
539     }
540     free(input_ptr[0]);
541     free(input_ptr[1]);
542     free(output_ptr);
543 
544     return err;
545 }
546 
547