• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
26 
27 
28 static const char *step_kernel_code =
29 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
30 "{\n"
31 "    int  tid = get_global_id(0);\n"
32 "\n"
33 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
34 "}\n";
35 
36 static const char *step2_kernel_code =
37 "__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n"
38 "{\n"
39 "    int  tid = get_global_id(0);\n"
40 "\n"
41 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
42 "}\n";
43 
44 static const char *step4_kernel_code =
45 "__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n"
46 "{\n"
47 "    int  tid = get_global_id(0);\n"
48 "\n"
49 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
50 "}\n";
51 
52 static const char *step8_kernel_code =
53 "__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n"
54 "{\n"
55 "    int  tid = get_global_id(0);\n"
56 "\n"
57 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
58 "}\n";
59 
60 static const char *step16_kernel_code =
61 "__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n"
62 "{\n"
63 "    int  tid = get_global_id(0);\n"
64 "\n"
65 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
66 "}\n";
67 
68 static const char *step3_kernel_code =
69 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
70 "{\n"
71 "    int  tid = get_global_id(0);\n"
72 "\n"
73 "    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
74 "}\n";
75 
76 
77 static int
verify_step(cl_float * inptrA,cl_float * inptrB,cl_float * outptr,int n,int veclen)78 verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen)
79 {
80     float       r;
81     int         i, j;
82 
83     for (i=0; i<n; ) {
84         int ii = i/veclen;
85         for (j=0; j<veclen && i<n; ++j, ++i) {
86             r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
87             if (r != outptr[i])
88             {
89                 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
90                 return -1;
91             }
92         }
93     }
94 
95     return 0;
96 }
97 
test_stepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)98 int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
99 {
100     cl_mem      streams[3];
101     cl_float    *input_ptr[2], *output_ptr, *p;
102     cl_program  program[kTotalVecCount];
103     cl_kernel   kernel[kTotalVecCount];
104     size_t  threads[1];
105     int num_elements;
106     int err;
107     int i;
108     MTdata d;
109     num_elements = n_elems * 16;
110 
111     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
113     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
114     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
115     if (!streams[0])
116     {
117         log_error("clCreateBuffer failed\n");
118         return -1;
119     }
120     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
121     if (!streams[1])
122     {
123         log_error("clCreateBuffer failed\n");
124         return -1;
125     }
126     streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
127     if (!streams[2])
128     {
129         log_error("clCreateBuffer failed\n");
130         return -1;
131     }
132 
133     p = input_ptr[0];
134     d = init_genrand( gRandomSeed );
135     for (i=0; i<num_elements; i++)
136     {
137         p[i] = get_random_float(-0x40000000, 0x40000000, d);
138     }
139     p = input_ptr[1];
140     for (i=0; i<num_elements; i++)
141     {
142         p[i] = get_random_float(-0x40000000, 0x40000000, d);
143     }
144     free_mtdata(d);   d = NULL;
145 
146     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
147     if (err != CL_SUCCESS)
148     {
149         log_error("clWriteArray failed\n");
150         return -1;
151     }
152     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
153     if (err != CL_SUCCESS)
154     {
155         log_error("clWriteArray failed\n");
156         return -1;
157     }
158 
159     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
160     if (err)
161         return -1;
162     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
163     if (err)
164         return -1;
165     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
166     if (err)
167         return -1;
168     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
169     if (err)
170         return -1;
171     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
172     if (err)
173         return -1;
174     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
175     if (err)
176         return -1;
177 
178     for (i=0; i <kTotalVecCount; i++)
179     {
180         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
181         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
182         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
183         if (err != CL_SUCCESS)
184         {
185             log_error("clSetKernelArgs failed\n");
186             return -1;
187         }
188     }
189 
190     threads[0] = (size_t)n_elems;
191     for (i=0; i<kTotalVecCount; i++)
192     {
193         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
194         if (err != CL_SUCCESS)
195         {
196             log_error("clEnqueueNDRangeKernel failed\n");
197             return -1;
198         }
199 
200         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
201         if (err != CL_SUCCESS)
202         {
203             log_error("clEnqueueReadBuffer failed\n");
204             return -1;
205         }
206 
207         switch (i)
208         {
209             case 0:
210                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
211                 if (err)
212                     log_error("STEP float test failed\n");
213                 else
214                     log_info("STEP float test passed\n");
215                 break;
216 
217             case 1:
218                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
219                 if (err)
220                     log_error("STEP float2 test failed\n");
221                 else
222                     log_info("STEP float2 test passed\n");
223                 break;
224 
225             case 2:
226                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
227                 if (err)
228                     log_error("STEP float4 test failed\n");
229                 else
230                     log_info("STEP float4 test passed\n");
231                 break;
232 
233             case 3:
234                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
235                 if (err)
236                     log_error("STEP float8 test failed\n");
237                 else
238                     log_info("STEP float8 test passed\n");
239                 break;
240 
241             case 4:
242                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
243                 if (err)
244                     log_error("STEP float16 test failed\n");
245                 else
246                     log_info("STEP float16 test passed\n");
247                 break;
248 
249             case 5:
250                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
251                 if (err)
252                     log_error("STEP float3 test failed\n");
253                 else
254                     log_info("STEP float3 test passed\n");
255                 break;
256         }
257 
258         if (err)
259             break;
260     }
261 
262     clReleaseMemObject(streams[0]);
263     clReleaseMemObject(streams[1]);
264     clReleaseMemObject(streams[2]);
265     for (i=0; i<kTotalVecCount; i++)
266     {
267         clReleaseKernel(kernel[i]);
268         clReleaseProgram(program[i]);
269     }
270     free(input_ptr[0]);
271     free(input_ptr[1]);
272     free(output_ptr);
273 
274     if(err)
275         return err;
276 
277     if( ! is_extension_available( device, "cl_khr_fp64" ))
278     {
279         log_info( "Device does not support cl_khr_fp64.  Skipping double precision tests.\n" );
280         return 0;
281     }
282 
283     return test_stepf_double( device, context, queue, n_elems);
284 }
285 
286 #pragma mark -
287 
288 static const char *step_kernel_code_double =
289 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
290 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
291 "{\n"
292 "    int  tid = get_global_id(0);\n"
293 "\n"
294 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
295 "}\n";
296 
297 static const char *step2_kernel_code_double =
298 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
299 "__kernel void test_step2_double(__global double *srcA, __global double2 *srcB, __global double2 *dst)\n"
300 "{\n"
301 "    int  tid = get_global_id(0);\n"
302 "\n"
303 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
304 "}\n";
305 
306 static const char *step4_kernel_code_double =
307 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
308 "__kernel void test_step4_double(__global double *srcA, __global double4 *srcB, __global double4 *dst)\n"
309 "{\n"
310 "    int  tid = get_global_id(0);\n"
311 "\n"
312 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
313 "}\n";
314 
315 static const char *step8_kernel_code_double =
316 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
317 "__kernel void test_step8_double(__global double *srcA, __global double8 *srcB, __global double8 *dst)\n"
318 "{\n"
319 "    int  tid = get_global_id(0);\n"
320 "\n"
321 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
322 "}\n";
323 
324 static const char *step16_kernel_code_double =
325 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
326 "__kernel void test_step16_double(__global double *srcA, __global double16 *srcB, __global double16 *dst)\n"
327 "{\n"
328 "    int  tid = get_global_id(0);\n"
329 "\n"
330 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
331 "}\n";
332 
333 static const char *step3_kernel_code_double =
334 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
335 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
336 "{\n"
337 "    int  tid = get_global_id(0);\n"
338 "\n"
339 "    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
340 "}\n";
341 
342 
343 static int
verify_step_double(cl_double * inptrA,cl_double * inptrB,cl_double * outptr,int n,int veclen)344 verify_step_double(cl_double *inptrA, cl_double *inptrB, cl_double *outptr, int n, int veclen)
345 {
346     double r;
347     int    i, j;
348 
349     for (i=0; i<n; ) {
350         int ii = i/veclen;
351         for (j=0; j<veclen && i<n; ++j, ++i) {
352             r = (inptrB[i] < inptrA[ii]) ? 0.0 : 1.0;
353             if (r != outptr[i])
354             {
355                 log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
356                 return -1;
357             }
358         }
359     }
360 
361     return 0;
362 }
363 
test_stepf_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)364 int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
365 {
366     cl_mem      streams[3];
367     cl_double  *input_ptr[2], *output_ptr, *p;
368     cl_program  program[kTotalVecCount];
369     cl_kernel   kernel[kTotalVecCount];
370     size_t  threads[1];
371     int num_elements;
372     int err;
373     int i;
374     MTdata    d;
375     num_elements = n_elems * 16;
376 
377     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
378     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
379     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
380     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
381     if (!streams[0])
382     {
383         log_error("clCreateBuffer failed\n");
384         return -1;
385     }
386     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
387     if (!streams[1])
388     {
389         log_error("clCreateBuffer failed\n");
390         return -1;
391     }
392     streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
393     if (!streams[2])
394     {
395         log_error("clCreateBuffer failed\n");
396         return -1;
397     }
398 
399     p = input_ptr[0];
400     d = init_genrand( gRandomSeed );
401     for (i=0; i<num_elements; i++)
402         p[i] = get_random_double(-0x40000000, 0x40000000, d);
403 
404     p = input_ptr[1];
405     for (i=0; i<num_elements; i++)
406         p[i] = get_random_double(-0x40000000, 0x40000000, d);
407 
408     free_mtdata(d);   d = NULL;
409 
410     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
411     if (err != CL_SUCCESS)
412     {
413         log_error("clWriteArray failed\n");
414         return -1;
415     }
416     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
417     if (err != CL_SUCCESS)
418     {
419         log_error("clWriteArray failed\n");
420         return -1;
421     }
422 
423     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
424     if (err)
425         return -1;
426     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
427     if (err)
428         return -1;
429     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
430     if (err)
431         return -1;
432     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
433     if (err)
434         return -1;
435     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
436     if (err)
437         return -1;
438     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
439     if (err)
440         return -1;
441 
442     for (i=0; i <kTotalVecCount; i++)
443     {
444         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
445         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
446         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
447         if (err != CL_SUCCESS)
448         {
449             log_error("clSetKernelArgs failed\n");
450             return -1;
451         }
452     }
453 
454     threads[0] = (size_t)n_elems;
455     for (i=0; i<kTotalVecCount; i++)
456     {
457         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
458         if (err != CL_SUCCESS)
459         {
460             log_error("clEnqueueNDRangeKernel failed\n");
461             return -1;
462         }
463 
464         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
465         if (err != CL_SUCCESS)
466         {
467             log_error("clEnqueueReadBuffer failed\n");
468             return -1;
469         }
470 
471         switch (i)
472         {
473             case 0:
474                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
475                 if (err)
476                     log_error("STEP double test failed\n");
477                 else
478                     log_info("STEP double test passed\n");
479                 break;
480 
481             case 1:
482                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
483                 if (err)
484                     log_error("STEP double2 test failed\n");
485                 else
486                     log_info("STEP double2 test passed\n");
487                 break;
488 
489             case 2:
490                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
491                 if (err)
492                     log_error("STEP double4 test failed\n");
493                 else
494                     log_info("STEP double4 test passed\n");
495                 break;
496 
497             case 3:
498                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
499                 if (err)
500                     log_error("STEP double8 test failed\n");
501                 else
502                     log_info("STEP double8 test passed\n");
503                 break;
504 
505             case 4:
506                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
507                 if (err)
508                     log_error("STEP double16 test failed\n");
509                 else
510                     log_info("STEP double16 test passed\n");
511                 break;
512 
513             case 5:
514                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
515                 if (err)
516                     log_error("STEP double3 test failed\n");
517                 else
518                     log_info("STEP double3 test passed\n");
519                 break;
520         }
521 
522         if (err)
523             break;
524     }
525 
526     clReleaseMemObject(streams[0]);
527     clReleaseMemObject(streams[1]);
528     clReleaseMemObject(streams[2]);
529     for (i=0; i<kTotalVecCount; i++)
530     {
531         clReleaseKernel(kernel[i]);
532         clReleaseProgram(program[i]);
533     }
534     free(input_ptr[0]);
535     free(input_ptr[1]);
536     free(output_ptr);
537 
538     return err;
539 }
540 
541