• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 static int
26 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
27 
28 
29 const char *step_kernel_code =
30 "__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
31 "{\n"
32 "    int  tid = get_global_id(0);\n"
33 "\n"
34 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
35 "}\n";
36 
37 const char *step2_kernel_code =
38 "__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
39 "{\n"
40 "    int  tid = get_global_id(0);\n"
41 "\n"
42 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
43 "}\n";
44 
45 const char *step4_kernel_code =
46 "__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
47 "{\n"
48 "    int  tid = get_global_id(0);\n"
49 "\n"
50 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
51 "}\n";
52 
53 const char *step8_kernel_code =
54 "__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
55 "{\n"
56 "    int  tid = get_global_id(0);\n"
57 "\n"
58 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
59 "}\n";
60 
61 const char *step16_kernel_code =
62 "__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
63 "{\n"
64 "    int  tid = get_global_id(0);\n"
65 "\n"
66 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
67 "}\n";
68 
69 const char *step3_kernel_code =
70 "__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
71 "{\n"
72 "    int  tid = get_global_id(0);\n"
73 "\n"
74 "    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
75 "}\n";
76 
77 
78 int
verify_step(float * inptrA,float * inptrB,float * outptr,int n)79 verify_step(float *inptrA, float *inptrB, float *outptr, int n)
80 {
81     float       r;
82     int         i;
83 
84     for (i=0; i<n; i++)
85     {
86         r = (inptrB[i] < inptrA[i]) ? 0.0f : 1.0f;
87         if (r != outptr[i])
88             return -1;
89     }
90 
91     return 0;
92 }
93 
94 int
test_step(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)95 test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
96 {
97     cl_mem      streams[3];
98     cl_float    *input_ptr[2], *output_ptr, *p;
99   cl_program  program[kTotalVecCount];
100   cl_kernel   kernel[kTotalVecCount];
101     void        *values[3];
102     size_t  threads[1];
103     int num_elements;
104     int err;
105     int i;
106     MTdata d;
107   num_elements = n_elems * 16;
108 
109     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
110     input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
111     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
112     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
113     if (!streams[0])
114     {
115         log_error("clCreateBuffer failed\n");
116         return -1;
117     }
118     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
119     if (!streams[1])
120     {
121         log_error("clCreateBuffer failed\n");
122         return -1;
123     }
124     streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
125     if (!streams[2])
126     {
127         log_error("clCreateBuffer failed\n");
128         return -1;
129     }
130 
131     p = input_ptr[0];
132     d = init_genrand( gRandomSeed );
133     for (i=0; i<num_elements; i++)
134     {
135         p[i] = get_random_float(-0x40000000, 0x40000000, d);
136     }
137     p = input_ptr[1];
138     for (i=0; i<num_elements; i++)
139     {
140         p[i] = get_random_float(-0x40000000, 0x40000000, d);
141     }
142     free_mtdata(d); d = NULL;
143 
144     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
145     if (err != CL_SUCCESS)
146     {
147         log_error("clWriteArray failed\n");
148         return -1;
149     }
150     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
151     if (err != CL_SUCCESS)
152     {
153         log_error("clWriteArray failed\n");
154         return -1;
155     }
156 
157     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
158     if (err)
159         return -1;
160     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
161     if (err)
162         return -1;
163     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
164     if (err)
165         return -1;
166   err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
167   if (err)
168     return -1;
169   err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
170   if (err)
171     return -1;
172   err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
173   if (err)
174     return -1;
175 
176     values[0] = streams[0];
177     values[1] = streams[1];
178     values[2] = streams[2];
179   for (i=0; i <kTotalVecCount; i++)
180     {
181         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
182         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
183         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
184         if (err != CL_SUCCESS)
185         {
186             log_error("clSetKernelArgs failed\n");
187             return -1;
188         }
189     }
190 
191     threads[0] = (size_t)n_elems;
192   for (i=0; i<kTotalVecCount; i++)
193     {
194         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
195         if (err != CL_SUCCESS)
196         {
197             log_error("clEnqueueNDRangeKernel failed\n");
198             return -1;
199         }
200 
201         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
202         if (err != CL_SUCCESS)
203         {
204             log_error("clEnqueueReadBuffer failed\n");
205             return -1;
206         }
207 
208         switch (i)
209         {
210             case 0:
211                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems);
212                 if (err)
213                     log_error("STEP float test failed\n");
214                 else
215                     log_info("STEP float test passed\n");
216                 break;
217 
218             case 1:
219                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
220                 if (err)
221                     log_error("STEP float2 test failed\n");
222                 else
223                     log_info("STEP float2 test passed\n");
224                 break;
225 
226             case 2:
227                 err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
228                 if (err)
229                     log_error("STEP float4 test failed\n");
230                 else
231                     log_info("STEP float4 test passed\n");
232                 break;
233 
234         case 3:
235         err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
236         if (err)
237           log_error("STEP float8 test failed\n");
238         else
239           log_info("STEP float8 test passed\n");
240         break;
241 
242         case 4:
243         err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
244         if (err)
245           log_error("STEP float16 test failed\n");
246         else
247           log_info("STEP float16 test passed\n");
248         break;
249 
250         case 5:
251         err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
252         if (err)
253           log_error("STEP float3 test failed\n");
254         else
255           log_info("STEP float3 test passed\n");
256         break;
257         }
258 
259         if (err)
260             break;
261     }
262 
263     clReleaseMemObject(streams[0]);
264     clReleaseMemObject(streams[1]);
265     clReleaseMemObject(streams[2]);
266   for (i=0; i<kTotalVecCount; i++)
267     {
268         clReleaseKernel(kernel[i]);
269         clReleaseProgram(program[i]);
270     }
271     free(input_ptr[0]);
272     free(input_ptr[1]);
273     free(output_ptr);
274 
275     if( err )
276         return err;
277 
278     if( ! is_extension_available( device, "cl_khr_fp64" ))
279         return 0;
280 
281     return test_step_double( device, context, queue, n_elems);
282 }
283 
284 
285 #pragma mark -
286 
287 const char *step_kernel_code_double =
288 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
289 "__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
290 "{\n"
291 "    int  tid = get_global_id(0);\n"
292 "\n"
293 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
294 "}\n";
295 
296 const char *step2_kernel_code_double =
297 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
298 "__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n"
299 "{\n"
300 "    int  tid = get_global_id(0);\n"
301 "\n"
302 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
303 "}\n";
304 
305 const char *step4_kernel_code_double =
306 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
307 "__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n"
308 "{\n"
309 "    int  tid = get_global_id(0);\n"
310 "\n"
311 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
312 "}\n";
313 
314 const char *step8_kernel_code_double =
315 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
316 "__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n"
317 "{\n"
318 "    int  tid = get_global_id(0);\n"
319 "\n"
320 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
321 "}\n";
322 
323 const char *step16_kernel_code_double =
324 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
325 "__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n"
326 "{\n"
327 "    int  tid = get_global_id(0);\n"
328 "\n"
329 "    dst[tid] = step(srcA[tid], srcB[tid]);\n"
330 "}\n";
331 
332 const char *step3_kernel_code_double =
333 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
334 "__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
335 "{\n"
336 "    int  tid = get_global_id(0);\n"
337 "\n"
338 "    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
339 "}\n";
340 
341 
342 int
verify_step_double(double * inptrA,double * inptrB,double * outptr,int n)343 verify_step_double(double *inptrA, double *inptrB, double *outptr, int n)
344 {
345     double       r;
346     int         i;
347 
348     for (i=0; i<n; i++)
349     {
350         r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
351         if (r != outptr[i])
352             return -1;
353     }
354 
355     return 0;
356 }
357 
358 static int
test_step_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)359 test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
360 {
361     cl_mem      streams[3];
362     cl_double    *input_ptr[2], *output_ptr, *p;
363     cl_program  program[kTotalVecCount];
364     cl_kernel   kernel[kTotalVecCount];
365     void        *values[3];
366     size_t  threads[1];
367     int num_elements;
368     int err;
369     int i;
370     MTdata d;
371     num_elements = n_elems * 16;
372 
373     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
374     input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
375     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
376     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
377     if (!streams[0])
378     {
379         log_error("clCreateBuffer failed\n");
380         return -1;
381     }
382     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
383     if (!streams[1])
384     {
385         log_error("clCreateBuffer failed\n");
386         return -1;
387     }
388     streams[2] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
389     if (!streams[2])
390     {
391         log_error("clCreateBuffer failed\n");
392         return -1;
393     }
394 
395     p = input_ptr[0];
396     d = init_genrand( gRandomSeed );
397     for (i=0; i<num_elements; i++)
398     {
399         p[i] = get_random_double(-0x40000000, 0x40000000, d);
400     }
401     p = input_ptr[1];
402     for (i=0; i<num_elements; i++)
403     {
404         p[i] = get_random_double(-0x40000000, 0x40000000, d);
405     }
406     free_mtdata(d); d = NULL;
407 
408     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
409     if (err != CL_SUCCESS)
410     {
411         log_error("clWriteArray failed\n");
412         return -1;
413     }
414     err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
415     if (err != CL_SUCCESS)
416     {
417         log_error("clWriteArray failed\n");
418         return -1;
419     }
420 
421     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
422     if (err)
423         return -1;
424     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
425     if (err)
426         return -1;
427     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
428     if (err)
429         return -1;
430     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
431     if (err)
432         return -1;
433     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
434     if (err)
435         return -1;
436     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
437     if (err)
438         return -1;
439 
440     values[0] = streams[0];
441     values[1] = streams[1];
442     values[2] = streams[2];
443     for (i=0; i < kTotalVecCount; i++)
444     {
445         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
446         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
447         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
448         if (err != CL_SUCCESS)
449         {
450             log_error("clSetKernelArgs failed\n");
451             return -1;
452         }
453     }
454 
455     threads[0] = (size_t)n_elems;
456     for (i=0; i<kTotalVecCount; i++)
457     {
458         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
459         if (err != CL_SUCCESS)
460         {
461             log_error("clEnqueueNDRangeKernel failed\n");
462             return -1;
463         }
464 
465         err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
466         if (err != CL_SUCCESS)
467         {
468             log_error("clEnqueueReadBuffer failed\n");
469             return -1;
470         }
471 
472         switch (i)
473         {
474             case 0:
475                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems);
476                 if (err)
477                     log_error("STEP double test failed\n");
478                 else
479                     log_info("STEP double test passed\n");
480                 break;
481 
482             case 1:
483                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
484                 if (err)
485                     log_error("STEP double2 test failed\n");
486                 else
487                     log_info("STEP double2 test passed\n");
488                 break;
489 
490             case 2:
491                 err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
492                 if (err)
493                     log_error("STEP double4 test failed\n");
494                 else
495                     log_info("STEP double4 test passed\n");
496                 break;
497 
498         case 3:
499         err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
500         if (err)
501           log_error("STEP double8 test failed\n");
502         else
503           log_info("STEP double8 test passed\n");
504         break;
505 
506         case 4:
507         err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
508         if (err)
509           log_error("STEP double16 test failed\n");
510         else
511           log_info("STEP double16 test passed\n");
512         break;
513 
514         case 5:
515         err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
516         if (err)
517           log_error("STEP double3 test failed\n");
518         else
519           log_info("STEP double3 test passed\n");
520         break;
521         }
522 
523         if (err)
524             break;
525     }
526 
527     clReleaseMemObject(streams[0]);
528     clReleaseMemObject(streams[1]);
529     clReleaseMemObject(streams[2]);
530     for (i=0; i<kTotalVecCount; i++)
531     {
532         clReleaseKernel(kernel[i]);
533         clReleaseProgram(program[i]);
534     }
535     free(input_ptr[0]);
536     free(input_ptr[1]);
537     free(output_ptr);
538 
539     return err;
540 }
541 
542