• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 #ifndef M_PI
26 #define M_PI    3.14159265358979323846264338327950288
27 #endif
28 
29 static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
30 
31 
32 const char *radians_kernel_code =
33 "__kernel void test_radians(__global float *src, __global float *dst)\n"
34 "{\n"
35 "    int  tid = get_global_id(0);\n"
36 "\n"
37 "    dst[tid] = radians(src[tid]);\n"
38 "}\n";
39 
40 const char *radians2_kernel_code =
41 "__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n"
42 "{\n"
43 "    int  tid = get_global_id(0);\n"
44 "\n"
45 "    dst[tid] = radians(src[tid]);\n"
46 "}\n";
47 
48 const char *radians4_kernel_code =
49 "__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n"
50 "{\n"
51 "    int  tid = get_global_id(0);\n"
52 "\n"
53 "    dst[tid] = radians(src[tid]);\n"
54 "}\n";
55 
56 const char *radians8_kernel_code =
57 "__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n"
58 "{\n"
59 "    int  tid = get_global_id(0);\n"
60 "\n"
61 "    dst[tid] = radians(src[tid]);\n"
62 "}\n";
63 
64 const char *radians16_kernel_code =
65 "__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n"
66 "{\n"
67 "    int  tid = get_global_id(0);\n"
68 "\n"
69 "    dst[tid] = radians(src[tid]);\n"
70 "}\n";
71 
72 const char *radians3_kernel_code =
73 "__kernel void test_radians3(__global float *src, __global float *dst)\n"
74 "{\n"
75 "    int  tid = get_global_id(0);\n"
76 "\n"
77 "    vstore3(radians(vload3(tid,src)),tid,dst);\n"
78 "}\n";
79 
80 
81 #define MAX_ERR  2.0f
82 
83 static float
verify_radians(float * inptr,float * outptr,int n)84 verify_radians(float *inptr, float *outptr, int n)
85 {
86     float error, max_error = 0.0f;
87     double   r, max_val = NAN;
88     int     i, j, max_index = 0;
89 
90     for (i=0,j=0; i<n; i++,j++)
91     {
92         r = (M_PI / 180.0) * inptr[i];
93         error = Ulp_Error( outptr[i], r );
94         if( fabsf(error) > max_error)
95         {
96             max_error = error;
97             max_index = i;
98             max_val = r;
99             if( fabsf(error) > MAX_ERR)
100             {
101                 log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
102                 return 1;
103             }
104         }
105     }
106 
107     log_info( "radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
108 
109     return 0;
110 }
111 
112 
113 int
test_radians(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)114 test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
115 {
116     cl_mem       streams[2];
117     cl_float     *input_ptr[1], *output_ptr, *p;
118     cl_program   *program;
119     cl_kernel    *kernel;
120     void         *values[2];
121     size_t       threads[1];
122     int          num_elements;
123     int          err;
124     int          i;
125     MTdata       d;
126 
127     program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
128     kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
129 
130     num_elements = n_elems * (1 << (kTotalVecCount-1));
131 
132     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
133     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
134     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
135                                 sizeof(cl_float) * num_elements, NULL, NULL);
136     if (!streams[0])
137     {
138         log_error("clCreateBuffer failed\n");
139         return -1;
140     }
141 
142     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
143                                 sizeof(cl_float) * num_elements, NULL, NULL);
144     if (!streams[1])
145     {
146         log_error("clCreateBuffer failed\n");
147         return -1;
148     }
149 
150     p = input_ptr[0];
151     d = init_genrand( gRandomSeed );
152     for (i=0; i<num_elements; i++)
153     {
154         p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
155     }
156     free_mtdata(d); d = NULL;
157 
158     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
159     if (err != CL_SUCCESS)
160     {
161         log_error("clWriteArray failed\n");
162         return -1;
163     }
164 
165     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code, "test_radians" );
166     if (err)
167         return -1;
168     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code, "test_radians2" );
169     if (err)
170         return -1;
171     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code, "test_radians4" );
172     if (err)
173         return -1;
174     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code, "test_radians8" );
175     if (err)
176         return -1;
177     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code, "test_radians16" );
178     if (err)
179         return -1;
180     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code, "test_radians3" );
181     if (err)
182         return -1;
183 
184     values[0] = streams[0];
185     values[1] = streams[1];
186     for (i=0; i < kTotalVecCount; i++)
187     {
188         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
189         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
190         if (err != CL_SUCCESS)
191         {
192             log_error("clSetKernelArgs failed\n");
193             return -1;
194         }
195     }
196 
197     for (i=0; i < kTotalVecCount; i++)
198     {
199         threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
200         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
201         if (err != CL_SUCCESS)
202         {
203             log_error("clEnqueueNDRangeKernel failed\n");
204             return -1;
205         }
206 
207         cl_uint dead = 0xdeaddead;
208         memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
209         err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
210         if (err != CL_SUCCESS)
211         {
212             log_error("clEnqueueReadBuffer failed\n");
213             return -1;
214         }
215 
216         if (verify_radians(input_ptr[0], output_ptr, n_elems*(i+1)))
217         {
218             log_error("RADIANS float%d test failed\n",((g_arrVecSizes[i])));
219             err = -1;
220         }
221         else
222         {
223             log_info("RADIANS float%d test passed\n", ((g_arrVecSizes[i])));
224         }
225 
226         if (err)
227             break;
228     }
229 
230     clReleaseMemObject(streams[0]);
231     clReleaseMemObject(streams[1]);
232     for (i=0; i < kTotalVecCount; i++) {
233         clReleaseKernel(kernel[i]);
234         clReleaseProgram(program[i]);
235     }
236     free(program);
237     free(kernel);
238     free(input_ptr[0]);
239     free(output_ptr);
240     if( err )
241         return err;
242 
243     if( ! is_extension_available( device, "cl_khr_fp64" ) )
244     {
245         log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
246         return 0;
247     }
248 
249     return test_radians_double( device,  context,  queue,  n_elems);
250 }
251 
252 
253 
254 #pragma mark -
255 
256 const char *radians_kernel_code_double =
257 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
258 "__kernel void test_radians_double(__global double *src, __global double *dst)\n"
259 "{\n"
260 "    int  tid = get_global_id(0);\n"
261 "\n"
262 "    dst[tid] = radians(src[tid]);\n"
263 "}\n";
264 
265 const char *radians2_kernel_code_double =
266 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
267 "__kernel void test_radians2_double(__global double2 *src, __global double2 *dst)\n"
268 "{\n"
269 "    int  tid = get_global_id(0);\n"
270 "\n"
271 "    dst[tid] = radians(src[tid]);\n"
272 "}\n";
273 
274 const char *radians4_kernel_code_double =
275 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
276 "__kernel void test_radians4_double(__global double4 *src, __global double4 *dst)\n"
277 "{\n"
278 "    int  tid = get_global_id(0);\n"
279 "\n"
280 "    dst[tid] = radians(src[tid]);\n"
281 "}\n";
282 
283 const char *radians8_kernel_code_double =
284 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
285 "__kernel void test_radians8_double(__global double8 *src, __global double8 *dst)\n"
286 "{\n"
287 "    int  tid = get_global_id(0);\n"
288 "\n"
289 "    dst[tid] = radians(src[tid]);\n"
290 "}\n";
291 
292 const char *radians16_kernel_code_double =
293 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
294 "__kernel void test_radians16_double(__global double16 *src, __global double16 *dst)\n"
295 "{\n"
296 "    int  tid = get_global_id(0);\n"
297 "\n"
298 "    dst[tid] = radians(src[tid]);\n"
299 "}\n";
300 
301 const char *radians3_kernel_code_double =
302 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
303 "__kernel void test_radians3_double(__global double *src, __global double *dst)\n"
304 "{\n"
305 "    int  tid = get_global_id(0);\n"
306 "\n"
307 "    vstore3(radians(vload3(tid,src)),tid,dst);\n"
308 "}\n";
309 
310 
311 #define MAX_ERR  2.0f
312 
313 static double
verify_radians_double(double * inptr,double * outptr,int n)314 verify_radians_double(double *inptr, double *outptr, int n)
315 {
316     float error, max_error = 0.0f;
317     double   r, max_val = NAN;
318     int     i, j, max_index = 0;
319 
320     for (i=0,j=0; i<n; i++,j++)
321     {
322         r = (3.14159265358979323846264338327950288L / 180.0L) * inptr[i];
323         error = Ulp_Error_Double( outptr[i], r );
324         if( fabsf(error) > max_error)
325         {
326             max_error = error;
327             max_index = i;
328             max_val = r;
329             if( fabsf(error) > MAX_ERR)
330             {
331                 log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
332                 return 1;
333             }
334         }
335     }
336 
337     log_info( "radiansd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
338 
339     return 0;
340 }
341 
342 
343 int
test_radians_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)344 test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
345 {
346     cl_mem       streams[2];
347     cl_double     *input_ptr[1], *output_ptr, *p;
348     cl_program   *program;
349     cl_kernel    *kernel;
350     void         *values[2];
351     size_t       threads[1];
352     int          num_elements;
353     int          err;
354     int          i;
355     MTdata       d;
356 
357 
358     program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
359     kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
360 
361     //TODO: line below is clearly wrong
362     num_elements = n_elems * (1 << (kTotalVecCount-1));
363 
364     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
365     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
366     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
367                                 sizeof(cl_double) * num_elements, NULL, NULL);
368     if (!streams[0])
369     {
370         log_error("clCreateBuffer failed\n");
371         return -1;
372     }
373 
374     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
375                                 sizeof(cl_double) * num_elements, NULL, NULL);
376     if (!streams[1])
377     {
378         log_error("clCreateBuffer failed\n");
379         return -1;
380     }
381 
382     p = input_ptr[0];
383     d = init_genrand( gRandomSeed );
384     for (i=0; i<num_elements; i++)
385         p[i] = get_random_double((float)(-100000.0 * M_PI), (float)(100000.0 * M_PI) ,d);
386 
387     free_mtdata(d); d = NULL;
388 
389     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
390     if (err != CL_SUCCESS)
391     {
392         log_error("clWriteArray failed\n");
393         return -1;
394     }
395 
396     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code_double, "test_radians_double" );
397     if (err)
398         return -1;
399     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code_double, "test_radians2_double" );
400     if (err)
401         return -1;
402     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code_double, "test_radians4_double" );
403     if (err)
404         return -1;
405     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code_double, "test_radians8_double" );
406     if (err)
407         return -1;
408     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code_double, "test_radians16_double" );
409     if (err)
410         return -1;
411     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code_double, "test_radians3_double" );
412     if (err)
413         return -1;
414 
415     values[0] = streams[0];
416     values[1] = streams[1];
417     for (i=0; i < kTotalVecCount; i++)
418     {
419         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
420         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
421         if (err != CL_SUCCESS)
422         {
423             log_error("clSetKernelArgs failed\n");
424             return -1;
425         }
426     }
427 
428     for (i=0; i < kTotalVecCount; i++)
429     {
430         threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
431         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
432         if (err != CL_SUCCESS)
433         {
434             log_error("clEnqueueNDRangeKernel failed\n");
435             return -1;
436         }
437 
438         cl_uint dead = 0xdeaddead;
439         memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
440         err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
441         if (err != CL_SUCCESS)
442         {
443             log_error("clEnqueueReadBuffer failed\n");
444             return -1;
445         }
446 
447         if (verify_radians_double(input_ptr[0], output_ptr, n_elems*(i+1)))
448         {
449             log_error("RADIANS double%d test failed\n",((g_arrVecSizes[i])));
450             err = -1;
451         }
452         else
453         {
454             log_info("RADIANS double%d test passed\n", ((g_arrVecSizes[i])));
455         }
456 
457         if (err)
458             break;
459     }
460 
461     clReleaseMemObject(streams[0]);
462     clReleaseMemObject(streams[1]);
463     for (i=0; i < kTotalVecCount; i++) {
464         clReleaseKernel(kernel[i]);
465         clReleaseProgram(program[i]);
466     }
467     free(program);
468     free(kernel);
469     free(input_ptr[0]);
470     free(output_ptr);
471 
472     return err;
473 }
474 
475