• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 #ifndef M_PI
26 #define M_PI    3.14159265358979323846264338327950288
27 #endif
28 
29 static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
30 
31 
32 const char *degrees_kernel_code =
33 "__kernel void test_degrees(__global float *src, __global float *dst)\n"
34 "{\n"
35 "    int  tid = get_global_id(0);\n"
36 "\n"
37 "    dst[tid] = degrees(src[tid]);\n"
38 "}\n";
39 
40 const char *degrees2_kernel_code =
41 "__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
42 "{\n"
43 "    int  tid = get_global_id(0);\n"
44 "\n"
45 "    dst[tid] = degrees(src[tid]);\n"
46 "}\n";
47 
48 const char *degrees4_kernel_code =
49 "__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
50 "{\n"
51 "    int  tid = get_global_id(0);\n"
52 "\n"
53 "    dst[tid] = degrees(src[tid]);\n"
54 "}\n";
55 
56 const char *degrees8_kernel_code =
57 "__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
58 "{\n"
59 "    int  tid = get_global_id(0);\n"
60 "\n"
61 "    dst[tid] = degrees(src[tid]);\n"
62 "}\n";
63 
64 const char *degrees16_kernel_code =
65 "__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
66 "{\n"
67 "    int  tid = get_global_id(0);\n"
68 "\n"
69 "    dst[tid] = degrees(src[tid]);\n"
70 "}\n";
71 
72 const char *degrees3_kernel_code =
73 "__kernel void test_degrees3(__global float *src, __global float *dst)\n"
74 "{\n"
75 "    int  tid = get_global_id(0);\n"
76 "\n"
77 "    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
78 "}\n";
79 
80 
81 #define MAX_ERR  2.0f
82 
83 static int
verify_degrees(float * inptr,float * outptr,int n)84 verify_degrees(float *inptr, float *outptr, int n)
85 {
86     float error, max_error = 0.0f;
87     double   r, max_val = NAN;
88     int     i, j, max_index = 0;
89 
90     for (i=0,j=0; i<n; i++,j++)
91     {
92         r = (180.0 / M_PI) * inptr[i];
93         error = Ulp_Error( outptr[i], r );
94         if( fabsf(error) > max_error)
95         {
96             max_error = error;
97             max_index = i;
98             max_val = r;
99             if( fabsf(error) > MAX_ERR)
100             {
101                 log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
102                 return 1;
103             }
104         }
105     }
106 
107     log_info( "degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
108 
109     return 0;
110 }
111 
112 int
test_degrees(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)113 test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
114 {
115     cl_mem       streams[2];
116     cl_float     *input_ptr[1], *output_ptr, *p;
117     cl_program   *program;
118     cl_kernel    *kernel;
119     void        *values[2];
120     size_t threads[1];
121     int          num_elements;
122     int          err;
123     int          i;
124     MTdata        d;
125 
126     program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
127     kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
128 
129     num_elements = n_elems * (1 << (kTotalVecCount-1));
130 
131     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
132     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
133     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
134                                 sizeof(cl_float) * num_elements, NULL, NULL);
135     if (!streams[0])
136     {
137         log_error("clCreateBuffer failed\n");
138         return -1;
139     }
140 
141     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
142                                 sizeof(cl_float) * num_elements, NULL, NULL);
143     if (!streams[1])
144     {
145         log_error("clCreateBuffer failed\n");
146         return -1;
147     }
148 
149     p = input_ptr[0];
150     d = init_genrand( gRandomSeed );
151     for (i=0; i<num_elements; i++)
152     {
153         p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
154     }
155     free_mtdata(d); d = NULL;
156 
157     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
158     if (err != CL_SUCCESS)
159     {
160         log_error("clWriteArray failed\n");
161         return -1;
162     }
163 
164     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code, "test_degrees" );
165     if (err)
166         return -1;
167     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code, "test_degrees2" );
168     if (err)
169         return -1;
170     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code, "test_degrees4" );
171     if (err)
172         return -1;
173     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code, "test_degrees8" );
174     if (err)
175         return -1;
176     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code, "test_degrees16" );
177     if (err)
178         return -1;
179     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code, "test_degrees3" );
180     if (err)
181         return -1;
182 
183     values[0] = streams[0];
184     values[1] = streams[1];
185     for (i=0; i < kTotalVecCount; i++)
186     {
187         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
188         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
189         if (err != CL_SUCCESS)
190         {
191             log_error("clSetKernelArgs failed\n");
192             return -1;
193         }
194     }
195 
196     for (i=0; i < kTotalVecCount; i++)
197     {
198 
199         // Line below is troublesome...
200         threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
201         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
202         if (err != CL_SUCCESS)
203         {
204             log_error("clEnqueueNDRangeKernel failed\n");
205             return -1;
206         }
207 
208         cl_uint dead = 0xdeaddead;
209         memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
210         err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
211         if (err != CL_SUCCESS)
212         {
213             log_error("clEnqueueReadBuffer failed\n");
214             return -1;
215         }
216 
217         if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
218         {
219             log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
220             err = -1;
221         }
222         else
223         {
224             log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
225         }
226 
227         if (err)
228             break;
229     }
230 
231     clReleaseMemObject(streams[0]);
232     clReleaseMemObject(streams[1]);
233     for (i=0; i < kTotalVecCount; i++) {
234         clReleaseKernel(kernel[i]);
235         clReleaseProgram(program[i]);
236     }
237     free(program);
238     free(kernel);
239     free(input_ptr[0]);
240     free(output_ptr);
241 
242     if( err )
243         return err;
244 
245     if( ! is_extension_available( device, "cl_khr_fp64" ) )
246     {
247         log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
248         return 0;
249     }
250 
251     return test_degrees_double( device, context, queue, n_elems);
252 }
253 
254 #pragma mark -
255 
256 const char *degrees_kernel_code_double =
257 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
258 "__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
259 "{\n"
260 "    int  tid = get_global_id(0);\n"
261 "\n"
262 "    dst[tid] = degrees(src[tid]);\n"
263 "}\n";
264 
265 const char *degrees2_kernel_code_double =
266 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
267 "__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
268 "{\n"
269 "    int  tid = get_global_id(0);\n"
270 "\n"
271 "    dst[tid] = degrees(src[tid]);\n"
272 "}\n";
273 
274 const char *degrees4_kernel_code_double =
275 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
276 "__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
277 "{\n"
278 "    int  tid = get_global_id(0);\n"
279 "\n"
280 "    dst[tid] = degrees(src[tid]);\n"
281 "}\n";
282 
283 const char *degrees8_kernel_code_double =
284 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
285 "__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
286 "{\n"
287 "    int  tid = get_global_id(0);\n"
288 "\n"
289 "    dst[tid] = degrees(src[tid]);\n"
290 "}\n";
291 
292 const char *degrees16_kernel_code_double =
293 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
294 "__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
295 "{\n"
296 "    int  tid = get_global_id(0);\n"
297 "\n"
298 "    dst[tid] = degrees(src[tid]);\n"
299 "}\n";
300 
301 const char *degrees3_kernel_code_double =
302 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
303 "__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
304 "{\n"
305 "    int  tid = get_global_id(0);\n"
306 "\n"
307 "    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
308 "}\n";
309 
310 
311 #define MAX_ERR  2.0f
312 
313 static int
verify_degrees_double(double * inptr,double * outptr,int n)314 verify_degrees_double(double *inptr, double *outptr, int n)
315 {
316     float error, max_error = 0.0f;
317     double   r, max_val = NAN;
318     int     i, j, max_index = 0;
319 
320     for (i=0,j=0; i<n; i++,j++)
321     {
322         r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
323         error = Ulp_Error_Double( outptr[i], r );
324         if( fabsf(error) > max_error)
325         {
326             max_error = error;
327             max_index = i;
328             max_val = r;
329             if( fabsf(error) > MAX_ERR)
330             {
331                 log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
332                 return 1;
333             }
334         }
335     }
336 
337     log_info( "degreesd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
338 
339     return 0;
340 }
341 
342 static int
test_degrees_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)343 test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
344 {
345     cl_mem       streams[2];
346     cl_double    *input_ptr[1], *output_ptr, *p;
347     cl_program   *program;
348     cl_kernel    *kernel;
349     void        *values[2];
350     size_t threads[1];
351     int          num_elements;
352     int          err;
353     int          i;
354     MTdata        d;
355 
356     program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
357     kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
358 
359     // TODO: line below is clearly wrong
360     num_elements = n_elems * (1 << (kTotalVecCount-1));
361 
362     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
363     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
364     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
365                                 sizeof(cl_double) * num_elements, NULL, NULL);
366     if (!streams[0])
367     {
368         log_error("clCreateBuffer failed\n");
369         return -1;
370     }
371 
372     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
373                                 sizeof(cl_double) * num_elements, NULL, NULL);
374     if (!streams[1])
375     {
376         log_error("clCreateBuffer failed\n");
377         return -1;
378     }
379 
380     p = input_ptr[0];
381     d = init_genrand( gRandomSeed );
382     for (i=0; i<num_elements; i++)
383         p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
384 
385     free_mtdata(d); d = NULL;
386 
387     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
388     if (err != CL_SUCCESS)
389     {
390         log_error("clWriteArray failed\n");
391         return -1;
392     }
393 
394     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code_double, "test_degrees_double" );
395     if (err)
396         return -1;
397     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code_double, "test_degrees2_double" );
398     if (err)
399         return -1;
400     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code_double, "test_degrees4_double" );
401     if (err)
402         return -1;
403     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code_double, "test_degrees8_double" );
404     if (err)
405         return -1;
406     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code_double, "test_degrees16_double" );
407     if (err)
408         return -1;
409     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code_double, "test_degrees3_double" );
410     if (err)
411         return -1;
412 
413     values[0] = streams[0];
414     values[1] = streams[1];
415     for (i=0; i < kTotalVecCount; i++)
416     {
417         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
418         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
419         if (err != CL_SUCCESS)
420         {
421             log_error("clSetKernelArgs failed\n");
422             return -1;
423         }
424     }
425 
426     for (i=0; i < kTotalVecCount; i++)
427     {
428 
429         // Line below is troublesome...
430         threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
431         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
432         if (err != CL_SUCCESS)
433         {
434             log_error("clEnqueueNDRangeKernel failed\n");
435             return -1;
436         }
437 
438         cl_uint dead = 0xdeaddead;
439         memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
440         err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
441         if (err != CL_SUCCESS)
442         {
443             log_error("clEnqueueReadBuffer failed\n");
444             return -1;
445         }
446 
447         if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
448         {
449             log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
450             err = -1;
451         }
452         else
453         {
454             log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
455         }
456 
457         if (err)
458             break;
459     }
460 
461     clReleaseMemObject(streams[0]);
462     clReleaseMemObject(streams[1]);
463     for (i=0; i < kTotalVecCount; i++) {
464         clReleaseKernel(kernel[i]);
465         clReleaseProgram(program[i]);
466     }
467     free(program);
468     free(kernel);
469     free(input_ptr[0]);
470     free(output_ptr);
471 
472     return err;
473 }
474 
475 
476 
477