• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 #ifndef M_PI
26 #define M_PI    3.14159265358979323846264338327950288
27 #endif
28 
29 static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
30 
31 
32 const char *degrees_kernel_code =
33 "__kernel void test_degrees(__global float *src, __global float *dst)\n"
34 "{\n"
35 "    int  tid = get_global_id(0);\n"
36 "\n"
37 "    dst[tid] = degrees(src[tid]);\n"
38 "}\n";
39 
40 const char *degrees2_kernel_code =
41 "__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
42 "{\n"
43 "    int  tid = get_global_id(0);\n"
44 "\n"
45 "    dst[tid] = degrees(src[tid]);\n"
46 "}\n";
47 
48 const char *degrees4_kernel_code =
49 "__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
50 "{\n"
51 "    int  tid = get_global_id(0);\n"
52 "\n"
53 "    dst[tid] = degrees(src[tid]);\n"
54 "}\n";
55 
56 const char *degrees8_kernel_code =
57 "__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
58 "{\n"
59 "    int  tid = get_global_id(0);\n"
60 "\n"
61 "    dst[tid] = degrees(src[tid]);\n"
62 "}\n";
63 
64 const char *degrees16_kernel_code =
65 "__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
66 "{\n"
67 "    int  tid = get_global_id(0);\n"
68 "\n"
69 "    dst[tid] = degrees(src[tid]);\n"
70 "}\n";
71 
72 const char *degrees3_kernel_code =
73 "__kernel void test_degrees3(__global float *src, __global float *dst)\n"
74 "{\n"
75 "    int  tid = get_global_id(0);\n"
76 "\n"
77 "    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
78 "}\n";
79 
80 
81 #define MAX_ERR  2.0f
82 
83 static int
verify_degrees(float * inptr,float * outptr,int n)84 verify_degrees(float *inptr, float *outptr, int n)
85 {
86     float error, max_error = 0.0f;
87     double   r, max_val = NAN;
88     int     i, j, max_index = 0;
89 
90     for (i=0,j=0; i<n; i++,j++)
91     {
92         r = (180.0 / M_PI) * inptr[i];
93         error = Ulp_Error( outptr[i], r );
94         if( fabsf(error) > max_error)
95         {
96             max_error = error;
97             max_index = i;
98             max_val = r;
99             if( fabsf(error) > MAX_ERR)
100             {
101                 log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
102                 return 1;
103             }
104         }
105     }
106 
107     log_info( "degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
108 
109     return 0;
110 }
111 
112 int
test_degrees(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)113 test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
114 {
115     cl_mem       streams[2];
116     cl_float     *input_ptr[1], *output_ptr, *p;
117     cl_program   *program;
118     cl_kernel    *kernel;
119     void        *values[2];
120     size_t threads[1];
121     int          num_elements;
122     int          err;
123     int          i;
124     MTdata        d;
125 
126     program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
127     kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
128 
129     num_elements = n_elems * (1 << (kTotalVecCount-1));
130 
131     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
132     output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
133     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
134     if (!streams[0])
135     {
136         log_error("clCreateBuffer failed\n");
137         return -1;
138     }
139 
140     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_float) * num_elements, NULL, NULL );
141     if (!streams[1])
142     {
143         log_error("clCreateBuffer failed\n");
144         return -1;
145     }
146 
147     p = input_ptr[0];
148     d = init_genrand( gRandomSeed );
149     for (i=0; i<num_elements; i++)
150     {
151         p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
152     }
153     free_mtdata(d); d = NULL;
154 
155     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
156     if (err != CL_SUCCESS)
157     {
158         log_error("clWriteArray failed\n");
159         return -1;
160     }
161 
162     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code, "test_degrees" );
163     if (err)
164         return -1;
165     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code, "test_degrees2" );
166     if (err)
167         return -1;
168     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code, "test_degrees4" );
169     if (err)
170         return -1;
171     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code, "test_degrees8" );
172     if (err)
173         return -1;
174     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code, "test_degrees16" );
175     if (err)
176         return -1;
177     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code, "test_degrees3" );
178     if (err)
179         return -1;
180 
181     values[0] = streams[0];
182     values[1] = streams[1];
183     for (i=0; i < kTotalVecCount; i++)
184     {
185         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
186         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
187         if (err != CL_SUCCESS)
188         {
189             log_error("clSetKernelArgs failed\n");
190             return -1;
191         }
192     }
193 
194     for (i=0; i < kTotalVecCount; i++)
195     {
196 
197         // Line below is troublesome...
198         threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
199         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
200         if (err != CL_SUCCESS)
201         {
202             log_error("clEnqueueNDRangeKernel failed\n");
203             return -1;
204         }
205 
206         cl_uint dead = 0xdeaddead;
207         memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
208         err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
209         if (err != CL_SUCCESS)
210         {
211             log_error("clEnqueueReadBuffer failed\n");
212             return -1;
213         }
214 
215         if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
216         {
217             log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
218             err = -1;
219         }
220         else
221         {
222             log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
223         }
224 
225         if (err)
226             break;
227     }
228 
229     clReleaseMemObject(streams[0]);
230     clReleaseMemObject(streams[1]);
231     for (i=0; i < kTotalVecCount; i++) {
232         clReleaseKernel(kernel[i]);
233         clReleaseProgram(program[i]);
234     }
235     free(program);
236     free(kernel);
237     free(input_ptr[0]);
238     free(output_ptr);
239 
240     if( err )
241         return err;
242 
243     if( ! is_extension_available( device, "cl_khr_fp64" ) )
244     {
245         log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
246         return 0;
247     }
248 
249     return test_degrees_double( device, context, queue, n_elems);
250 }
251 
252 #pragma mark -
253 
254 const char *degrees_kernel_code_double =
255 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
256 "__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
257 "{\n"
258 "    int  tid = get_global_id(0);\n"
259 "\n"
260 "    dst[tid] = degrees(src[tid]);\n"
261 "}\n";
262 
263 const char *degrees2_kernel_code_double =
264 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
265 "__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
266 "{\n"
267 "    int  tid = get_global_id(0);\n"
268 "\n"
269 "    dst[tid] = degrees(src[tid]);\n"
270 "}\n";
271 
272 const char *degrees4_kernel_code_double =
273 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
274 "__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
275 "{\n"
276 "    int  tid = get_global_id(0);\n"
277 "\n"
278 "    dst[tid] = degrees(src[tid]);\n"
279 "}\n";
280 
281 const char *degrees8_kernel_code_double =
282 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
283 "__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
284 "{\n"
285 "    int  tid = get_global_id(0);\n"
286 "\n"
287 "    dst[tid] = degrees(src[tid]);\n"
288 "}\n";
289 
290 const char *degrees16_kernel_code_double =
291 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
292 "__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
293 "{\n"
294 "    int  tid = get_global_id(0);\n"
295 "\n"
296 "    dst[tid] = degrees(src[tid]);\n"
297 "}\n";
298 
299 const char *degrees3_kernel_code_double =
300 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
301 "__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
302 "{\n"
303 "    int  tid = get_global_id(0);\n"
304 "\n"
305 "    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
306 "}\n";
307 
308 
309 #define MAX_ERR  2.0f
310 
311 static int
verify_degrees_double(double * inptr,double * outptr,int n)312 verify_degrees_double(double *inptr, double *outptr, int n)
313 {
314     float error, max_error = 0.0f;
315     double   r, max_val = NAN;
316     int     i, j, max_index = 0;
317 
318     for (i=0,j=0; i<n; i++,j++)
319     {
320         r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
321         error = Ulp_Error_Double( outptr[i], r );
322         if( fabsf(error) > max_error)
323         {
324             max_error = error;
325             max_index = i;
326             max_val = r;
327             if( fabsf(error) > MAX_ERR)
328             {
329                 log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
330                 return 1;
331             }
332         }
333     }
334 
335     log_info( "degreesd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
336 
337     return 0;
338 }
339 
340 static int
test_degrees_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)341 test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
342 {
343     cl_mem       streams[2];
344     cl_double    *input_ptr[1], *output_ptr, *p;
345     cl_program   *program;
346     cl_kernel    *kernel;
347     void        *values[2];
348     size_t threads[1];
349     int          num_elements;
350     int          err;
351     int          i;
352     MTdata        d;
353 
354     program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
355     kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
356 
357     // TODO: line below is clearly wrong
358     num_elements = n_elems * (1 << (kTotalVecCount-1));
359 
360     input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
361     output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
362     streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
363     if (!streams[0])
364     {
365         log_error("clCreateBuffer failed\n");
366         return -1;
367     }
368 
369     streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  sizeof(cl_double) * num_elements, NULL, NULL );
370     if (!streams[1])
371     {
372         log_error("clCreateBuffer failed\n");
373         return -1;
374     }
375 
376     p = input_ptr[0];
377     d = init_genrand( gRandomSeed );
378     for (i=0; i<num_elements; i++)
379         p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
380 
381     free_mtdata(d); d = NULL;
382 
383     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
384     if (err != CL_SUCCESS)
385     {
386         log_error("clWriteArray failed\n");
387         return -1;
388     }
389 
390     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code_double, "test_degrees_double" );
391     if (err)
392         return -1;
393     err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code_double, "test_degrees2_double" );
394     if (err)
395         return -1;
396     err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code_double, "test_degrees4_double" );
397     if (err)
398         return -1;
399     err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code_double, "test_degrees8_double" );
400     if (err)
401         return -1;
402     err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code_double, "test_degrees16_double" );
403     if (err)
404         return -1;
405     err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code_double, "test_degrees3_double" );
406     if (err)
407         return -1;
408 
409     values[0] = streams[0];
410     values[1] = streams[1];
411     for (i=0; i < kTotalVecCount; i++)
412     {
413         err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
414         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
415         if (err != CL_SUCCESS)
416         {
417             log_error("clSetKernelArgs failed\n");
418             return -1;
419         }
420     }
421 
422     for (i=0; i < kTotalVecCount; i++)
423     {
424 
425         // Line below is troublesome...
426         threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
427         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
428         if (err != CL_SUCCESS)
429         {
430             log_error("clEnqueueNDRangeKernel failed\n");
431             return -1;
432         }
433 
434         cl_uint dead = 0xdeaddead;
435         memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
436         err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
437         if (err != CL_SUCCESS)
438         {
439             log_error("clEnqueueReadBuffer failed\n");
440             return -1;
441         }
442 
443         if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
444         {
445             log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
446             err = -1;
447         }
448         else
449         {
450             log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
451         }
452 
453         if (err)
454             break;
455     }
456 
457     clReleaseMemObject(streams[0]);
458     clReleaseMemObject(streams[1]);
459     for (i=0; i < kTotalVecCount; i++) {
460         clReleaseKernel(kernel[i]);
461         clReleaseProgram(program[i]);
462     }
463     free(program);
464     free(kernel);
465     free(input_ptr[0]);
466     free(output_ptr);
467 
468     return err;
469 }
470 
471 
472 
473