1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 #ifndef M_PI
26 #define M_PI 3.14159265358979323846264338327950288
27 #endif
28
29 static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
30
31
32 const char *radians_kernel_code =
33 "__kernel void test_radians(__global float *src, __global float *dst)\n"
34 "{\n"
35 " int tid = get_global_id(0);\n"
36 "\n"
37 " dst[tid] = radians(src[tid]);\n"
38 "}\n";
39
40 const char *radians2_kernel_code =
41 "__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n"
42 "{\n"
43 " int tid = get_global_id(0);\n"
44 "\n"
45 " dst[tid] = radians(src[tid]);\n"
46 "}\n";
47
48 const char *radians4_kernel_code =
49 "__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n"
50 "{\n"
51 " int tid = get_global_id(0);\n"
52 "\n"
53 " dst[tid] = radians(src[tid]);\n"
54 "}\n";
55
56 const char *radians8_kernel_code =
57 "__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n"
58 "{\n"
59 " int tid = get_global_id(0);\n"
60 "\n"
61 " dst[tid] = radians(src[tid]);\n"
62 "}\n";
63
64 const char *radians16_kernel_code =
65 "__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n"
66 "{\n"
67 " int tid = get_global_id(0);\n"
68 "\n"
69 " dst[tid] = radians(src[tid]);\n"
70 "}\n";
71
72 const char *radians3_kernel_code =
73 "__kernel void test_radians3(__global float *src, __global float *dst)\n"
74 "{\n"
75 " int tid = get_global_id(0);\n"
76 "\n"
77 " vstore3(radians(vload3(tid,src)),tid,dst);\n"
78 "}\n";
79
80
81 #define MAX_ERR 2.0f
82
83 static float
verify_radians(float * inptr,float * outptr,int n)84 verify_radians(float *inptr, float *outptr, int n)
85 {
86 float error, max_error = 0.0f;
87 double r, max_val = NAN;
88 int i, j, max_index = 0;
89
90 for (i=0,j=0; i<n; i++,j++)
91 {
92 r = (M_PI / 180.0) * inptr[i];
93 error = Ulp_Error( outptr[i], r );
94 if( fabsf(error) > max_error)
95 {
96 max_error = error;
97 max_index = i;
98 max_val = r;
99 if( fabsf(error) > MAX_ERR)
100 {
101 log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
102 return 1;
103 }
104 }
105 }
106
107 log_info( "radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
108
109 return 0;
110 }
111
112
113 int
test_radians(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)114 test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
115 {
116 cl_mem streams[2];
117 cl_float *input_ptr[1], *output_ptr, *p;
118 cl_program *program;
119 cl_kernel *kernel;
120 void *values[2];
121 size_t threads[1];
122 int num_elements;
123 int err;
124 int i;
125 MTdata d;
126
127 program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
128 kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
129
130 num_elements = n_elems * (1 << (kTotalVecCount-1));
131
132 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
133 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
134 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
135 sizeof(cl_float) * num_elements, NULL, NULL);
136 if (!streams[0])
137 {
138 log_error("clCreateBuffer failed\n");
139 return -1;
140 }
141
142 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
143 sizeof(cl_float) * num_elements, NULL, NULL);
144 if (!streams[1])
145 {
146 log_error("clCreateBuffer failed\n");
147 return -1;
148 }
149
150 p = input_ptr[0];
151 d = init_genrand( gRandomSeed );
152 for (i=0; i<num_elements; i++)
153 {
154 p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
155 }
156 free_mtdata(d); d = NULL;
157
158 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
159 if (err != CL_SUCCESS)
160 {
161 log_error("clWriteArray failed\n");
162 return -1;
163 }
164
165 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code, "test_radians" );
166 if (err)
167 return -1;
168 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code, "test_radians2" );
169 if (err)
170 return -1;
171 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code, "test_radians4" );
172 if (err)
173 return -1;
174 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code, "test_radians8" );
175 if (err)
176 return -1;
177 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code, "test_radians16" );
178 if (err)
179 return -1;
180 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code, "test_radians3" );
181 if (err)
182 return -1;
183
184 values[0] = streams[0];
185 values[1] = streams[1];
186 for (i=0; i < kTotalVecCount; i++)
187 {
188 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
189 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
190 if (err != CL_SUCCESS)
191 {
192 log_error("clSetKernelArgs failed\n");
193 return -1;
194 }
195 }
196
197 for (i=0; i < kTotalVecCount; i++)
198 {
199 threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
200 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
201 if (err != CL_SUCCESS)
202 {
203 log_error("clEnqueueNDRangeKernel failed\n");
204 return -1;
205 }
206
207 cl_uint dead = 0xdeaddead;
208 memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
209 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
210 if (err != CL_SUCCESS)
211 {
212 log_error("clEnqueueReadBuffer failed\n");
213 return -1;
214 }
215
216 if (verify_radians(input_ptr[0], output_ptr, n_elems*(i+1)))
217 {
218 log_error("RADIANS float%d test failed\n",((g_arrVecSizes[i])));
219 err = -1;
220 }
221 else
222 {
223 log_info("RADIANS float%d test passed\n", ((g_arrVecSizes[i])));
224 }
225
226 if (err)
227 break;
228 }
229
230 clReleaseMemObject(streams[0]);
231 clReleaseMemObject(streams[1]);
232 for (i=0; i < kTotalVecCount; i++) {
233 clReleaseKernel(kernel[i]);
234 clReleaseProgram(program[i]);
235 }
236 free(program);
237 free(kernel);
238 free(input_ptr[0]);
239 free(output_ptr);
240 if( err )
241 return err;
242
243 if( ! is_extension_available( device, "cl_khr_fp64" ) )
244 {
245 log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
246 return 0;
247 }
248
249 return test_radians_double( device, context, queue, n_elems);
250 }
251
252
253
254 #pragma mark -
255
256 const char *radians_kernel_code_double =
257 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
258 "__kernel void test_radians_double(__global double *src, __global double *dst)\n"
259 "{\n"
260 " int tid = get_global_id(0);\n"
261 "\n"
262 " dst[tid] = radians(src[tid]);\n"
263 "}\n";
264
265 const char *radians2_kernel_code_double =
266 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
267 "__kernel void test_radians2_double(__global double2 *src, __global double2 *dst)\n"
268 "{\n"
269 " int tid = get_global_id(0);\n"
270 "\n"
271 " dst[tid] = radians(src[tid]);\n"
272 "}\n";
273
274 const char *radians4_kernel_code_double =
275 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
276 "__kernel void test_radians4_double(__global double4 *src, __global double4 *dst)\n"
277 "{\n"
278 " int tid = get_global_id(0);\n"
279 "\n"
280 " dst[tid] = radians(src[tid]);\n"
281 "}\n";
282
283 const char *radians8_kernel_code_double =
284 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
285 "__kernel void test_radians8_double(__global double8 *src, __global double8 *dst)\n"
286 "{\n"
287 " int tid = get_global_id(0);\n"
288 "\n"
289 " dst[tid] = radians(src[tid]);\n"
290 "}\n";
291
292 const char *radians16_kernel_code_double =
293 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
294 "__kernel void test_radians16_double(__global double16 *src, __global double16 *dst)\n"
295 "{\n"
296 " int tid = get_global_id(0);\n"
297 "\n"
298 " dst[tid] = radians(src[tid]);\n"
299 "}\n";
300
301 const char *radians3_kernel_code_double =
302 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
303 "__kernel void test_radians3_double(__global double *src, __global double *dst)\n"
304 "{\n"
305 " int tid = get_global_id(0);\n"
306 "\n"
307 " vstore3(radians(vload3(tid,src)),tid,dst);\n"
308 "}\n";
309
310
311 #define MAX_ERR 2.0f
312
313 static double
verify_radians_double(double * inptr,double * outptr,int n)314 verify_radians_double(double *inptr, double *outptr, int n)
315 {
316 float error, max_error = 0.0f;
317 double r, max_val = NAN;
318 int i, j, max_index = 0;
319
320 for (i=0,j=0; i<n; i++,j++)
321 {
322 r = (3.14159265358979323846264338327950288L / 180.0L) * inptr[i];
323 error = Ulp_Error_Double( outptr[i], r );
324 if( fabsf(error) > max_error)
325 {
326 max_error = error;
327 max_index = i;
328 max_val = r;
329 if( fabsf(error) > MAX_ERR)
330 {
331 log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
332 return 1;
333 }
334 }
335 }
336
337 log_info( "radiansd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
338
339 return 0;
340 }
341
342
343 int
test_radians_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)344 test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
345 {
346 cl_mem streams[2];
347 cl_double *input_ptr[1], *output_ptr, *p;
348 cl_program *program;
349 cl_kernel *kernel;
350 void *values[2];
351 size_t threads[1];
352 int num_elements;
353 int err;
354 int i;
355 MTdata d;
356
357
358 program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
359 kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
360
361 //TODO: line below is clearly wrong
362 num_elements = n_elems * (1 << (kTotalVecCount-1));
363
364 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
365 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
366 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
367 sizeof(cl_double) * num_elements, NULL, NULL);
368 if (!streams[0])
369 {
370 log_error("clCreateBuffer failed\n");
371 return -1;
372 }
373
374 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
375 sizeof(cl_double) * num_elements, NULL, NULL);
376 if (!streams[1])
377 {
378 log_error("clCreateBuffer failed\n");
379 return -1;
380 }
381
382 p = input_ptr[0];
383 d = init_genrand( gRandomSeed );
384 for (i=0; i<num_elements; i++)
385 p[i] = get_random_double((float)(-100000.0 * M_PI), (float)(100000.0 * M_PI) ,d);
386
387 free_mtdata(d); d = NULL;
388
389 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
390 if (err != CL_SUCCESS)
391 {
392 log_error("clWriteArray failed\n");
393 return -1;
394 }
395
396 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code_double, "test_radians_double" );
397 if (err)
398 return -1;
399 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code_double, "test_radians2_double" );
400 if (err)
401 return -1;
402 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code_double, "test_radians4_double" );
403 if (err)
404 return -1;
405 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code_double, "test_radians8_double" );
406 if (err)
407 return -1;
408 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code_double, "test_radians16_double" );
409 if (err)
410 return -1;
411 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code_double, "test_radians3_double" );
412 if (err)
413 return -1;
414
415 values[0] = streams[0];
416 values[1] = streams[1];
417 for (i=0; i < kTotalVecCount; i++)
418 {
419 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
420 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
421 if (err != CL_SUCCESS)
422 {
423 log_error("clSetKernelArgs failed\n");
424 return -1;
425 }
426 }
427
428 for (i=0; i < kTotalVecCount; i++)
429 {
430 threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
431 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
432 if (err != CL_SUCCESS)
433 {
434 log_error("clEnqueueNDRangeKernel failed\n");
435 return -1;
436 }
437
438 cl_uint dead = 0xdeaddead;
439 memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
440 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
441 if (err != CL_SUCCESS)
442 {
443 log_error("clEnqueueReadBuffer failed\n");
444 return -1;
445 }
446
447 if (verify_radians_double(input_ptr[0], output_ptr, n_elems*(i+1)))
448 {
449 log_error("RADIANS double%d test failed\n",((g_arrVecSizes[i])));
450 err = -1;
451 }
452 else
453 {
454 log_info("RADIANS double%d test passed\n", ((g_arrVecSizes[i])));
455 }
456
457 if (err)
458 break;
459 }
460
461 clReleaseMemObject(streams[0]);
462 clReleaseMemObject(streams[1]);
463 for (i=0; i < kTotalVecCount; i++) {
464 clReleaseKernel(kernel[i]);
465 clReleaseProgram(program[i]);
466 }
467 free(program);
468 free(kernel);
469 free(input_ptr[0]);
470 free(output_ptr);
471
472 return err;
473 }
474
475