1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 #ifndef M_PI
26 #define M_PI 3.14159265358979323846264338327950288
27 #endif
28
29 static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
30
31
32 const char *degrees_kernel_code =
33 "__kernel void test_degrees(__global float *src, __global float *dst)\n"
34 "{\n"
35 " int tid = get_global_id(0);\n"
36 "\n"
37 " dst[tid] = degrees(src[tid]);\n"
38 "}\n";
39
40 const char *degrees2_kernel_code =
41 "__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
42 "{\n"
43 " int tid = get_global_id(0);\n"
44 "\n"
45 " dst[tid] = degrees(src[tid]);\n"
46 "}\n";
47
48 const char *degrees4_kernel_code =
49 "__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
50 "{\n"
51 " int tid = get_global_id(0);\n"
52 "\n"
53 " dst[tid] = degrees(src[tid]);\n"
54 "}\n";
55
56 const char *degrees8_kernel_code =
57 "__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
58 "{\n"
59 " int tid = get_global_id(0);\n"
60 "\n"
61 " dst[tid] = degrees(src[tid]);\n"
62 "}\n";
63
64 const char *degrees16_kernel_code =
65 "__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
66 "{\n"
67 " int tid = get_global_id(0);\n"
68 "\n"
69 " dst[tid] = degrees(src[tid]);\n"
70 "}\n";
71
72 const char *degrees3_kernel_code =
73 "__kernel void test_degrees3(__global float *src, __global float *dst)\n"
74 "{\n"
75 " int tid = get_global_id(0);\n"
76 "\n"
77 " vstore3(degrees(vload3(tid,src)),tid,dst);\n"
78 "}\n";
79
80
81 #define MAX_ERR 2.0f
82
83 static int
verify_degrees(float * inptr,float * outptr,int n)84 verify_degrees(float *inptr, float *outptr, int n)
85 {
86 float error, max_error = 0.0f;
87 double r, max_val = NAN;
88 int i, j, max_index = 0;
89
90 for (i=0,j=0; i<n; i++,j++)
91 {
92 r = (180.0 / M_PI) * inptr[i];
93 error = Ulp_Error( outptr[i], r );
94 if( fabsf(error) > max_error)
95 {
96 max_error = error;
97 max_index = i;
98 max_val = r;
99 if( fabsf(error) > MAX_ERR)
100 {
101 log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
102 return 1;
103 }
104 }
105 }
106
107 log_info( "degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
108
109 return 0;
110 }
111
112 int
test_degrees(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)113 test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
114 {
115 cl_mem streams[2];
116 cl_float *input_ptr[1], *output_ptr, *p;
117 cl_program *program;
118 cl_kernel *kernel;
119 void *values[2];
120 size_t threads[1];
121 int num_elements;
122 int err;
123 int i;
124 MTdata d;
125
126 program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
127 kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
128
129 num_elements = n_elems * (1 << (kTotalVecCount-1));
130
131 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
132 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
133 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
134 if (!streams[0])
135 {
136 log_error("clCreateBuffer failed\n");
137 return -1;
138 }
139
140 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_float) * num_elements, NULL, NULL );
141 if (!streams[1])
142 {
143 log_error("clCreateBuffer failed\n");
144 return -1;
145 }
146
147 p = input_ptr[0];
148 d = init_genrand( gRandomSeed );
149 for (i=0; i<num_elements; i++)
150 {
151 p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
152 }
153 free_mtdata(d); d = NULL;
154
155 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
156 if (err != CL_SUCCESS)
157 {
158 log_error("clWriteArray failed\n");
159 return -1;
160 }
161
162 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, °rees_kernel_code, "test_degrees" );
163 if (err)
164 return -1;
165 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, °rees2_kernel_code, "test_degrees2" );
166 if (err)
167 return -1;
168 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, °rees4_kernel_code, "test_degrees4" );
169 if (err)
170 return -1;
171 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, °rees8_kernel_code, "test_degrees8" );
172 if (err)
173 return -1;
174 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, °rees16_kernel_code, "test_degrees16" );
175 if (err)
176 return -1;
177 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, °rees3_kernel_code, "test_degrees3" );
178 if (err)
179 return -1;
180
181 values[0] = streams[0];
182 values[1] = streams[1];
183 for (i=0; i < kTotalVecCount; i++)
184 {
185 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
186 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
187 if (err != CL_SUCCESS)
188 {
189 log_error("clSetKernelArgs failed\n");
190 return -1;
191 }
192 }
193
194 for (i=0; i < kTotalVecCount; i++)
195 {
196
197 // Line below is troublesome...
198 threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
199 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
200 if (err != CL_SUCCESS)
201 {
202 log_error("clEnqueueNDRangeKernel failed\n");
203 return -1;
204 }
205
206 cl_uint dead = 0xdeaddead;
207 memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
208 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
209 if (err != CL_SUCCESS)
210 {
211 log_error("clEnqueueReadBuffer failed\n");
212 return -1;
213 }
214
215 if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
216 {
217 log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
218 err = -1;
219 }
220 else
221 {
222 log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
223 }
224
225 if (err)
226 break;
227 }
228
229 clReleaseMemObject(streams[0]);
230 clReleaseMemObject(streams[1]);
231 for (i=0; i < kTotalVecCount; i++) {
232 clReleaseKernel(kernel[i]);
233 clReleaseProgram(program[i]);
234 }
235 free(program);
236 free(kernel);
237 free(input_ptr[0]);
238 free(output_ptr);
239
240 if( err )
241 return err;
242
243 if( ! is_extension_available( device, "cl_khr_fp64" ) )
244 {
245 log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
246 return 0;
247 }
248
249 return test_degrees_double( device, context, queue, n_elems);
250 }
251
252 #pragma mark -
253
254 const char *degrees_kernel_code_double =
255 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
256 "__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
257 "{\n"
258 " int tid = get_global_id(0);\n"
259 "\n"
260 " dst[tid] = degrees(src[tid]);\n"
261 "}\n";
262
263 const char *degrees2_kernel_code_double =
264 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
265 "__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
266 "{\n"
267 " int tid = get_global_id(0);\n"
268 "\n"
269 " dst[tid] = degrees(src[tid]);\n"
270 "}\n";
271
272 const char *degrees4_kernel_code_double =
273 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
274 "__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
275 "{\n"
276 " int tid = get_global_id(0);\n"
277 "\n"
278 " dst[tid] = degrees(src[tid]);\n"
279 "}\n";
280
281 const char *degrees8_kernel_code_double =
282 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
283 "__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
284 "{\n"
285 " int tid = get_global_id(0);\n"
286 "\n"
287 " dst[tid] = degrees(src[tid]);\n"
288 "}\n";
289
290 const char *degrees16_kernel_code_double =
291 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
292 "__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
293 "{\n"
294 " int tid = get_global_id(0);\n"
295 "\n"
296 " dst[tid] = degrees(src[tid]);\n"
297 "}\n";
298
299 const char *degrees3_kernel_code_double =
300 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
301 "__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
302 "{\n"
303 " int tid = get_global_id(0);\n"
304 "\n"
305 " vstore3(degrees(vload3(tid,src)),tid,dst);\n"
306 "}\n";
307
308
309 #define MAX_ERR 2.0f
310
311 static int
verify_degrees_double(double * inptr,double * outptr,int n)312 verify_degrees_double(double *inptr, double *outptr, int n)
313 {
314 float error, max_error = 0.0f;
315 double r, max_val = NAN;
316 int i, j, max_index = 0;
317
318 for (i=0,j=0; i<n; i++,j++)
319 {
320 r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
321 error = Ulp_Error_Double( outptr[i], r );
322 if( fabsf(error) > max_error)
323 {
324 max_error = error;
325 max_index = i;
326 max_val = r;
327 if( fabsf(error) > MAX_ERR)
328 {
329 log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
330 return 1;
331 }
332 }
333 }
334
335 log_info( "degreesd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
336
337 return 0;
338 }
339
340 static int
test_degrees_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)341 test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
342 {
343 cl_mem streams[2];
344 cl_double *input_ptr[1], *output_ptr, *p;
345 cl_program *program;
346 cl_kernel *kernel;
347 void *values[2];
348 size_t threads[1];
349 int num_elements;
350 int err;
351 int i;
352 MTdata d;
353
354 program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
355 kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
356
357 // TODO: line below is clearly wrong
358 num_elements = n_elems * (1 << (kTotalVecCount-1));
359
360 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
361 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
362 streams[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
363 if (!streams[0])
364 {
365 log_error("clCreateBuffer failed\n");
366 return -1;
367 }
368
369 streams[1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), sizeof(cl_double) * num_elements, NULL, NULL );
370 if (!streams[1])
371 {
372 log_error("clCreateBuffer failed\n");
373 return -1;
374 }
375
376 p = input_ptr[0];
377 d = init_genrand( gRandomSeed );
378 for (i=0; i<num_elements; i++)
379 p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
380
381 free_mtdata(d); d = NULL;
382
383 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
384 if (err != CL_SUCCESS)
385 {
386 log_error("clWriteArray failed\n");
387 return -1;
388 }
389
390 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, °rees_kernel_code_double, "test_degrees_double" );
391 if (err)
392 return -1;
393 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, °rees2_kernel_code_double, "test_degrees2_double" );
394 if (err)
395 return -1;
396 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, °rees4_kernel_code_double, "test_degrees4_double" );
397 if (err)
398 return -1;
399 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, °rees8_kernel_code_double, "test_degrees8_double" );
400 if (err)
401 return -1;
402 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, °rees16_kernel_code_double, "test_degrees16_double" );
403 if (err)
404 return -1;
405 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, °rees3_kernel_code_double, "test_degrees3_double" );
406 if (err)
407 return -1;
408
409 values[0] = streams[0];
410 values[1] = streams[1];
411 for (i=0; i < kTotalVecCount; i++)
412 {
413 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
414 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
415 if (err != CL_SUCCESS)
416 {
417 log_error("clSetKernelArgs failed\n");
418 return -1;
419 }
420 }
421
422 for (i=0; i < kTotalVecCount; i++)
423 {
424
425 // Line below is troublesome...
426 threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
427 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
428 if (err != CL_SUCCESS)
429 {
430 log_error("clEnqueueNDRangeKernel failed\n");
431 return -1;
432 }
433
434 cl_uint dead = 0xdeaddead;
435 memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
436 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
437 if (err != CL_SUCCESS)
438 {
439 log_error("clEnqueueReadBuffer failed\n");
440 return -1;
441 }
442
443 if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
444 {
445 log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
446 err = -1;
447 }
448 else
449 {
450 log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
451 }
452
453 if (err)
454 break;
455 }
456
457 clReleaseMemObject(streams[0]);
458 clReleaseMemObject(streams[1]);
459 for (i=0; i < kTotalVecCount; i++) {
460 clReleaseKernel(kernel[i]);
461 clReleaseProgram(program[i]);
462 }
463 free(program);
464 free(kernel);
465 free(input_ptr[0]);
466 free(output_ptr);
467
468 return err;
469 }
470
471
472
473