1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 #ifndef M_PI
26 #define M_PI 3.14159265358979323846264338327950288
27 #endif
28
29 static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
30
31
32 const char *degrees_kernel_code =
33 "__kernel void test_degrees(__global float *src, __global float *dst)\n"
34 "{\n"
35 " int tid = get_global_id(0);\n"
36 "\n"
37 " dst[tid] = degrees(src[tid]);\n"
38 "}\n";
39
40 const char *degrees2_kernel_code =
41 "__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
42 "{\n"
43 " int tid = get_global_id(0);\n"
44 "\n"
45 " dst[tid] = degrees(src[tid]);\n"
46 "}\n";
47
48 const char *degrees4_kernel_code =
49 "__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
50 "{\n"
51 " int tid = get_global_id(0);\n"
52 "\n"
53 " dst[tid] = degrees(src[tid]);\n"
54 "}\n";
55
56 const char *degrees8_kernel_code =
57 "__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
58 "{\n"
59 " int tid = get_global_id(0);\n"
60 "\n"
61 " dst[tid] = degrees(src[tid]);\n"
62 "}\n";
63
64 const char *degrees16_kernel_code =
65 "__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
66 "{\n"
67 " int tid = get_global_id(0);\n"
68 "\n"
69 " dst[tid] = degrees(src[tid]);\n"
70 "}\n";
71
72 const char *degrees3_kernel_code =
73 "__kernel void test_degrees3(__global float *src, __global float *dst)\n"
74 "{\n"
75 " int tid = get_global_id(0);\n"
76 "\n"
77 " vstore3(degrees(vload3(tid,src)),tid,dst);\n"
78 "}\n";
79
80
81 #define MAX_ERR 2.0f
82
83 static int
verify_degrees(float * inptr,float * outptr,int n)84 verify_degrees(float *inptr, float *outptr, int n)
85 {
86 float error, max_error = 0.0f;
87 double r, max_val = NAN;
88 int i, j, max_index = 0;
89
90 for (i=0,j=0; i<n; i++,j++)
91 {
92 r = (180.0 / M_PI) * inptr[i];
93 error = Ulp_Error( outptr[i], r );
94 if( fabsf(error) > max_error)
95 {
96 max_error = error;
97 max_index = i;
98 max_val = r;
99 if( fabsf(error) > MAX_ERR)
100 {
101 log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
102 return 1;
103 }
104 }
105 }
106
107 log_info( "degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
108
109 return 0;
110 }
111
112 int
test_degrees(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)113 test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
114 {
115 cl_mem streams[2];
116 cl_float *input_ptr[1], *output_ptr, *p;
117 cl_program *program;
118 cl_kernel *kernel;
119 void *values[2];
120 size_t threads[1];
121 int num_elements;
122 int err;
123 int i;
124 MTdata d;
125
126 program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
127 kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
128
129 num_elements = n_elems * (1 << (kTotalVecCount-1));
130
131 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
132 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
133 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
134 sizeof(cl_float) * num_elements, NULL, NULL);
135 if (!streams[0])
136 {
137 log_error("clCreateBuffer failed\n");
138 return -1;
139 }
140
141 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
142 sizeof(cl_float) * num_elements, NULL, NULL);
143 if (!streams[1])
144 {
145 log_error("clCreateBuffer failed\n");
146 return -1;
147 }
148
149 p = input_ptr[0];
150 d = init_genrand( gRandomSeed );
151 for (i=0; i<num_elements; i++)
152 {
153 p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
154 }
155 free_mtdata(d); d = NULL;
156
157 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
158 if (err != CL_SUCCESS)
159 {
160 log_error("clWriteArray failed\n");
161 return -1;
162 }
163
164 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, °rees_kernel_code, "test_degrees" );
165 if (err)
166 return -1;
167 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, °rees2_kernel_code, "test_degrees2" );
168 if (err)
169 return -1;
170 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, °rees4_kernel_code, "test_degrees4" );
171 if (err)
172 return -1;
173 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, °rees8_kernel_code, "test_degrees8" );
174 if (err)
175 return -1;
176 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, °rees16_kernel_code, "test_degrees16" );
177 if (err)
178 return -1;
179 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, °rees3_kernel_code, "test_degrees3" );
180 if (err)
181 return -1;
182
183 values[0] = streams[0];
184 values[1] = streams[1];
185 for (i=0; i < kTotalVecCount; i++)
186 {
187 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
188 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
189 if (err != CL_SUCCESS)
190 {
191 log_error("clSetKernelArgs failed\n");
192 return -1;
193 }
194 }
195
196 for (i=0; i < kTotalVecCount; i++)
197 {
198
199 // Line below is troublesome...
200 threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
201 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
202 if (err != CL_SUCCESS)
203 {
204 log_error("clEnqueueNDRangeKernel failed\n");
205 return -1;
206 }
207
208 cl_uint dead = 0xdeaddead;
209 memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
210 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
211 if (err != CL_SUCCESS)
212 {
213 log_error("clEnqueueReadBuffer failed\n");
214 return -1;
215 }
216
217 if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
218 {
219 log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
220 err = -1;
221 }
222 else
223 {
224 log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
225 }
226
227 if (err)
228 break;
229 }
230
231 clReleaseMemObject(streams[0]);
232 clReleaseMemObject(streams[1]);
233 for (i=0; i < kTotalVecCount; i++) {
234 clReleaseKernel(kernel[i]);
235 clReleaseProgram(program[i]);
236 }
237 free(program);
238 free(kernel);
239 free(input_ptr[0]);
240 free(output_ptr);
241
242 if( err )
243 return err;
244
245 if( ! is_extension_available( device, "cl_khr_fp64" ) )
246 {
247 log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
248 return 0;
249 }
250
251 return test_degrees_double( device, context, queue, n_elems);
252 }
253
254 #pragma mark -
255
256 const char *degrees_kernel_code_double =
257 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
258 "__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
259 "{\n"
260 " int tid = get_global_id(0);\n"
261 "\n"
262 " dst[tid] = degrees(src[tid]);\n"
263 "}\n";
264
265 const char *degrees2_kernel_code_double =
266 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
267 "__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
268 "{\n"
269 " int tid = get_global_id(0);\n"
270 "\n"
271 " dst[tid] = degrees(src[tid]);\n"
272 "}\n";
273
274 const char *degrees4_kernel_code_double =
275 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
276 "__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
277 "{\n"
278 " int tid = get_global_id(0);\n"
279 "\n"
280 " dst[tid] = degrees(src[tid]);\n"
281 "}\n";
282
283 const char *degrees8_kernel_code_double =
284 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
285 "__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
286 "{\n"
287 " int tid = get_global_id(0);\n"
288 "\n"
289 " dst[tid] = degrees(src[tid]);\n"
290 "}\n";
291
292 const char *degrees16_kernel_code_double =
293 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
294 "__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
295 "{\n"
296 " int tid = get_global_id(0);\n"
297 "\n"
298 " dst[tid] = degrees(src[tid]);\n"
299 "}\n";
300
301 const char *degrees3_kernel_code_double =
302 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
303 "__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
304 "{\n"
305 " int tid = get_global_id(0);\n"
306 "\n"
307 " vstore3(degrees(vload3(tid,src)),tid,dst);\n"
308 "}\n";
309
310
311 #define MAX_ERR 2.0f
312
313 static int
verify_degrees_double(double * inptr,double * outptr,int n)314 verify_degrees_double(double *inptr, double *outptr, int n)
315 {
316 float error, max_error = 0.0f;
317 double r, max_val = NAN;
318 int i, j, max_index = 0;
319
320 for (i=0,j=0; i<n; i++,j++)
321 {
322 r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
323 error = Ulp_Error_Double( outptr[i], r );
324 if( fabsf(error) > max_error)
325 {
326 max_error = error;
327 max_index = i;
328 max_val = r;
329 if( fabsf(error) > MAX_ERR)
330 {
331 log_error( "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
332 return 1;
333 }
334 }
335 }
336
337 log_info( "degreesd: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
338
339 return 0;
340 }
341
342 static int
test_degrees_double(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)343 test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
344 {
345 cl_mem streams[2];
346 cl_double *input_ptr[1], *output_ptr, *p;
347 cl_program *program;
348 cl_kernel *kernel;
349 void *values[2];
350 size_t threads[1];
351 int num_elements;
352 int err;
353 int i;
354 MTdata d;
355
356 program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
357 kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
358
359 // TODO: line below is clearly wrong
360 num_elements = n_elems * (1 << (kTotalVecCount-1));
361
362 input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
363 output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
364 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
365 sizeof(cl_double) * num_elements, NULL, NULL);
366 if (!streams[0])
367 {
368 log_error("clCreateBuffer failed\n");
369 return -1;
370 }
371
372 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
373 sizeof(cl_double) * num_elements, NULL, NULL);
374 if (!streams[1])
375 {
376 log_error("clCreateBuffer failed\n");
377 return -1;
378 }
379
380 p = input_ptr[0];
381 d = init_genrand( gRandomSeed );
382 for (i=0; i<num_elements; i++)
383 p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
384
385 free_mtdata(d); d = NULL;
386
387 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
388 if (err != CL_SUCCESS)
389 {
390 log_error("clWriteArray failed\n");
391 return -1;
392 }
393
394 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, °rees_kernel_code_double, "test_degrees_double" );
395 if (err)
396 return -1;
397 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, °rees2_kernel_code_double, "test_degrees2_double" );
398 if (err)
399 return -1;
400 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, °rees4_kernel_code_double, "test_degrees4_double" );
401 if (err)
402 return -1;
403 err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, °rees8_kernel_code_double, "test_degrees8_double" );
404 if (err)
405 return -1;
406 err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, °rees16_kernel_code_double, "test_degrees16_double" );
407 if (err)
408 return -1;
409 err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, °rees3_kernel_code_double, "test_degrees3_double" );
410 if (err)
411 return -1;
412
413 values[0] = streams[0];
414 values[1] = streams[1];
415 for (i=0; i < kTotalVecCount; i++)
416 {
417 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
418 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
419 if (err != CL_SUCCESS)
420 {
421 log_error("clSetKernelArgs failed\n");
422 return -1;
423 }
424 }
425
426 for (i=0; i < kTotalVecCount; i++)
427 {
428
429 // Line below is troublesome...
430 threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
431 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
432 if (err != CL_SUCCESS)
433 {
434 log_error("clEnqueueNDRangeKernel failed\n");
435 return -1;
436 }
437
438 cl_uint dead = 0xdeaddead;
439 memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
440 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
441 if (err != CL_SUCCESS)
442 {
443 log_error("clEnqueueReadBuffer failed\n");
444 return -1;
445 }
446
447 if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
448 {
449 log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
450 err = -1;
451 }
452 else
453 {
454 log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
455 }
456
457 if (err)
458 break;
459 }
460
461 clReleaseMemObject(streams[0]);
462 clReleaseMemObject(streams[1]);
463 for (i=0; i < kTotalVecCount; i++) {
464 clReleaseKernel(kernel[i]);
465 clReleaseProgram(program[i]);
466 }
467 free(program);
468 free(kernel);
469 free(input_ptr[0]);
470 free(output_ptr);
471
472 return err;
473 }
474
475
476
477