• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 static const char *smoothstep_kernel_code =
26 "__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
27 "{\n"
28 "    int  tid = get_global_id(0);\n"
29 "\n"
30 "    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
31 "}\n";
32 
33 static const char *smoothstep2_kernel_code =
34 "__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n"
35 "{\n"
36 "    int  tid = get_global_id(0);\n"
37 "\n"
38 "    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
39 "}\n";
40 
41 static const char *smoothstep4_kernel_code =
42 "__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n"
43 "{\n"
44 "    int  tid = get_global_id(0);\n"
45 "\n"
46 "    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
47 "}\n";
48 
49 #define MAX_ERR (1e-5f)
50 
verify_smoothstep(float * edge0,float * edge1,float * x,float * outptr,int n,int veclen)51 float verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr,
52                         int n, int veclen)
53 {
54   float       r, t, delta, max_err = 0.0f;
55   int         i, j;
56 
57   for (i = 0; i < n; ++i) {
58     int vi = i * veclen;
59     for (j = 0; j < veclen; ++j, ++vi) {
60       t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
61       if (t < 0.0f)
62         t = 0.0f;
63       else if (t > 1.0f)
64         t = 1.0f;
65       r = t * t * (3.0f - 2.0f * t);
66       delta = (float)fabs(r - outptr[vi]);
67       if (delta > max_err)
68         max_err = delta;
69     }
70   }
71   return max_err;
72 }
73 
74 const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"};
75 
76 int
test_smoothstepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)77 test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
78 {
79   cl_mem      streams[4];
80   cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
81   cl_program  program[3];
82   cl_kernel   kernel[3];
83   size_t  threads[1];
84   float max_err = 0.0f;
85   int num_elements;
86   int err;
87   int i;
88   MTdata d;
89 
90   num_elements = n_elems * 4;
91 
92   input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
93   input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
94   input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
95   output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
96   streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
97                               sizeof(cl_float) * num_elements, NULL, NULL);
98   if (!streams[0])
99   {
100     log_error("clCreateBuffer failed\n");
101     return -1;
102   }
103   streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
104                               sizeof(cl_float) * num_elements, NULL, NULL);
105   if (!streams[1])
106   {
107     log_error("clCreateBuffer failed\n");
108     return -1;
109   }
110   streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
111                               sizeof(cl_float) * num_elements, NULL, NULL);
112   if (!streams[2])
113   {
114     log_error("clCreateBuffer failed\n");
115     return -1;
116   }
117 
118   streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
119                               sizeof(cl_float) * num_elements, NULL, NULL);
120   if (!streams[3])
121   {
122     log_error("clCreateBuffer failed\n");
123     return -1;
124   }
125 
126   d = init_genrand( gRandomSeed );
127   p = input_ptr[0];
128   for (i=0; i<num_elements; i++)
129   {
130     p[i] = get_random_float(-0x00200000, 0x00200000, d);
131   }
132 
133   p = input_ptr[1];
134   p_edge0 = input_ptr[0];
135   for (i=0; i<num_elements; i++)
136   {
137     float edge0 = p_edge0[i];
138     float edge1;
139     do {
140       edge1 = get_random_float( -0x00200000, 0x00200000, d);
141       if (edge0 < edge1)
142         break;
143     } while (1);
144     p[i] = edge1;
145   }
146 
147   p = input_ptr[2];
148   for (i=0; i<num_elements; i++)
149   {
150     p[i] = get_random_float(-0x00200000, 0x00200000, d);
151   }
152   free_mtdata(d);
153   d = NULL;
154 
155   err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
156   if (err != CL_SUCCESS)
157   {
158     log_error("clWriteArray failed\n");
159     return -1;
160   }
161   err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
162   if (err != CL_SUCCESS)
163   {
164     log_error("clWriteArray failed\n");
165     return -1;
166   }
167   err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
168   if (err != CL_SUCCESS)
169   {
170     log_error("clWriteArray failed\n");
171     return -1;
172   }
173 
174   err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
175   if (err)
176     return -1;
177   err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2f" );
178   if (err)
179     return -1;
180   err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4f" );
181   if (err)
182     return -1;
183 
184   for (i=0; i<3; i++)
185   {
186       err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
187       err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
188       err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
189       err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
190       if (err != CL_SUCCESS)
191     {
192       log_error("clSetKernelArgs failed\n");
193       return -1;
194     }
195   }
196 
197   threads[0] = (size_t)n_elems;
198   for (i=0; i<3; i++)
199   {
200     err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
201     if (err != CL_SUCCESS)
202     {
203       log_error("clEnqueueNDRangeKernel failed\n");
204       return -1;
205     }
206 
207     err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
208     if (err != CL_SUCCESS)
209     {
210       log_error("clEnqueueReadBuffer failed\n");
211       return -1;
212     }
213 
214     switch (i)
215     {
216       case 0:
217         max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 1);
218         break;
219       case 1:
220         max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 2);
221         break;
222       case 2:
223         max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 4);
224         break;
225     }
226 
227     if (max_err > MAX_ERR)
228     {
229       log_error("%s test failed %g max err\n", fn_names[i], max_err);
230       err = -1;
231     }
232     else
233     {
234       log_info("%s test passed %g max err\n", fn_names[i], max_err);
235       err = 0;
236     }
237 
238     if (err)
239       break;
240   }
241 
242   clReleaseMemObject(streams[0]);
243   clReleaseMemObject(streams[1]);
244   clReleaseMemObject(streams[2]);
245   clReleaseMemObject(streams[3]);
246   for (i=0; i<3; i++)
247   {
248     clReleaseKernel(kernel[i]);
249     clReleaseProgram(program[i]);
250   }
251   free(input_ptr[0]);
252   free(input_ptr[1]);
253   free(input_ptr[2]);
254   free(output_ptr);
255 
256   return err;
257 }
258 
259 
260