1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25 static const char *smoothstep_kernel_code =
26 "__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
27 "{\n"
28 " int tid = get_global_id(0);\n"
29 "\n"
30 " dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
31 "}\n";
32
33 static const char *smoothstep2_kernel_code =
34 "__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n"
35 "{\n"
36 " int tid = get_global_id(0);\n"
37 "\n"
38 " dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
39 "}\n";
40
41 static const char *smoothstep4_kernel_code =
42 "__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n"
43 "{\n"
44 " int tid = get_global_id(0);\n"
45 "\n"
46 " dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
47 "}\n";
48
49 #define MAX_ERR (1e-5f)
50
verify_smoothstep(float * edge0,float * edge1,float * x,float * outptr,int n,int veclen)51 float verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr,
52 int n, int veclen)
53 {
54 float r, t, delta, max_err = 0.0f;
55 int i, j;
56
57 for (i = 0; i < n; ++i) {
58 int vi = i * veclen;
59 for (j = 0; j < veclen; ++j, ++vi) {
60 t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
61 if (t < 0.0f)
62 t = 0.0f;
63 else if (t > 1.0f)
64 t = 1.0f;
65 r = t * t * (3.0f - 2.0f * t);
66 delta = (float)fabs(r - outptr[vi]);
67 if (delta > max_err)
68 max_err = delta;
69 }
70 }
71 return max_err;
72 }
73
74 const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"};
75
76 int
test_smoothstepf(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)77 test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
78 {
79 cl_mem streams[4];
80 cl_float *input_ptr[3], *output_ptr, *p, *p_edge0;
81 cl_program program[3];
82 cl_kernel kernel[3];
83 size_t threads[1];
84 float max_err = 0.0f;
85 int num_elements;
86 int err;
87 int i;
88 MTdata d;
89
90 num_elements = n_elems * 4;
91
92 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
93 input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
94 input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
95 output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
96 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
97 sizeof(cl_float) * num_elements, NULL, NULL);
98 if (!streams[0])
99 {
100 log_error("clCreateBuffer failed\n");
101 return -1;
102 }
103 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
104 sizeof(cl_float) * num_elements, NULL, NULL);
105 if (!streams[1])
106 {
107 log_error("clCreateBuffer failed\n");
108 return -1;
109 }
110 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
111 sizeof(cl_float) * num_elements, NULL, NULL);
112 if (!streams[2])
113 {
114 log_error("clCreateBuffer failed\n");
115 return -1;
116 }
117
118 streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
119 sizeof(cl_float) * num_elements, NULL, NULL);
120 if (!streams[3])
121 {
122 log_error("clCreateBuffer failed\n");
123 return -1;
124 }
125
126 d = init_genrand( gRandomSeed );
127 p = input_ptr[0];
128 for (i=0; i<num_elements; i++)
129 {
130 p[i] = get_random_float(-0x00200000, 0x00200000, d);
131 }
132
133 p = input_ptr[1];
134 p_edge0 = input_ptr[0];
135 for (i=0; i<num_elements; i++)
136 {
137 float edge0 = p_edge0[i];
138 float edge1;
139 do {
140 edge1 = get_random_float( -0x00200000, 0x00200000, d);
141 if (edge0 < edge1)
142 break;
143 } while (1);
144 p[i] = edge1;
145 }
146
147 p = input_ptr[2];
148 for (i=0; i<num_elements; i++)
149 {
150 p[i] = get_random_float(-0x00200000, 0x00200000, d);
151 }
152 free_mtdata(d);
153 d = NULL;
154
155 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
156 if (err != CL_SUCCESS)
157 {
158 log_error("clWriteArray failed\n");
159 return -1;
160 }
161 err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
162 if (err != CL_SUCCESS)
163 {
164 log_error("clWriteArray failed\n");
165 return -1;
166 }
167 err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
168 if (err != CL_SUCCESS)
169 {
170 log_error("clWriteArray failed\n");
171 return -1;
172 }
173
174 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
175 if (err)
176 return -1;
177 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2f" );
178 if (err)
179 return -1;
180 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4f" );
181 if (err)
182 return -1;
183
184 for (i=0; i<3; i++)
185 {
186 err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
187 err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
188 err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
189 err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
190 if (err != CL_SUCCESS)
191 {
192 log_error("clSetKernelArgs failed\n");
193 return -1;
194 }
195 }
196
197 threads[0] = (size_t)n_elems;
198 for (i=0; i<3; i++)
199 {
200 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
201 if (err != CL_SUCCESS)
202 {
203 log_error("clEnqueueNDRangeKernel failed\n");
204 return -1;
205 }
206
207 err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
208 if (err != CL_SUCCESS)
209 {
210 log_error("clEnqueueReadBuffer failed\n");
211 return -1;
212 }
213
214 switch (i)
215 {
216 case 0:
217 max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 1);
218 break;
219 case 1:
220 max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 2);
221 break;
222 case 2:
223 max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 4);
224 break;
225 }
226
227 if (max_err > MAX_ERR)
228 {
229 log_error("%s test failed %g max err\n", fn_names[i], max_err);
230 err = -1;
231 }
232 else
233 {
234 log_info("%s test passed %g max err\n", fn_names[i], max_err);
235 err = 0;
236 }
237
238 if (err)
239 break;
240 }
241
242 clReleaseMemObject(streams[0]);
243 clReleaseMemObject(streams[1]);
244 clReleaseMemObject(streams[2]);
245 clReleaseMemObject(streams[3]);
246 for (i=0; i<3; i++)
247 {
248 clReleaseKernel(kernel[i]);
249 clReleaseProgram(program[i]);
250 }
251 free(input_ptr[0]);
252 free(input_ptr[1]);
253 free(input_ptr[2]);
254 free(output_ptr);
255
256 return err;
257 }
258
259
260