1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24
25 #include "procs.h"
26
27 static const char *fpadd_kernel_code =
28 "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 " int tid = get_global_id(0);\n"
31 "\n"
32 " dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34
35 static const char *fpsub_kernel_code =
36 "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
37 "{\n"
38 " int tid = get_global_id(0);\n"
39 "\n"
40 " dst[tid] = srcA[tid] - srcB[tid];\n"
41 "}\n";
42
43 static const char *fpmul_kernel_code =
44 "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
45 "{\n"
46 " int tid = get_global_id(0);\n"
47 "\n"
48 " dst[tid] = srcA[tid] * srcB[tid];\n"
49 "}\n";
50
51
52 static const float MAX_ERR = 1e-5f;
53
54 static int
verify_fpadd(float * inptrA,float * inptrB,float * outptr,int n)55 verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
56 {
57 float r;
58 int i;
59
60 for (i=0; i<n; i++)
61 {
62 r = inptrA[i] + inptrB[i];
63 if (r != outptr[i])
64 {
65 log_error("FP_ADD float test failed\n");
66 return -1;
67 }
68 }
69
70 log_info("FP_ADD float test passed\n");
71 return 0;
72 }
73
74 static int
verify_fpsub(float * inptrA,float * inptrB,float * outptr,int n)75 verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
76 {
77 float r;
78 int i;
79
80 for (i=0; i<n; i++)
81 {
82 r = inptrA[i] - inptrB[i];
83 if (r != outptr[i])
84 {
85 log_error("FP_SUB float test failed\n");
86 return -1;
87 }
88 }
89
90 log_info("FP_SUB float test passed\n");
91 return 0;
92 }
93
94 static int
verify_fpmul(float * inptrA,float * inptrB,float * outptr,int n)95 verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
96 {
97 float r;
98 int i;
99
100 for (i=0; i<n; i++)
101 {
102 r = inptrA[i] * inptrB[i];
103 if (r != outptr[i])
104 {
105 log_error("FP_MUL float test failed\n");
106 return -1;
107 }
108 }
109
110 log_info("FP_MUL float test passed\n");
111 return 0;
112 }
113
114
115 int
test_fpmath_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)116 test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
117 {
118 cl_mem streams[4];
119 cl_program program[3];
120 cl_kernel kernel[3];
121
122 float *input_ptr[3], *output_ptr, *p;
123 size_t threads[1];
124 int err, i;
125 MTdata d = init_genrand( gRandomSeed );
126 size_t length = sizeof(cl_float) * num_elements;
127 int isRTZ = 0;
128 RoundingMode oldMode = kDefaultRoundingMode;
129
130 // check for floating point capabilities
131 cl_device_fp_config single_config = 0;
132 err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
133 if (err) {
134 log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
135 return -1;
136 }
137 //If we only support rtz mode
138 if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
139 {
140 //Check to make sure we are an embedded device
141 char profile[32];
142 err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
143 if( err )
144 {
145 log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
146 return -1;
147 }
148 if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
149 {
150 log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
151 return -1;
152 }
153
154 isRTZ = 1;
155 oldMode = get_round();
156 }
157
158
159 input_ptr[0] = (cl_float*)malloc(length);
160 input_ptr[1] = (cl_float*)malloc(length);
161 input_ptr[2] = (cl_float*)malloc(length);
162 output_ptr = (cl_float*)malloc(length);
163
164 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
165 test_error( err, "clCreateBuffer failed.");
166 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
167 test_error( err, "clCreateBuffer failed.");
168 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
169 test_error( err, "clCreateBuffer failed.");
170 streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
171 test_error( err, "clCreateBuffer failed.");
172
173 p = input_ptr[0];
174 for (i=0; i<num_elements; i++)
175 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
176 p = input_ptr[1];
177 for (i=0; i<num_elements; i++)
178 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
179 p = input_ptr[2];
180 for (i=0; i<num_elements; i++)
181 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
182
183 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
184 test_error( err, "clEnqueueWriteBuffer failed.");
185
186 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
187 test_error( err, "clEnqueueWriteBuffer failed.");
188
189 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
190 test_error( err, "clEnqueueWriteBuffer failed.");
191
192 err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
193 test_error( err, "create_single_kernel_helper failed");
194
195 err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
196 test_error( err, "create_single_kernel_helper failed");
197
198 err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
199 test_error( err, "create_single_kernel_helper failed");
200
201
202 err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
203 err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
204 err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
205 test_error( err, "clSetKernelArgs failed.");
206
207 err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
208 err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
209 err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
210 test_error( err, "clSetKernelArgs failed.");
211
212 err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
213 err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
214 err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
215 test_error( err, "clSetKernelArgs failed.");
216
217 threads[0] = (unsigned int)num_elements;
218 for (i=0; i<3; i++)
219 {
220 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
221 test_error( err, "clEnqueueNDRangeKernel failed.");
222
223 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
224 test_error( err, "clEnqueueReadBuffer failed.");
225
226 if( isRTZ )
227 set_round( kRoundTowardZero, kfloat );
228
229 switch (i)
230 {
231 case 0:
232 err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
233 break;
234 case 1:
235 err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
236 break;
237 case 2:
238 err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
239 break;
240 }
241
242 if( isRTZ )
243 set_round( oldMode, kfloat );
244
245 if (err)
246 break;
247 }
248
249 // cleanup
250 clReleaseMemObject(streams[0]);
251 clReleaseMemObject(streams[1]);
252 clReleaseMemObject(streams[2]);
253 clReleaseMemObject(streams[3]);
254 for (i=0; i<3; i++)
255 {
256 clReleaseKernel(kernel[i]);
257 clReleaseProgram(program[i]);
258 }
259 free(input_ptr[0]);
260 free(input_ptr[1]);
261 free(input_ptr[2]);
262 free(output_ptr);
263 free_mtdata( d );
264
265 return err;
266 }
267
268
269