1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24
25
26 #include "procs.h"
27
28 const char *fpadd2_kernel_code =
29 "__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
30 "{\n"
31 " int tid = get_global_id(0);\n"
32 "\n"
33 " dst[tid] = srcA[tid] + srcB[tid];\n"
34 "}\n";
35
36 const char *fpsub2_kernel_code =
37 "__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
38 "{\n"
39 " int tid = get_global_id(0);\n"
40 "\n"
41 " dst[tid] = srcA[tid] - srcB[tid];\n"
42 "}\n";
43
44 const char *fpmul2_kernel_code =
45 "__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
46 "{\n"
47 " int tid = get_global_id(0);\n"
48 "\n"
49 " dst[tid] = srcA[tid] * srcB[tid];\n"
50 "}\n";
51
52
53 int
verify_fpadd2(float * inptrA,float * inptrB,float * outptr,int n)54 verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
55 {
56 float r;
57 int i;
58
59 for (i=0; i<n; i++)
60 {
61 r = inptrA[i] + inptrB[i];
62 if (r != outptr[i])
63 {
64 log_error("FP_ADD float2 test failed\n");
65 return -1;
66 }
67 }
68
69 log_info("FP_ADD float2 test passed\n");
70 return 0;
71 }
72
73 int
verify_fpsub2(float * inptrA,float * inptrB,float * outptr,int n)74 verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
75 {
76 float r;
77 int i;
78
79 for (i=0; i<n; i++)
80 {
81 r = inptrA[i] - inptrB[i];
82 if (r != outptr[i])
83 {
84 log_error("FP_SUB float2 test failed\n");
85 return -1;
86 }
87 }
88
89 log_info("FP_SUB float2 test passed\n");
90 return 0;
91 }
92
93 int
verify_fpmul2(float * inptrA,float * inptrB,float * outptr,int n)94 verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
95 {
96 float r;
97 int i;
98
99 for (i=0; i<n; i++)
100 {
101 r = inptrA[i] * inptrB[i];
102 if (r != outptr[i])
103 {
104 log_error("FP_MUL float2 test failed\n");
105 return -1;
106 }
107 }
108
109 log_info("FP_MUL float2 test passed\n");
110 return 0;
111 }
112
113
114 int
test_fpmath_float2(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)115 test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
116 {
117 cl_mem streams[4];
118 cl_program program[3];
119 cl_kernel kernel[3];
120
121 cl_float *input_ptr[3], *output_ptr, *p;
122 size_t threads[1];
123 int err, i;
124 MTdata d = init_genrand( gRandomSeed );
125
126 size_t length = sizeof(cl_float) * 2 * num_elements;
127 int isRTZ = 0;
128 RoundingMode oldMode = kDefaultRoundingMode;
129
130 // check for floating point capabilities
131 cl_device_fp_config single_config = 0;
132 err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
133 if (err) {
134 log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
135 return -1;
136 }
137 //If we only support rtz mode
138 if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
139 {
140 //Check to make sure we are an embedded device
141 char profile[32];
142 err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
143 if( err )
144 {
145 log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
146 return -1;
147 }
148 if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
149 {
150 log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
151 return -1;
152 }
153
154 isRTZ = 1;
155 oldMode = get_round();
156 }
157
158 input_ptr[0] = (cl_float*)malloc(length);
159 input_ptr[1] = (cl_float*)malloc(length);
160 input_ptr[2] = (cl_float*)malloc(length);
161 output_ptr = (cl_float*)malloc(length);
162
163 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
164 test_error( err, "clCreateBuffer failed.");
165 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
166 test_error( err, "clCreateBuffer failed.");
167 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
168 test_error( err, "clCreateBuffer failed.");
169 streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
170 test_error( err, "clCreateBuffer failed.");
171
172 p = input_ptr[0];
173 for (i=0; i<num_elements*2; i++)
174 p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
175 p = input_ptr[1];
176 for (i=0; i<num_elements*2; i++)
177 p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
178 p = input_ptr[2];
179 for (i=0; i<num_elements*2; i++)
180 p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
181
182 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
183 test_error(err, "clEnqueueWriteBuffer failed");
184 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
185 test_error(err, "clEnqueueWriteBuffer failed");
186 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
187 test_error(err, "clEnqueueWriteBuffer failed");
188
189 err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
190 test_error( err, "create_single_kernel_helper failed");
191
192 err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
193 test_error( err, "create_single_kernel_helper failed");
194
195 err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
196 test_error( err, "create_single_kernel_helper failed");
197
198
199 err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
200 err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
201 err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
202 test_error( err, "clSetKernelArgs failed.");
203
204 err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
205 err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
206 err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
207 test_error( err, "clSetKernelArgs failed.");
208
209 err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
210 err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
211 err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
212 test_error( err, "clSetKernelArgs failed.");
213 free_mtdata(d);
214 d = NULL;
215
216 threads[0] = (unsigned int)num_elements;
217 for (i=0; i<3; i++)
218 {
219 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
220 test_error( err, "clEnqueueNDRangeKernel failed.");
221
222 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
223 test_error( err, "clEnqueueReadBuffer failed.");
224
225 if( isRTZ )
226 set_round( kRoundTowardZero, kfloat );
227
228 switch (i)
229 {
230 case 0:
231 err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
232 break;
233 case 1:
234 err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
235 break;
236 case 2:
237 err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
238 break;
239 }
240
241 if( isRTZ )
242 set_round( oldMode, kfloat );
243
244 if (err)
245 break;
246 }
247
248
249 // cleanup
250 clReleaseMemObject(streams[0]);
251 clReleaseMemObject(streams[1]);
252 clReleaseMemObject(streams[2]);
253 clReleaseMemObject(streams[3]);
254 for (i=0; i<3; i++)
255 {
256 clReleaseKernel(kernel[i]);
257 clReleaseProgram(program[i]);
258 }
259 free(input_ptr[0]);
260 free(input_ptr[1]);
261 free(input_ptr[2]);
262 free(output_ptr);
263 return err;
264 }
265
266
267