• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24 
25 #include "procs.h"
26 
27 static const char *fpadd_kernel_code =
28 "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 "    int  tid = get_global_id(0);\n"
31 "\n"
32 "    dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34 
35 static const char *fpsub_kernel_code =
36 "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
37 "{\n"
38 "    int  tid = get_global_id(0);\n"
39 "\n"
40 "    dst[tid] = srcA[tid] - srcB[tid];\n"
41 "}\n";
42 
43 static const char *fpmul_kernel_code =
44 "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
45 "{\n"
46 "    int  tid = get_global_id(0);\n"
47 "\n"
48 "    dst[tid] = srcA[tid] * srcB[tid];\n"
49 "}\n";
50 
51 
52 static const float    MAX_ERR = 1e-5f;
53 
54 static int
verify_fpadd(float * inptrA,float * inptrB,float * outptr,int n)55 verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
56 {
57     float       r;
58     int         i;
59 
60     for (i=0; i<n; i++)
61     {
62         r = inptrA[i] + inptrB[i];
63         if (r != outptr[i])
64         {
65             log_error("FP_ADD float test failed\n");
66             return -1;
67         }
68     }
69 
70     log_info("FP_ADD float test passed\n");
71     return 0;
72 }
73 
74 static int
verify_fpsub(float * inptrA,float * inptrB,float * outptr,int n)75 verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
76 {
77     float       r;
78     int         i;
79 
80     for (i=0; i<n; i++)
81     {
82         r = inptrA[i] - inptrB[i];
83         if (r != outptr[i])
84         {
85             log_error("FP_SUB float test failed\n");
86             return -1;
87         }
88     }
89 
90     log_info("FP_SUB float test passed\n");
91     return 0;
92 }
93 
94 static int
verify_fpmul(float * inptrA,float * inptrB,float * outptr,int n)95 verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
96 {
97     float       r;
98     int         i;
99 
100     for (i=0; i<n; i++)
101     {
102         r = inptrA[i] * inptrB[i];
103         if (r != outptr[i])
104         {
105             log_error("FP_MUL float test failed\n");
106             return -1;
107         }
108     }
109 
110     log_info("FP_MUL float test passed\n");
111     return 0;
112 }
113 
114 
115 int
test_fpmath_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)116 test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
117 {
118     cl_mem streams[4];
119     cl_program program[3];
120     cl_kernel kernel[3];
121 
122     float *input_ptr[3], *output_ptr, *p;
123     size_t threads[1];
124     int err, i;
125     MTdata d = init_genrand( gRandomSeed );
126     size_t length = sizeof(cl_float) * num_elements;
127     int isRTZ = 0;
128     RoundingMode oldMode = kDefaultRoundingMode;
129 
130     // check for floating point capabilities
131     cl_device_fp_config single_config = 0;
132     err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
133     if (err) {
134       log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
135       return -1;
136     }
137     //If we only support rtz mode
138     if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
139     {
140         //Check to make sure we are an embedded device
141         char profile[32];
142         err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
143         if( err )
144         {
145             log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
146               return -1;
147         }
148         if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
149         {
150             log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
151             return -1;
152         }
153 
154         isRTZ = 1;
155         oldMode = get_round();
156     }
157 
158 
159     input_ptr[0] = (cl_float*)malloc(length);
160     input_ptr[1] = (cl_float*)malloc(length);
161     input_ptr[2] = (cl_float*)malloc(length);
162     output_ptr   = (cl_float*)malloc(length);
163 
164     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
165     test_error( err, "clCreateBuffer failed.");
166     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
167     test_error( err, "clCreateBuffer failed.");
168     streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
169     test_error( err, "clCreateBuffer failed.");
170     streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
171     test_error( err, "clCreateBuffer failed.");
172 
173     p = input_ptr[0];
174     for (i=0; i<num_elements; i++)
175         p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
176     p = input_ptr[1];
177     for (i=0; i<num_elements; i++)
178         p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
179     p = input_ptr[2];
180     for (i=0; i<num_elements; i++)
181         p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
182 
183     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
184     test_error( err, "clEnqueueWriteBuffer failed.");
185 
186     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
187     test_error( err, "clEnqueueWriteBuffer failed.");
188 
189     err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
190     test_error( err, "clEnqueueWriteBuffer failed.");
191 
192     err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
193     test_error( err, "create_single_kernel_helper failed");
194 
195     err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
196     test_error( err, "create_single_kernel_helper failed");
197 
198     err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
199     test_error( err, "create_single_kernel_helper failed");
200 
201 
202     err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
203     err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
204     err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
205     test_error( err, "clSetKernelArgs failed.");
206 
207     err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
208     err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
209     err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
210     test_error( err, "clSetKernelArgs failed.");
211 
212     err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
213     err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
214     err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
215     test_error( err, "clSetKernelArgs failed.");
216 
217     threads[0] = (unsigned int)num_elements;
218     for (i=0; i<3; i++)
219     {
220         err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
221         test_error( err, "clEnqueueNDRangeKernel failed.");
222 
223         err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
224         test_error( err, "clEnqueueReadBuffer failed.");
225 
226         if( isRTZ )
227             set_round( kRoundTowardZero, kfloat );
228 
229         switch (i)
230         {
231             case 0:
232                 err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
233                 break;
234             case 1:
235                 err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
236                 break;
237             case 2:
238                 err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
239                 break;
240         }
241 
242         if( isRTZ )
243             set_round( oldMode, kfloat );
244 
245         if (err)
246             break;
247     }
248 
249     // cleanup
250     clReleaseMemObject(streams[0]);
251     clReleaseMemObject(streams[1]);
252     clReleaseMemObject(streams[2]);
253     clReleaseMemObject(streams[3]);
254     for (i=0; i<3; i++)
255     {
256         clReleaseKernel(kernel[i]);
257         clReleaseProgram(program[i]);
258     }
259     free(input_ptr[0]);
260     free(input_ptr[1]);
261     free(input_ptr[2]);
262     free(output_ptr);
263     free_mtdata( d );
264 
265     return err;
266 }
267 
268 
269