1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24
25 #include "procs.h"
26
27 const char *constant_kernel_code =
28 "__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
29 "{\n"
30 " int tid = get_global_id(0);\n"
31 "\n"
32 " float ftmp = tmpF[tid]; \n"
33 " float Itmp = tmpI[tid]; \n"
34 " out[tid] = ftmp * Itmp; \n"
35 "}\n";
36
37 const char *loop_constant_kernel_code =
38 "kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
39 "{\n"
40 " int tid = get_global_id(0);\n"
41 " float sum = 0;\n"
42 " for (int i = 0; i < num; i++) {\n"
43 " float pos = i_pos[i*3];\n"
44 " sum += pos;\n"
45 " }\n"
46 " out[tid] = sum;\n"
47 "}\n";
48
49
50 static int
verify(cl_float * tmpF,cl_int * tmpI,cl_float * out,int n)51 verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
52 {
53 int i;
54
55 for (i=0; i < n; i++)
56 {
57 float f = tmpF[i] * tmpI[i];
58 if( out[i] != f )
59 {
60 log_error("CONSTANT test failed\n");
61 return -1;
62 }
63 }
64
65 log_info("CONSTANT test passed\n");
66 return 0;
67 }
68
69
70 static int
verify_loop_constant(const cl_float * tmp,cl_float * out,cl_int l,int n)71 verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
72 {
73 int i;
74 cl_int j;
75 for (i=0; i < n; i++)
76 {
77 float sum = 0;
78 for (j=0; j < l; ++j)
79 sum += tmp[j*3];
80
81 if( out[i] != sum )
82 {
83 log_error("loop CONSTANT test failed\n");
84 return -1;
85 }
86 }
87
88 log_info("loop CONSTANT test passed\n");
89 return 0;
90 }
91
92 int
test_constant(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)93 test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
94 {
95 cl_mem streams[3];
96 cl_int *tmpI;
97 cl_float *tmpF, *out;
98 cl_program program;
99 cl_kernel kernel;
100 size_t global_threads[3];
101 int err;
102 unsigned int i;
103 cl_ulong maxSize, maxGlobalSize, maxAllocSize;
104 size_t num_floats, num_ints, constant_values;
105 MTdata d;
106 RoundingMode oldRoundMode;
107 int isRTZ = 0;
108
109 /* Verify our test buffer won't be bigger than allowed */
110 err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
111 test_error( err, "Unable to get max constant buffer size" );
112
113 log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
114
115 // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
116 err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
117 test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
118
119 if (maxSize > maxGlobalSize / 4)
120 maxSize = maxGlobalSize / 4;
121
122 err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
123 test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
124
125 if (maxSize > maxAllocSize)
126 maxSize = maxAllocSize;
127
128 maxSize/=4;
129 num_ints = (size_t)maxSize/sizeof(cl_int);
130 num_floats = (size_t)maxSize/sizeof(cl_float);
131 if (num_ints >= num_floats) {
132 constant_values = num_floats;
133 } else {
134 constant_values = num_ints;
135 }
136
137 log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
138 constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
139
140 tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
141 tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
142 out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
143 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
144 sizeof(cl_float) * constant_values, NULL, NULL);
145 if (!streams[0])
146 {
147 log_error("clCreateBuffer failed\n");
148 return -1;
149 }
150 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
151 sizeof(cl_float) * constant_values, NULL, NULL);
152 if (!streams[1])
153 {
154 log_error("clCreateBuffer failed\n");
155 return -1;
156 }
157 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
158 sizeof(cl_int) * constant_values, NULL, NULL);
159 if (!streams[2])
160 {
161 log_error("clCreateBuffer failed\n");
162 return -1;
163 }
164
165 d = init_genrand( gRandomSeed );
166 for (i=0; i<constant_values; i++) {
167 tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
168 tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
169 }
170 free_mtdata(d); d = NULL;
171
172 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
173 if (err != CL_SUCCESS)
174 {
175 log_error("clWriteArray failed\n");
176 return -1;
177 }
178 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
179 if (err != CL_SUCCESS)
180 {
181 log_error("clWriteArray failed\n");
182 return -1;
183 }
184
185 err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
186 if (err) {
187 log_error("Failed to create kernel and program: %d\n", err);
188 return -1;
189 }
190
191
192 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
193 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
194 err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
195 if (err != CL_SUCCESS)
196 {
197 log_error("clSetKernelArgs failed\n");
198 return -1;
199 }
200
201 global_threads[0] = constant_values;
202 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
203 if (err != CL_SUCCESS)
204 {
205 log_error("clEnqueueNDRangeKernel failed: %d\n", err);
206 return -1;
207 }
208 err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
209 if (err != CL_SUCCESS)
210 {
211 log_error("clEnqueueReadBuffer failed\n");
212 return -1;
213 }
214
215 //If we only support rtz mode
216 if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
217 {
218 oldRoundMode = set_round(kRoundTowardZero, kfloat);
219 isRTZ = 1;
220 }
221
222 err = verify(tmpF, tmpI, out, (int)constant_values);
223
224 if (isRTZ)
225 (void)set_round(oldRoundMode, kfloat);
226
227 // Loop constant buffer test
228 cl_program loop_program;
229 cl_kernel loop_kernel;
230 cl_int limit = 2;
231
232 memset(out, 0, sizeof(cl_float) * constant_values);
233 err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
234 &loop_constant_kernel_code, "loop_constant_kernel" );
235 if (err) {
236 log_error("Failed to create loop kernel and program: %d\n", err);
237 return -1;
238 }
239
240 err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
241 err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
242 err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
243 if (err != CL_SUCCESS) {
244 log_error("clSetKernelArgs for loop kernel failed\n");
245 return -1;
246 }
247
248 err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
249 if (err != CL_SUCCESS) {
250 log_error("clEnqueueNDRangeKernel failed: %d\n", err);
251 return -1;
252 }
253 err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
254 if (err != CL_SUCCESS) {
255 log_error("clEnqueueReadBuffer failed\n");
256 return -1;
257 }
258
259 err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
260
261 // cleanup
262 clReleaseMemObject(streams[0]);
263 clReleaseMemObject(streams[1]);
264 clReleaseMemObject(streams[2]);
265 clReleaseKernel(kernel);
266 clReleaseProgram(program);
267 clReleaseKernel(loop_kernel);
268 clReleaseProgram(loop_program);
269 free(tmpI);
270 free(tmpF);
271 free(out);
272
273 return err;
274 }
275
276
277
278
279
280