• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 
25 #include "procs.h"
26 
27 const char *constant_kernel_code =
28 "__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
29 "{\n"
30 "    int  tid = get_global_id(0);\n"
31 "\n"
32 "    float ftmp = tmpF[tid]; \n"
33 "    float Itmp = tmpI[tid]; \n"
34 "    out[tid] = ftmp * Itmp; \n"
35 "}\n";
36 
37 const char *loop_constant_kernel_code =
38 "kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
39 "{\n"
40 "    int tid = get_global_id(0);\n"
41 "    float sum = 0;\n"
42 "    for (int i = 0; i < num; i++) {\n"
43 "        float  pos  = i_pos[i*3];\n"
44 "        sum += pos;\n"
45 "    }\n"
46 "    out[tid] = sum;\n"
47 "}\n";
48 
49 
50 static int
verify(cl_float * tmpF,cl_int * tmpI,cl_float * out,int n)51 verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
52 {
53     int         i;
54 
55     for (i=0; i < n; i++)
56     {
57         float f = tmpF[i] * tmpI[i];
58         if( out[i] != f )
59         {
60             log_error("CONSTANT test failed\n");
61             return -1;
62         }
63     }
64 
65     log_info("CONSTANT test passed\n");
66     return 0;
67 }
68 
69 
70 static int
verify_loop_constant(const cl_float * tmp,cl_float * out,cl_int l,int n)71 verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
72 {
73     int i;
74     cl_int j;
75     for (i=0; i < n; i++)
76     {
77         float sum = 0;
78         for (j=0; j < l; ++j)
79             sum += tmp[j*3];
80 
81         if( out[i] != sum )
82         {
83             log_error("loop CONSTANT test failed\n");
84             return -1;
85         }
86     }
87 
88     log_info("loop CONSTANT test passed\n");
89     return 0;
90 }
91 
92 int
test_constant(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)93 test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
94 {
95     cl_mem            streams[3];
96     cl_int            *tmpI;
97     cl_float        *tmpF, *out;
98     cl_program        program;
99     cl_kernel        kernel;
100     size_t    global_threads[3];
101     int                err;
102     unsigned int                i;
103     cl_ulong maxSize, maxGlobalSize, maxAllocSize;
104     size_t num_floats, num_ints, constant_values;
105     MTdata          d;
106     RoundingMode     oldRoundMode;
107     int isRTZ = 0;
108 
109   /* Verify our test buffer won't be bigger than allowed */
110     err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
111     test_error( err, "Unable to get max constant buffer size" );
112 
113   log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
114 
115   // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
116   err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
117   test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
118 
119   if (maxSize > maxGlobalSize / 4)
120     maxSize = maxGlobalSize / 4;
121 
122   err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
123   test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
124 
125   if (maxSize > maxAllocSize)
126     maxSize = maxAllocSize;
127 
128   maxSize/=4;
129   num_ints = (size_t)maxSize/sizeof(cl_int);
130   num_floats = (size_t)maxSize/sizeof(cl_float);
131   if (num_ints >= num_floats) {
132     constant_values = num_floats;
133   } else {
134     constant_values = num_ints;
135   }
136 
137   log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
138            constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
139 
140     tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
141     tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
142     out  = (cl_float*)malloc(sizeof(cl_float) * constant_values);
143     streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
144                                 sizeof(cl_float) * constant_values, NULL, NULL);
145     if (!streams[0])
146     {
147         log_error("clCreateBuffer failed\n");
148         return -1;
149     }
150     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
151                                 sizeof(cl_float) * constant_values, NULL, NULL);
152     if (!streams[1])
153     {
154         log_error("clCreateBuffer failed\n");
155         return -1;
156     }
157     streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
158                                 sizeof(cl_int) * constant_values, NULL, NULL);
159     if (!streams[2])
160     {
161         log_error("clCreateBuffer failed\n");
162         return -1;
163     }
164 
165     d = init_genrand( gRandomSeed );
166     for (i=0; i<constant_values; i++) {
167         tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
168         tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
169     }
170     free_mtdata(d); d = NULL;
171 
172     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
173     if (err != CL_SUCCESS)
174     {
175         log_error("clWriteArray failed\n");
176         return -1;
177     }
178   err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
179     if (err != CL_SUCCESS)
180     {
181         log_error("clWriteArray failed\n");
182         return -1;
183     }
184 
185   err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
186     if (err) {
187     log_error("Failed to create kernel and program: %d\n", err);
188     return -1;
189   }
190 
191 
192     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
193     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
194     err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
195     if (err != CL_SUCCESS)
196     {
197         log_error("clSetKernelArgs failed\n");
198         return -1;
199     }
200 
201     global_threads[0] = constant_values;
202     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
203     if (err != CL_SUCCESS)
204     {
205         log_error("clEnqueueNDRangeKernel failed: %d\n", err);
206         return -1;
207     }
208     err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
209     if (err != CL_SUCCESS)
210     {
211         log_error("clEnqueueReadBuffer failed\n");
212         return -1;
213     }
214 
215     //If we only support rtz mode
216     if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
217     {
218         oldRoundMode = set_round(kRoundTowardZero, kfloat);
219         isRTZ = 1;
220     }
221 
222     err = verify(tmpF, tmpI, out, (int)constant_values);
223 
224     if (isRTZ)
225         (void)set_round(oldRoundMode, kfloat);
226 
227     // Loop constant buffer test
228     cl_program loop_program;
229     cl_kernel  loop_kernel;
230     cl_int limit = 2;
231 
232     memset(out, 0, sizeof(cl_float) * constant_values);
233     err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
234                                       &loop_constant_kernel_code, "loop_constant_kernel" );
235     if (err) {
236         log_error("Failed to create loop kernel and program: %d\n", err);
237         return -1;
238     }
239 
240     err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
241     err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
242     err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
243     if (err != CL_SUCCESS) {
244         log_error("clSetKernelArgs for loop kernel failed\n");
245         return -1;
246     }
247 
248     err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
249     if (err != CL_SUCCESS) {
250         log_error("clEnqueueNDRangeKernel failed: %d\n", err);
251         return -1;
252     }
253     err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
254     if (err != CL_SUCCESS) {
255         log_error("clEnqueueReadBuffer failed\n");
256         return -1;
257     }
258 
259     err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
260 
261     // cleanup
262     clReleaseMemObject(streams[0]);
263     clReleaseMemObject(streams[1]);
264     clReleaseMemObject(streams[2]);
265     clReleaseKernel(kernel);
266     clReleaseProgram(program);
267     clReleaseKernel(loop_kernel);
268     clReleaseProgram(loop_program);
269     free(tmpI);
270     free(tmpF);
271     free(out);
272 
273     return err;
274 }
275 
276 
277 
278 
279 
280