1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "structs.h"
17
18
19 #include "defines.h"
20
21 #define DEBUG_MEM_ALLOC 0
22
23 /** typedef struct _bufferStruct
24 {
25 void * m_pIn;
26 void * m_pOut;
27
28 cl_mem m_outBuffer;
29 cl_mem m_inBuffer;
30
31 size_t m_bufSize;
32 } bufferStruct;
33 */
34
35
newClState(cl_device_id device,cl_context context,cl_command_queue queue)36 clState *newClState(cl_device_id device, cl_context context,
37 cl_command_queue queue)
38 {
39 clState *pResult = (clState *)malloc(sizeof(clState));
40 #if DEBUG_MEM_ALLOC
41 log_info("malloc clState * %x\n", pResult);
42 #endif
43
44 pResult->m_device = device;
45 pResult->m_context = context;
46 pResult->m_queue = queue;
47
48 pResult->m_kernel = NULL;
49 pResult->m_program = NULL;
50 return pResult;
51 }
52
destroyClState(clState * pState)53 clState *destroyClState(clState *pState)
54 {
55 clStateDestroyProgramAndKernel(pState);
56 #if DEBUG_MEM_ALLOC
57 log_info("delete (free) clState * %x\n", pState);
58 #endif
59 free(pState);
60 return NULL;
61 }
62
63
clStateMakeProgram(clState * pState,const char * prog,const char * kernelName)64 int clStateMakeProgram(clState *pState, const char *prog,
65 const char *kernelName)
66 {
67 const char *srcArr[1] = { NULL };
68 srcArr[0] = prog;
69 int err =
70 create_single_kernel_helper(pState->m_context, &(pState->m_program),
71 &(pState->m_kernel), 1, srcArr, kernelName);
72 #if DEBUG_MEM_ALLOC
73 log_info("create program and kernel\n");
74 #endif
75 return err;
76 }
77
runKernel(clState * pState,size_t numThreads)78 int runKernel(clState *pState, size_t numThreads)
79 {
80 int err;
81 pState->m_numThreads = numThreads;
82 err = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1, NULL,
83 &(pState->m_numThreads), NULL, 0, NULL, NULL);
84 if (err != CL_SUCCESS)
85 {
86 log_error("clEnqueueNDRangeKernel returned %d (%x)\n", err, err);
87 return -1;
88 }
89 return 0;
90 }
91
92
clStateDestroyProgramAndKernel(clState * pState)93 void clStateDestroyProgramAndKernel(clState *pState)
94 {
95 #if DEBUG_MEM_ALLOC
96 log_info("destroy program and kernel\n");
97 #endif
98 if (pState->m_kernel != NULL)
99 {
100 clReleaseKernel(pState->m_kernel);
101 pState->m_kernel = NULL;
102 }
103 if (pState->m_program != NULL)
104 {
105 clReleaseProgram(pState->m_program);
106 pState->m_program = NULL;
107 }
108 }
109
newBufferStruct(size_t inSize,size_t outSize,clState * pClState)110 bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState)
111 {
112 int error;
113 bufferStruct *pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
114 #if DEBUG_MEM_ALLOC
115 log_info("malloc bufferStruct * %x\n", pResult);
116 #endif
117
118 pResult->m_bufSizeIn = inSize;
119 pResult->m_bufSizeOut = outSize;
120
121 pResult->m_pIn = malloc(inSize);
122 pResult->m_pOut = malloc(outSize);
123 #if DEBUG_MEM_ALLOC
124 log_info("malloc m_pIn %x\n", pResult->m_pIn);
125 log_info("malloc m_pOut %x\n", pResult->m_pOut);
126 #endif
127
128 pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
129 inSize, NULL, &error);
130 if (pResult->m_inBuffer == NULL)
131 {
132 vlog_error("clCreateArray failed for input (%d)\n", error);
133 return destroyBufferStruct(pResult, pClState);
134 }
135 #if DEBUG_MEM_ALLOC
136 log_info("clCreateBuffer %x\n", pResult->m_inBuffer);
137 #endif
138
139 pResult->m_outBuffer = clCreateBuffer(
140 pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error);
141 if (pResult->m_outBuffer == NULL)
142 {
143 vlog_error("clCreateArray failed for output (%d)\n", error);
144 return destroyBufferStruct(pResult, pClState);
145 }
146 #if DEBUG_MEM_ALLOC
147 log_info("clCreateBuffer %x\n", pResult->m_outBuffer);
148 #endif
149
150 pResult->m_bufferUploaded = false;
151
152 return pResult;
153 }
154
destroyBufferStruct(bufferStruct * destroyMe,clState * pClState)155 bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState)
156 {
157 if (destroyMe)
158 {
159 if (destroyMe->m_outBuffer != NULL)
160 {
161 #if DEBUG_MEM_ALLOC
162 log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
163 #endif
164 clReleaseMemObject(destroyMe->m_outBuffer);
165 destroyMe->m_outBuffer = NULL;
166 }
167 if (destroyMe->m_inBuffer != NULL)
168 {
169 #if DEBUG_MEM_ALLOC
170 log_info("clReleaseMemObject %x\n", destroyMe->m_outBuffer);
171 #endif
172 clReleaseMemObject(destroyMe->m_inBuffer);
173 destroyMe->m_inBuffer = NULL;
174 }
175 if (destroyMe->m_pIn != NULL)
176 {
177 #if DEBUG_MEM_ALLOC
178 log_info("delete (free) m_pIn %x\n", destroyMe->m_pIn);
179 #endif
180 free(destroyMe->m_pIn);
181 destroyMe->m_pIn = NULL;
182 }
183 if (destroyMe->m_pOut != NULL)
184 {
185 #if DEBUG_MEM_ALLOC
186 log_info("delete (free) m_pOut %x\n", destroyMe->m_pOut);
187 #endif
188 free(destroyMe->m_pOut);
189 destroyMe->m_pOut = NULL;
190 }
191 #if DEBUG_MEM_ALLOC
192 log_info("delete (free) bufferStruct * %x\n", destroyMe);
193 #endif
194 free((void *)destroyMe);
195 destroyMe = NULL;
196 }
197 return destroyMe;
198 }
199
initContents(bufferStruct * pBufferStruct,clState * pClState,size_t typeSize,size_t countIn,size_t countOut)200 void initContents(bufferStruct *pBufferStruct, clState *pClState,
201 size_t typeSize, size_t countIn, size_t countOut)
202 {
203 size_t i;
204
205 uint64_t start = 0;
206
207 switch (typeSize)
208 {
209 case 1: {
210 uint8_t *ub = (uint8_t *)(pBufferStruct->m_pIn);
211 for (i = 0; i < countIn; ++i)
212 {
213 ub[i] = (uint8_t)start++;
214 }
215 break;
216 }
217 case 2: {
218 uint16_t *us = (uint16_t *)(pBufferStruct->m_pIn);
219 for (i = 0; i < countIn; ++i)
220 {
221 us[i] = (uint16_t)start++;
222 }
223 break;
224 }
225 case 4: {
226 if (!g_wimpyMode)
227 {
228 uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn);
229 for (i = 0; i < countIn; ++i)
230 {
231 ui[i] = (uint32_t)start++;
232 }
233 }
234 else
235 {
236 // The short test doesn't iterate over the entire 32 bit space
237 // so we alternate between positive and negative values
238 int32_t *ui = (int32_t *)(pBufferStruct->m_pIn);
239 int32_t sign = 1;
240 for (i = 0; i < countIn; ++i, ++start)
241 {
242 ui[i] = (int32_t)start * sign;
243 sign = sign * -1;
244 }
245 }
246 break;
247 }
248 case 8: {
249 // We don't iterate over the entire space of 64 bit so for the
250 // selects, we want to test positive and negative values
251 int64_t *ll = (int64_t *)(pBufferStruct->m_pIn);
252 int64_t sign = 1;
253 for (i = 0; i < countIn; ++i, ++start)
254 {
255 ll[i] = start * sign;
256 sign = sign * -1;
257 }
258 break;
259 }
260 default: {
261 log_error("invalid type size %x\n", (int)typeSize);
262 }
263 }
264 // pBufferStruct->m_bufSizeIn
265 // pBufferStruct->m_bufSizeOut
266 }
267
pushArgs(bufferStruct * pBufferStruct,clState * pClState)268 int pushArgs(bufferStruct *pBufferStruct, clState *pClState)
269 {
270 int err;
271 if (!pBufferStruct->m_bufferUploaded)
272 {
273 err = clEnqueueWriteBuffer(pClState->m_queue, pBufferStruct->m_inBuffer,
274 CL_TRUE, 0, pBufferStruct->m_bufSizeIn,
275 pBufferStruct->m_pIn, 0, NULL, NULL);
276 #if DEBUG_MEM_ALLOC
277 log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
278 #endif
279 if (err != CL_SUCCESS)
280 {
281 log_error("clEnqueueWriteBuffer failed\n");
282 return -1;
283 }
284 pBufferStruct->m_bufferUploaded = true;
285 }
286
287 err = clSetKernelArg(
288 pClState->m_kernel, 0,
289 sizeof(pBufferStruct->m_inBuffer), // pBufferStruct->m_bufSizeIn,
290 &(pBufferStruct->m_inBuffer));
291 #if DEBUG_MEM_ALLOC
292 // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_inBuffer);
293 #endif
294 if (err != CL_SUCCESS)
295 {
296 log_error("clSetKernelArgs failed, first arg (0)\n");
297 return -1;
298 }
299
300 err = clSetKernelArg(
301 pClState->m_kernel, 1,
302 sizeof(pBufferStruct->m_outBuffer), // pBufferStruct->m_bufSizeOut,
303 &(pBufferStruct->m_outBuffer));
304 if (err != CL_SUCCESS)
305 {
306 log_error("clSetKernelArgs failed, second arg (1)\n");
307 return -1;
308 }
309
310 #if DEBUG_MEM_ALLOC
311 // log_info("clSetKernelArg 0, %x\n", pBufferStruct->m_outBuffer);
312 #endif
313
314 return 0;
315 }
316
retrieveResults(bufferStruct * pBufferStruct,clState * pClState)317 int retrieveResults(bufferStruct *pBufferStruct, clState *pClState)
318 {
319 int err;
320 err = clEnqueueReadBuffer(pClState->m_queue, pBufferStruct->m_outBuffer,
321 CL_TRUE, 0, pBufferStruct->m_bufSizeOut,
322 pBufferStruct->m_pOut, 0, NULL, NULL);
323 if (err != CL_SUCCESS)
324 {
325 log_error("clEnqueueReadBuffer failed\n");
326 return -1;
327 }
328 return 0;
329 }
330
331 // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
332 // and g_arrVecSizes
checkCorrectnessAlign(bufferStruct * pBufferStruct,clState * pClState,size_t minAlign)333 int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
334 size_t minAlign)
335 {
336 size_t i;
337 cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
338 for (i = 0; i < pClState->m_numThreads; ++i)
339 {
340 if ((targetArr[i]) % minAlign != (cl_uint)0)
341 {
342 vlog_error("Error %d (of %d). Expected a multple of %x, got %x\n",
343 i, pClState->m_numThreads, minAlign, targetArr[i]);
344 return -1;
345 }
346 }
347
348 /* log_info("\n");
349 for(i = 0; i < 4; ++i) {
350 log_info("%lx, ", targetArr[i]);
351 }
352 log_info("\n");
353 fflush(stdout); */
354 return 0;
355 }
356
checkCorrectnessStep(bufferStruct * pBufferStruct,clState * pClState,size_t typeSize,size_t vecWidth)357 int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
358 size_t typeSize, size_t vecWidth)
359 {
360 size_t i;
361 cl_int targetSize = (cl_int)vecWidth;
362 cl_int *targetArr = (cl_int *)(pBufferStruct->m_pOut);
363 if (targetSize == 3)
364 {
365 targetSize = 4; // hack for 4-aligned vec3 types
366 }
367 for (i = 0; i < pClState->m_numThreads; ++i)
368 {
369 if (targetArr[i] != targetSize)
370 {
371 vlog_error("Error %ld (of %ld). Expected %d, got %d\n", i,
372 pClState->m_numThreads, targetSize, targetArr[i]);
373 return -1;
374 }
375 }
376 return 0;
377 }
378
379 // vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
380 // and g_arrVecSizes
checkPackedCorrectness(bufferStruct * pBufferStruct,clState * pClState,size_t totSize,size_t beforeSize)381 int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
382 size_t totSize, size_t beforeSize)
383 {
384 size_t i;
385 cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
386 for (i = 0; i < pClState->m_numThreads; ++i)
387 {
388 if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
389 {
390 vlog_error("Error %d (of %d). Expected %d more than a multple of "
391 "%d, got %d \n",
392 i, pClState->m_numThreads, beforeSize, totSize,
393 targetArr[i]);
394 return -1;
395 }
396 }
397
398 /* log_info("\n");
399 for(i = 0; i < 4; ++i) {
400 log_info("%lx, ", targetArr[i]);
401 }
402 log_info("\n");
403 fflush(stdout); */
404 return 0;
405 }
406