1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17
18
19 #include "harness/conversions.h"
20 #include "harness/typeWrappers.h"
21 #include "harness/testHarness.h"
22
23 #include "structs.h"
24
25 #include "defines.h"
26
27 #include "type_replacer.h"
28
29
get_align(size_t vecSize)30 size_t get_align(size_t vecSize)
31 {
32 if(vecSize == 3)
33 {
34 return 4;
35 }
36 return vecSize;
37 }
38
39 /* // Lots of conditionals means this is not gonna be an optimal min on intel. */
40 /* // That's okay, make sure we only call a few times per test, not for every */
41 /* // element */
42 /* size_t min_of_nonzero(size_t a, size_t b) */
43 /* { */
44 /* if(a != 0 && (a<=b || b==0)) */
45 /* { */
46 /* return a; */
47 /* } */
48 /* if(b != 0 && (b<a || a==0)) */
49 /* { */
50 /* return b; */
51 /* } */
52 /* return 0; */
53 /* } */
54
55
56 /* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize, */
57 /* size_t postSize, size_t typeMultiplePostSize, */
58 /* ExplicitType kType, size_t vecSize) */
59 /* { */
60 /* size_t pre_min = min_of_nonzero(preSize, */
61 /* typeMultiplePreSize* */
62 /* get_explicit_type_size(kType)); */
63 /* size_t post_min = min_of_nonzero(postSize, */
64 /* typeMultiplePostSize* */
65 /* get_explicit_type_size(kType)); */
66 /* size_t struct_min = min_of_nonzero(pre_min, post_min); */
67 /* size_t result = min_of_nonzero(struct_min, get_align(vecSize) */
68 /* *get_explicit_type_size(kType)); */
69 /* return result; */
70
71 /* } */
72
73
74
test_vec_internal(cl_device_id deviceID,cl_context context,cl_command_queue queue,const char * pattern,const char * testName,size_t bufSize,size_t preSize,size_t typeMultiplePreSize,size_t postSize,size_t typeMultiplePostSize)75 int test_vec_internal(cl_device_id deviceID, cl_context context,
76 cl_command_queue queue, const char * pattern,
77 const char * testName, size_t bufSize,
78 size_t preSize, size_t typeMultiplePreSize,
79 size_t postSize, size_t typeMultiplePostSize)
80 {
81 int err;
82 int typeIdx, vecSizeIdx;
83
84 char tmpBuffer[2048];
85 char srcBuffer[2048];
86
87 size_t preSizeBytes, postSizeBytes, typeSize, totSize;
88
89 clState * pClState = newClState(deviceID, context, queue);
90 bufferStruct * pBuffers =
91 newBufferStruct(bufSize, bufSize*sizeof(cl_uint)/sizeof(cl_char), pClState);
92
93 if(pBuffers == NULL) {
94 destroyClState(pClState);
95 vlog_error("%s : Could not create buffer\n", testName);
96 return -1;
97 }
98
99 for(typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
100 {
101
102 // Skip doubles if it is not supported otherwise enable pragma
103 if (types[typeIdx] == kDouble) {
104 if (!is_extension_available(deviceID, "cl_khr_fp64")) {
105 continue;
106 } else {
107 doReplace(tmpBuffer, 2048, pattern,
108 ".PRAGMA.", "#pragma OPENCL EXTENSION cl_khr_fp64: ",
109 ".STATE.", "enable");
110 }
111 } else {
112 if (types[typeIdx] == kLong || types[typeIdx] == kULong) {
113 if (gIsEmbedded)
114 continue;
115 }
116
117 doReplace(tmpBuffer, 2048, pattern,
118 ".PRAGMA.", " ",
119 ".STATE.", " ");
120 }
121
122 typeSize = get_explicit_type_size(types[typeIdx]);
123 preSizeBytes = preSize + typeSize*typeMultiplePreSize;
124 postSizeBytes = postSize + typeSize*typeMultiplePostSize;
125
126
127
128 for(vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx) {
129
130 totSize = preSizeBytes + postSizeBytes +
131 typeSize*get_align(g_arrVecSizes[vecSizeIdx]);
132
133 doReplace(srcBuffer, 2048, tmpBuffer,
134 ".TYPE.", g_arrTypeNames[typeIdx],
135 ".NUM.", g_arrVecSizeNames[vecSizeIdx]);
136
137 if(srcBuffer[0] == '\0') {
138 vlog_error("%s: failed to fill source buf for type %s%s\n",
139 testName,
140 g_arrTypeNames[typeIdx],
141 g_arrVecSizeNames[vecSizeIdx]);
142 destroyBufferStruct(pBuffers, pClState);
143 destroyClState(pClState);
144 return -1;
145 }
146
147 // log_info("Buffer is \"\n%s\n\"\n", srcBuffer);
148 // fflush(stdout);
149
150 err = clStateMakeProgram(pClState, srcBuffer, testName );
151 if (err) {
152 vlog_error("%s: Error compiling \"\n%s\n\"",
153 testName, srcBuffer);
154 destroyBufferStruct(pBuffers, pClState);
155 destroyClState(pClState);
156 return -1;
157 }
158
159 err = pushArgs(pBuffers, pClState);
160 if(err != 0) {
161 vlog_error("%s: failed to push args %s%s\n",
162 testName,
163 g_arrTypeNames[typeIdx],
164 g_arrVecSizeNames[vecSizeIdx]);
165 destroyBufferStruct(pBuffers, pClState);
166 destroyClState(pClState);
167 return -1;
168 }
169
170 // log_info("About to Run kernel\n"); fflush(stdout);
171 // now we run the kernel
172 err = runKernel(pClState,
173 bufSize/(g_arrVecSizes[vecSizeIdx]* g_arrTypeSizes[typeIdx]));
174 if(err != 0) {
175 vlog_error("%s: runKernel fail (%ld threads) %s%s\n",
176 testName, pClState->m_numThreads,
177 g_arrTypeNames[typeIdx],
178 g_arrVecSizeNames[vecSizeIdx]);
179 destroyBufferStruct(pBuffers, pClState);
180 destroyClState(pClState);
181 return -1;
182 }
183
184 // log_info("About to retrieve results\n"); fflush(stdout);
185 err = retrieveResults(pBuffers, pClState);
186 if(err != 0) {
187 vlog_error("%s: failed to retrieve results %s%s\n",
188 testName,
189 g_arrTypeNames[typeIdx],
190 g_arrVecSizeNames[vecSizeIdx]);
191 destroyBufferStruct(pBuffers, pClState);
192 destroyClState(pClState);
193 return -1;
194 }
195
196
197
198 if(preSizeBytes+postSizeBytes == 0)
199 {
200 // log_info("About to Check Correctness\n"); fflush(stdout);
201 err = checkCorrectness(pBuffers, pClState,
202 get_align(g_arrVecSizes[vecSizeIdx])*
203 typeSize);
204 }
205 else
206 {
207 // we're checking for an aligned struct
208 err = checkPackedCorrectness(pBuffers, pClState, totSize,
209 preSizeBytes);
210 }
211
212 if(err != 0) {
213 vlog_error("%s: incorrect results %s%s\n",
214 testName,
215 g_arrTypeNames[typeIdx],
216 g_arrVecSizeNames[vecSizeIdx]);
217 vlog_error("%s: Source was \"\n%s\n\"",
218 testName, srcBuffer);
219 destroyBufferStruct(pBuffers, pClState);
220 destroyClState(pClState);
221 return -1;
222 }
223
224 clStateDestroyProgramAndKernel(pClState);
225
226 }
227 }
228
229 destroyBufferStruct(pBuffers, pClState);
230
231 destroyClState(pClState);
232
233
234 // vlog_error("%s : implementation incomplete : FAIL\n", testName);
235 return 0; // -1; // fails on account of not being written.
236 }
237
238
239
240 const char * patterns[] = {
241 ".PRAGMA..STATE.\n"
242 "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
243 "{\n"
244 " int tid = get_global_id(0);\n"
245 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
246 "}\n",
247 ".PRAGMA..STATE.\n"
248 "typedef struct myUnpackedStruct { \n"
249 ".PRE."
250 " .TYPE..NUM. vec;\n"
251 ".POST."
252 "} testStruct;\n"
253 "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
254 "{\n"
255 " .SRC_SCOPE. testStruct test;\n"
256 " int tid = get_global_id(0);\n"
257 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
258 "}\n",
259 ".PRAGMA..STATE.\n"
260 "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
261 ".PRE."
262 " .TYPE..NUM. vec;\n"
263 ".POST."
264 "} testStruct;\n"
265 "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. *source, .DST_SCOPE. uint *dest)\n"
266 "{\n"
267 " .SRC_SCOPE. testStruct test;\n"
268 " int tid = get_global_id(0);\n"
269 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. uchar *)&test);\n"
270 "}\n",
271 ".PRAGMA..STATE.\n"
272 "typedef struct myStruct { \n"
273 ".PRE."
274 " .TYPE..NUM. vec;\n"
275 ".POST."
276 "} testStruct;\n"
277 "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, .DST_SCOPE. uint *dest)\n"
278 "{\n"
279 " int tid = get_global_id(0);\n"
280 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
281 "}\n",
282 ".PRAGMA..STATE.\n"
283 "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
284 ".PRE."
285 " .TYPE..NUM. vec;\n"
286 ".POST."
287 "} testStruct;\n"
288 "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE. testStruct *source, .DST_SCOPE. uint *dest)\n"
289 "{\n"
290 " int tid = get_global_id(0);\n"
291 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - (.SRC_SCOPE. uchar *)&(source[0]));\n"
292 "}\n",
293 // __attribute__ ((packed))
294 };
295
296
297
298 const char * pre_substitution_arr[] = {
299 "",
300 "char c;\n",
301 "short3 s;",
302 ".TYPE.3 tPre;\n",
303 ".TYPE. arrPre[5];\n",
304 ".TYPE. arrPre[12];\n",
305 NULL
306 };
307
308
309 // alignments of everything in pre_substitution_arr as raw alignments
310 // 0 if such a thing is meaningless
311 size_t pre_align_arr[] = {
312 0,
313 sizeof(cl_char),
314 4*sizeof(cl_short),
315 0, // taken care of in type_multiple_pre_align_arr
316 0,
317 0
318 };
319
320 // alignments of everything in pre_substitution_arr as multiples of
321 // sizeof(.TYPE.)
322 // 0 if such a thing is meaningless
323 size_t type_multiple_pre_align_arr[] = {
324 0,
325 0,
326 0,
327 4,
328 5,
329 12
330 };
331
332 const char * post_substitution_arr[] = {
333 "",
334 "char cPost;\n",
335 ".TYPE. arrPost[3];\n",
336 ".TYPE. arrPost[5];\n",
337 ".TYPE.3 arrPost;\n",
338 ".TYPE. arrPost[12];\n",
339 NULL
340 };
341
342
343 // alignments of everything in post_substitution_arr as raw alignments
344 // 0 if such a thing is meaningless
345 size_t post_align_arr[] = {
346 0,
347 sizeof(cl_char),
348 0, // taken care of in type_multiple_post_align_arr
349 0,
350 0,
351 0
352 };
353
354 // alignments of everything in post_substitution_arr as multiples of
355 // sizeof(.TYPE.)
356 // 0 if such a thing is meaningless
357 size_t type_multiple_post_align_arr[] = {
358 0,
359 0,
360 3,
361 5,
362 4,
363 12
364 };
365
366 // there hsould be a packed version of this?
test_vec_align_array(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)367 int test_vec_align_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
368 {
369 char tmp[2048];
370 int result;
371
372 log_info("Testing global\n");
373 doReplace(tmp, (size_t)2048, patterns[0],
374 ".SRC_SCOPE.", "__global",
375 ".DST_SCOPE.", "__global"); //
376 result = test_vec_internal(deviceID, context, queue, tmp,
377 "test_vec_align_array",
378 BUFFER_SIZE, 0, 0, 0, 0);
379 return result;
380 }
381
382
test_vec_align_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)383 int test_vec_align_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
384 {
385 char tmp1[2048], tmp2[2048];
386 int result = 0;
387 int preIdx, postIdx;
388
389 log_info("testing __private\n");
390 doReplace(tmp2, (size_t)2048, patterns[1],
391 ".SRC_SCOPE.", "__private",
392 ".DST_SCOPE.", "__global"); //
393
394 for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
395 for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
396 doReplace(tmp1, (size_t)2048, tmp2,
397 ".PRE.", pre_substitution_arr[preIdx],
398 ".POST.", post_substitution_arr[postIdx]);
399
400 result = test_vec_internal(deviceID, context, queue, tmp1,
401 "test_vec_align_struct",
402 512, 0, 0, 0, 0);
403 if (result != 0) {
404 return result;
405 }
406 }
407 }
408
409 log_info("testing __local\n");
410 doReplace(tmp2, (size_t)2048, patterns[1],
411 ".SRC_SCOPE.", "__local",
412 ".DST_SCOPE.", "__global"); //
413
414 for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
415 for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
416 doReplace(tmp1, (size_t)2048, tmp2,
417 ".PRE.", pre_substitution_arr[preIdx],
418 ".POST.", post_substitution_arr[postIdx]);
419
420 result = test_vec_internal(deviceID, context, queue, tmp1,
421 "test_vec_align_struct",
422 512, 0, 0, 0, 0);
423 if(result != 0) {
424 return result;
425 }
426 }
427 }
428 return 0;
429 }
430
test_vec_align_packed_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)431 int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
432 {
433 char tmp1[2048], tmp2[2048];
434 int result = 0;
435 int preIdx, postIdx;
436
437
438 log_info("Testing __private\n");
439 doReplace(tmp2, (size_t)2048, patterns[2],
440 ".SRC_SCOPE.", "__private",
441 ".DST_SCOPE.", "__global"); //
442
443 for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
444 for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
445 doReplace(tmp1, (size_t)2048, tmp2,
446 ".PRE.", pre_substitution_arr[preIdx],
447 ".POST.", post_substitution_arr[postIdx]);
448
449 result = test_vec_internal(deviceID, context, queue, tmp1,
450 "test_vec_align_packed_struct",
451 512, pre_align_arr[preIdx],
452 type_multiple_pre_align_arr[preIdx],
453 post_align_arr[postIdx],
454 type_multiple_post_align_arr[postIdx]);
455 if(result != 0) {
456 return result;
457 }
458 }
459 }
460
461 log_info("testing __local\n");
462 doReplace(tmp2, (size_t)2048, patterns[2],
463 ".SRC_SCOPE.", "__local",
464 ".DST_SCOPE.", "__global"); //
465
466 for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
467 for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
468 doReplace(tmp1, (size_t)2048, tmp2,
469 ".PRE.", pre_substitution_arr[preIdx],
470 ".POST.", post_substitution_arr[postIdx]);
471
472 result = test_vec_internal(deviceID, context, queue, tmp1,
473 "test_vec_align_packed_struct",
474 512, pre_align_arr[preIdx],
475 type_multiple_pre_align_arr[preIdx],
476 post_align_arr[postIdx],
477 type_multiple_post_align_arr[postIdx]);
478 if (result != 0) {
479 return result;
480 }
481 }
482 }
483 return 0;
484 }
485
test_vec_align_struct_arr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)486 int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
487 {
488 char tmp1[2048], tmp2[2048];
489 int result = 0;
490 int preIdx, postIdx;
491
492
493 log_info("testing __global\n");
494 doReplace(tmp2, (size_t)2048, patterns[3],
495 ".SRC_SCOPE.", "__global",
496 ".DST_SCOPE.", "__global"); //
497
498 for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
499 for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
500 doReplace(tmp1, (size_t)2048, tmp2,
501 ".PRE.", pre_substitution_arr[preIdx],
502 ".POST.", post_substitution_arr[postIdx]);
503
504 result = test_vec_internal(deviceID, context, queue, tmp1,
505 "test_vec_align_struct_arr",
506 BUFFER_SIZE, 0, 0, 0, 0);
507 if(result != 0) {
508 return result;
509 }
510 }
511 }
512 return 0;
513 }
514
test_vec_align_packed_struct_arr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)515 int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
516 {
517 char tmp1[2048], tmp2[2048];
518 int result = 0;
519 int preIdx, postIdx;
520
521
522 log_info("Testing __global\n");
523 doReplace(tmp2, (size_t)2048, patterns[4],
524 ".SRC_SCOPE.", "__global",
525 ".DST_SCOPE.", "__global"); //
526
527 for(preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx) {
528 for(postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx) {
529 doReplace(tmp1, (size_t)2048, tmp2,
530 ".PRE.", pre_substitution_arr[preIdx],
531 ".POST.", post_substitution_arr[postIdx]);
532
533 result = test_vec_internal(deviceID, context, queue, tmp1,
534 "test_vec_align_packed_struct_arr",
535 BUFFER_SIZE, pre_align_arr[preIdx],
536 type_multiple_pre_align_arr[preIdx],
537 post_align_arr[postIdx],
538 type_multiple_post_align_arr[postIdx]);
539 if(result != 0)
540 return result;
541 }
542 }
543 return 0;
544 }
545
546