1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24
25 #include "procs.h"
26
27 //#define HK_DO_NOT_RUN_SHORT_ASYNC 1
28 //#define HK_DO_NOT_RUN_USHORT_ASYNC 1
29 //#define HK_DO_NOT_RUN_CHAR_ASYNC 1
30 //#define HK_DO_NOT_RUN_UCHAR_ASYNC 1
31
32 #define TEST_PRIME_INT ((1<<16)+1)
33 #define TEST_PRIME_UINT ((1U<<16)+1U)
34 #define TEST_PRIME_LONG ((1LL<<32)+1LL)
35 #define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
36 #define TEST_PRIME_SHORT ((1S<<8)+1S)
37 #define TEST_PRIME_FLOAT (float)3.40282346638528860e+38
38 #define TEST_PRIME_HALF 119.f
39 #define TEST_BOOL true
40 #define TEST_PRIME_CHAR 0x77
41
42 #ifndef ulong
43 typedef unsigned long ulong;
44 #endif
45
46 #ifndef uchar
47 typedef unsigned char uchar;
48 #endif
49
50 #ifndef TestStruct
51 typedef struct{
52 int a;
53 float b;
54 } TestStruct;
55 #endif
56
57 //--- the code for the kernel executables
58 static const char *buffer_read_int_kernel_code[] = {
59 "__kernel void test_buffer_read_int(__global int *dst)\n"
60 "{\n"
61 " int tid = get_global_id(0);\n"
62 "\n"
63 " dst[tid] = ((1<<16)+1);\n"
64 "}\n",
65
66 "__kernel void test_buffer_read_int2(__global int2 *dst)\n"
67 "{\n"
68 " int tid = get_global_id(0);\n"
69 "\n"
70 " dst[tid] = ((1<<16)+1);\n"
71 "}\n",
72
73 "__kernel void test_buffer_read_int4(__global int4 *dst)\n"
74 "{\n"
75 " int tid = get_global_id(0);\n"
76 "\n"
77 " dst[tid] = ((1<<16)+1);\n"
78 "}\n",
79
80 "__kernel void test_buffer_read_int8(__global int8 *dst)\n"
81 "{\n"
82 " int tid = get_global_id(0);\n"
83 "\n"
84 " dst[tid] = ((1<<16)+1);\n"
85 "}\n",
86
87 "__kernel void test_buffer_read_int16(__global int16 *dst)\n"
88 "{\n"
89 " int tid = get_global_id(0);\n"
90 "\n"
91 " dst[tid] = ((1<<16)+1);\n"
92 "}\n" };
93
94 static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
95
96 static const char *buffer_read_uint_kernel_code[] = {
97 "__kernel void test_buffer_read_uint(__global uint *dst)\n"
98 "{\n"
99 " int tid = get_global_id(0);\n"
100 "\n"
101 " dst[tid] = ((1U<<16)+1U);\n"
102 "}\n",
103
104 "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
105 "{\n"
106 " int tid = get_global_id(0);\n"
107 "\n"
108 " dst[tid] = ((1U<<16)+1U);\n"
109 "}\n",
110
111 "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
112 "{\n"
113 " int tid = get_global_id(0);\n"
114 "\n"
115 " dst[tid] = ((1U<<16)+1U);\n"
116 "}\n",
117
118 "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
119 "{\n"
120 " int tid = get_global_id(0);\n"
121 "\n"
122 " dst[tid] = ((1U<<16)+1U);\n"
123 "}\n",
124
125 "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
126 "{\n"
127 " int tid = get_global_id(0);\n"
128 "\n"
129 " dst[tid] = ((1U<<16)+1U);\n"
130 "}\n" };
131
132 static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
133
134 static const char *buffer_read_long_kernel_code[] = {
135 "__kernel void test_buffer_read_long(__global long *dst)\n"
136 "{\n"
137 " int tid = get_global_id(0);\n"
138 "\n"
139 " dst[tid] = ((1L<<32)+1L);\n"
140 "}\n",
141
142 "__kernel void test_buffer_read_long2(__global long2 *dst)\n"
143 "{\n"
144 " int tid = get_global_id(0);\n"
145 "\n"
146 " dst[tid] = ((1L<<32)+1L);\n"
147 "}\n",
148
149 "__kernel void test_buffer_read_long4(__global long4 *dst)\n"
150 "{\n"
151 " int tid = get_global_id(0);\n"
152 "\n"
153 " dst[tid] = ((1L<<32)+1L);\n"
154 "}\n",
155
156 "__kernel void test_buffer_read_long8(__global long8 *dst)\n"
157 "{\n"
158 " int tid = get_global_id(0);\n"
159 "\n"
160 " dst[tid] = ((1L<<32)+1L);\n"
161 "}\n",
162
163 "__kernel void test_buffer_read_long16(__global long16 *dst)\n"
164 "{\n"
165 " int tid = get_global_id(0);\n"
166 "\n"
167 " dst[tid] = ((1L<<32)+1L);\n"
168 "}\n" };
169
170 static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
171
172 static const char *buffer_read_ulong_kernel_code[] = {
173 "__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
174 "{\n"
175 " int tid = get_global_id(0);\n"
176 "\n"
177 " dst[tid] = ((1UL<<32)+1UL);\n"
178 "}\n",
179
180 "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
181 "{\n"
182 " int tid = get_global_id(0);\n"
183 "\n"
184 " dst[tid] = ((1UL<<32)+1UL);\n"
185 "}\n",
186
187 "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
188 "{\n"
189 " int tid = get_global_id(0);\n"
190 "\n"
191 " dst[tid] = ((1UL<<32)+1UL);\n"
192 "}\n",
193
194 "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
195 "{\n"
196 " int tid = get_global_id(0);\n"
197 "\n"
198 " dst[tid] = ((1UL<<32)+1UL);\n"
199 "}\n",
200
201 "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
202 "{\n"
203 " int tid = get_global_id(0);\n"
204 "\n"
205 " dst[tid] = ((1UL<<32)+1UL);\n"
206 "}\n" };
207
208 static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
209
210 static const char *buffer_read_short_kernel_code[] = {
211 "__kernel void test_buffer_read_short(__global short *dst)\n"
212 "{\n"
213 " int tid = get_global_id(0);\n"
214 "\n"
215 " dst[tid] = (short)((1<<8)+1);\n"
216 "}\n",
217
218 "__kernel void test_buffer_read_short2(__global short2 *dst)\n"
219 "{\n"
220 " int tid = get_global_id(0);\n"
221 "\n"
222 " dst[tid] = (short)((1<<8)+1);\n"
223 "}\n",
224
225 "__kernel void test_buffer_read_short4(__global short4 *dst)\n"
226 "{\n"
227 " int tid = get_global_id(0);\n"
228 "\n"
229 " dst[tid] = (short)((1<<8)+1);\n"
230 "}\n",
231
232 "__kernel void test_buffer_read_short8(__global short8 *dst)\n"
233 "{\n"
234 " int tid = get_global_id(0);\n"
235 "\n"
236 " dst[tid] = (short)((1<<8)+1);\n"
237 "}\n",
238
239 "__kernel void test_buffer_read_short16(__global short16 *dst)\n"
240 "{\n"
241 " int tid = get_global_id(0);\n"
242 "\n"
243 " dst[tid] = (short)((1<<8)+1);\n"
244 "}\n" };
245
246 static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
247
248
249 static const char *buffer_read_ushort_kernel_code[] = {
250 "__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
251 "{\n"
252 " int tid = get_global_id(0);\n"
253 "\n"
254 " dst[tid] = (ushort)((1<<8)+1);\n"
255 "}\n",
256
257 "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
258 "{\n"
259 " int tid = get_global_id(0);\n"
260 "\n"
261 " dst[tid] = (ushort)((1<<8)+1);\n"
262 "}\n",
263
264 "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
265 "{\n"
266 " int tid = get_global_id(0);\n"
267 "\n"
268 " dst[tid] = (ushort)((1<<8)+1);\n"
269 "}\n",
270
271 "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
272 "{\n"
273 " int tid = get_global_id(0);\n"
274 "\n"
275 " dst[tid] = (ushort)((1<<8)+1);\n"
276 "}\n",
277
278 "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
279 "{\n"
280 " int tid = get_global_id(0);\n"
281 "\n"
282 " dst[tid] = (ushort)((1<<8)+1);\n"
283 "}\n" };
284
285 static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
286
287
288 static const char *buffer_read_float_kernel_code[] = {
289 "__kernel void test_buffer_read_float(__global float *dst)\n"
290 "{\n"
291 " int tid = get_global_id(0);\n"
292 "\n"
293 " dst[tid] = (float)3.40282346638528860e+38;\n"
294 "}\n",
295
296 "__kernel void test_buffer_read_float2(__global float2 *dst)\n"
297 "{\n"
298 " int tid = get_global_id(0);\n"
299 "\n"
300 " dst[tid] = (float)3.40282346638528860e+38;\n"
301 "}\n",
302
303 "__kernel void test_buffer_read_float4(__global float4 *dst)\n"
304 "{\n"
305 " int tid = get_global_id(0);\n"
306 "\n"
307 " dst[tid] = (float)3.40282346638528860e+38;\n"
308 "}\n",
309
310 "__kernel void test_buffer_read_float8(__global float8 *dst)\n"
311 "{\n"
312 " int tid = get_global_id(0);\n"
313 "\n"
314 " dst[tid] = (float)3.40282346638528860e+38;\n"
315 "}\n",
316
317 "__kernel void test_buffer_read_float16(__global float16 *dst)\n"
318 "{\n"
319 " int tid = get_global_id(0);\n"
320 "\n"
321 " dst[tid] = (float)3.40282346638528860e+38;\n"
322 "}\n" };
323
324 static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
325
326
327 static const char *buffer_read_half_kernel_code[] = {
328 "__kernel void test_buffer_read_half(__global half *dst)\n"
329 "{\n"
330 " int tid = get_global_id(0);\n"
331 "\n"
332 " dst[tid] = (half)119;\n"
333 "}\n",
334
335 "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
336 "{\n"
337 " int tid = get_global_id(0);\n"
338 "\n"
339 " dst[tid] = (half)119;\n"
340 "}\n",
341
342 "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
343 "{\n"
344 " int tid = get_global_id(0);\n"
345 "\n"
346 " dst[tid] = (half)119;\n"
347 "}\n",
348
349 "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
350 "{\n"
351 " int tid = get_global_id(0);\n"
352 "\n"
353 " dst[tid] = (half)119;\n"
354 "}\n",
355
356 "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
357 "{\n"
358 " int tid = get_global_id(0);\n"
359 "\n"
360 " dst[tid] = (half)119;\n"
361 "}\n" };
362
363 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
364
365
366 static const char *buffer_read_char_kernel_code[] = {
367 "__kernel void test_buffer_read_char(__global char *dst)\n"
368 "{\n"
369 " int tid = get_global_id(0);\n"
370 "\n"
371 " dst[tid] = (char)'w';\n"
372 "}\n",
373
374 "__kernel void test_buffer_read_char2(__global char2 *dst)\n"
375 "{\n"
376 " int tid = get_global_id(0);\n"
377 "\n"
378 " dst[tid] = (char)'w';\n"
379 "}\n",
380
381 "__kernel void test_buffer_read_char4(__global char4 *dst)\n"
382 "{\n"
383 " int tid = get_global_id(0);\n"
384 "\n"
385 " dst[tid] = (char)'w';\n"
386 "}\n",
387
388 "__kernel void test_buffer_read_char8(__global char8 *dst)\n"
389 "{\n"
390 " int tid = get_global_id(0);\n"
391 "\n"
392 " dst[tid] = (char)'w';\n"
393 "}\n",
394
395 "__kernel void test_buffer_read_char16(__global char16 *dst)\n"
396 "{\n"
397 " int tid = get_global_id(0);\n"
398 "\n"
399 " dst[tid] = (char)'w';\n"
400 "}\n" };
401
402 static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
403
404
405 static const char *buffer_read_uchar_kernel_code[] = {
406 "__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
407 "{\n"
408 " int tid = get_global_id(0);\n"
409 "\n"
410 " dst[tid] = 'w';\n"
411 "}\n",
412
413 "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
414 "{\n"
415 " int tid = get_global_id(0);\n"
416 "\n"
417 " dst[tid] = (uchar)'w';\n"
418 "}\n",
419
420 "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
421 "{\n"
422 " int tid = get_global_id(0);\n"
423 "\n"
424 " dst[tid] = (uchar)'w';\n"
425 "}\n",
426
427 "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
428 "{\n"
429 " int tid = get_global_id(0);\n"
430 "\n"
431 " dst[tid] = (uchar)'w';\n"
432 "}\n",
433
434 "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
435 "{\n"
436 " int tid = get_global_id(0);\n"
437 "\n"
438 " dst[tid] = (uchar)'w';\n"
439 "}\n" };
440
441 static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
442
443
444 static const char *buffer_read_struct_kernel_code =
445 "typedef struct{\n"
446 "int a;\n"
447 "float b;\n"
448 "} TestStruct;\n"
449 "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
450 "{\n"
451 " int tid = get_global_id(0);\n"
452 "\n"
453 " dst[tid].a = ((1<<16)+1);\n"
454 " dst[tid].b = (float)3.40282346638528860e+38;\n"
455 "}\n";
456
457
458 //--- the verify functions
verify_read_int(void * ptr,int n)459 static int verify_read_int(void *ptr, int n)
460 {
461 int i;
462 cl_int *outptr = (cl_int *)ptr;
463
464 for (i=0; i<n; i++){
465 if ( outptr[i] != TEST_PRIME_INT )
466 return -1;
467 }
468
469 return 0;
470 }
471
472
verify_read_uint(void * ptr,int n)473 static int verify_read_uint(void *ptr, int n)
474 {
475 int i;
476 cl_uint *outptr = (cl_uint *)ptr;
477
478 for (i=0; i<n; i++){
479 if ( outptr[i] != TEST_PRIME_UINT )
480 return -1;
481 }
482
483 return 0;
484 }
485
486
verify_read_long(void * ptr,int n)487 static int verify_read_long(void *ptr, int n)
488 {
489 int i;
490 cl_long *outptr = (cl_long *)ptr;
491
492 for (i=0; i<n; i++){
493 if ( outptr[i] != TEST_PRIME_LONG )
494 return -1;
495 }
496
497 return 0;
498 }
499
500
verify_read_ulong(void * ptr,int n)501 static int verify_read_ulong(void *ptr, int n)
502 {
503 int i;
504 cl_ulong *outptr = (cl_ulong *)ptr;
505
506 for (i=0; i<n; i++){
507 if ( outptr[i] != TEST_PRIME_ULONG )
508 return -1;
509 }
510
511 return 0;
512 }
513
514
verify_read_short(void * ptr,int n)515 static int verify_read_short(void *ptr, int n)
516 {
517 int i;
518 cl_short *outptr = (cl_short *)ptr;
519
520 for (i=0; i<n; i++){
521 if ( outptr[i] != (cl_short)((1<<8)+1) )
522 return -1;
523 }
524
525 return 0;
526 }
527
528
verify_read_ushort(void * ptr,int n)529 static int verify_read_ushort(void *ptr, int n)
530 {
531 int i;
532 cl_ushort *outptr = (cl_ushort *)ptr;
533
534 for (i=0; i<n; i++){
535 if ( outptr[i] != (cl_ushort)((1<<8)+1) )
536 return -1;
537 }
538
539 return 0;
540 }
541
542
verify_read_float(void * ptr,int n)543 static int verify_read_float( void *ptr, int n )
544 {
545 int i;
546 cl_float *outptr = (cl_float *)ptr;
547
548 for (i=0; i<n; i++){
549 if ( outptr[i] != TEST_PRIME_FLOAT )
550 return -1;
551 }
552
553 return 0;
554 }
555
556
verify_read_half(void * ptr,int n)557 static int verify_read_half( void *ptr, int n )
558 {
559 int i;
560 float *outptr = (float *)ptr; // FIXME: should this be cl_half_float?
561
562 for ( i = 0; i < n / 2; i++ ){
563 if ( outptr[i] != TEST_PRIME_HALF )
564 return -1;
565 }
566
567 return 0;
568 }
569
570
verify_read_char(void * ptr,int n)571 static int verify_read_char(void *ptr, int n)
572 {
573 int i;
574 cl_char *outptr = (cl_char *)ptr;
575
576 for (i=0; i<n; i++){
577 if ( outptr[i] != TEST_PRIME_CHAR )
578 return -1;
579 }
580
581 return 0;
582 }
583
584
verify_read_uchar(void * ptr,int n)585 static int verify_read_uchar(void *ptr, int n)
586 {
587 int i;
588 cl_uchar *outptr = (cl_uchar *)ptr;
589
590 for (i=0; i<n; i++){
591 if ( outptr[i] != TEST_PRIME_CHAR )
592 return -1;
593 }
594
595 return 0;
596 }
597
598
verify_read_struct(TestStruct * outptr,int n)599 static int verify_read_struct(TestStruct *outptr, int n)
600 {
601 int i;
602
603 for (i=0; i<n; i++)
604 {
605 if ( ( outptr[i].a != TEST_PRIME_INT ) ||
606 ( outptr[i].b != TEST_PRIME_FLOAT ) )
607 return -1;
608 }
609
610 return 0;
611 }
612
613 //----- the test functions
test_buffer_read(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))614 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
615 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
616 {
617 cl_mem buffers[5];
618 void *outptr[5];
619 void *inptr[5];
620 cl_program program[5];
621 cl_kernel kernel[5];
622 size_t global_work_size[3];
623 #ifdef USE_LOCAL_WORK_GROUP
624 size_t local_work_size[3];
625 #endif
626 cl_int err;
627 int i;
628 size_t ptrSizes[5];
629 int src_flag_id;
630 int total_errors = 0;
631
632 size_t min_alignment = get_min_alignment(context);
633
634 global_work_size[0] = (cl_uint)num_elements;
635
636 ptrSizes[0] = size;
637 ptrSizes[1] = ptrSizes[0] << 1;
638 ptrSizes[2] = ptrSizes[1] << 1;
639 ptrSizes[3] = ptrSizes[2] << 1;
640 ptrSizes[4] = ptrSizes[3] << 1;
641
642 //skip devices that don't support long
643 if (! gHasLong && strstr(type,"long") )
644 {
645 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
646 return CL_SUCCESS;
647 }
648
649 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
650 log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
651
652 for ( i = 0; i < loops; i++ ){
653 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
654 if ( ! outptr[i] ){
655 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
656 return -1;
657 }
658 inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
659 if ( ! inptr[i] ){
660 log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
661 return -1;
662 }
663
664
665 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
666 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
667 else
668 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
669 if ( err != CL_SUCCESS ){
670 print_error(err, " clCreateBuffer failed\n" );
671 align_free( outptr[i] );
672 align_free( inptr[i] );
673 return -1;
674 }
675
676 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
677 if ( err ){
678 log_error("Creating program for %s\n", type);
679 print_error(err, " Error creating program " );
680 clReleaseMemObject(buffers[i]);
681 align_free( outptr[i] );
682 align_free( inptr[i] );
683 return -1;
684 }
685
686 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
687 if ( err != CL_SUCCESS ){
688 print_error( err, "clSetKernelArg failed" );
689 clReleaseMemObject( buffers[i] );
690 clReleaseKernel( kernel[i] );
691 clReleaseProgram( program[i] );
692 align_free( outptr[i] );
693 align_free( inptr[i] );
694 return -1;
695 }
696
697 #ifdef USE_LOCAL_WORK_GROUP
698 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
699 test_error( err, "Unable to get work group size to use" );
700
701 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
702 #else
703 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
704 #endif
705 if ( err != CL_SUCCESS ){
706 print_error( err, "clEnqueueNDRangeKernel failed" );
707 clReleaseMemObject( buffers[i] );
708 clReleaseKernel( kernel[i] );
709 clReleaseProgram( program[i] );
710 align_free( outptr[i] );
711 align_free( inptr[i] );
712 return -1;
713 }
714
715 err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
716 if ( err != CL_SUCCESS ){
717 print_error( err, "clEnqueueReadBuffer failed" );
718 clReleaseMemObject( buffers[i] );
719 clReleaseKernel( kernel[i] );
720 clReleaseProgram( program[i] );
721 align_free( outptr[i] );
722 align_free( inptr[i] );
723 return -1;
724 }
725
726 if (fn(outptr[i], num_elements*(1<<i))){
727 log_error( " %s%d test failed\n", type, 1<<i );
728 total_errors++;
729 }
730 else{
731 log_info( " %s%d test passed\n", type, 1<<i );
732 }
733
734 err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL );
735 if ( err != CL_SUCCESS ){
736 print_error( err, "clEnqueueReadBuffer failed" );
737 clReleaseMemObject( buffers[i] );
738 clReleaseKernel( kernel[i] );
739 clReleaseProgram( program[i] );
740 align_free( outptr[i] );
741 align_free( inptr[i] );
742 return -1;
743 }
744
745 if (fn(inptr[i], num_elements*(1<<i))){
746 log_error( " %s%d test failed in-place readback\n", type, 1<<i );
747 total_errors++;
748 }
749 else{
750 log_info( " %s%d test passed in-place readback\n", type, 1<<i );
751 }
752
753
754 // cleanup
755 clReleaseMemObject( buffers[i] );
756 clReleaseKernel( kernel[i] );
757 clReleaseProgram( program[i] );
758 align_free( outptr[i] );
759 align_free( inptr[i] );
760 }
761 } // mem flag
762
763 return total_errors;
764
765 } // end test_buffer_read()
766
test_buffer_read_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))767 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
768 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
769 {
770 cl_mem buffers[5];
771 cl_program program[5];
772 cl_kernel kernel[5];
773 cl_event event;
774 void *outptr[5];
775 void *inptr[5];
776 size_t global_work_size[3];
777 #ifdef USE_LOCAL_WORK_GROUP
778 size_t local_work_size[3];
779 #endif
780 cl_int err;
781 int i;
782 size_t lastIndex;
783 size_t ptrSizes[5];
784 int src_flag_id;
785 int total_errors = 0;
786
787 size_t min_alignment = get_min_alignment(context);
788
789 global_work_size[0] = (cl_uint)num_elements;
790
791 ptrSizes[0] = size;
792 ptrSizes[1] = ptrSizes[0] << 1;
793 ptrSizes[2] = ptrSizes[1] << 1;
794 ptrSizes[3] = ptrSizes[2] << 1;
795 ptrSizes[4] = ptrSizes[3] << 1;
796
797 //skip devices that don't support long
798 if (! gHasLong && strstr(type,"long") )
799 {
800 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
801 return CL_SUCCESS;
802 }
803
804 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
805 log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
806
807 for ( i = 0; i < loops; i++ ){
808 outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
809 if ( ! outptr[i] ){
810 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
811 return -1;
812 }
813 memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
814 inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
815 if ( ! inptr[i] ){
816 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
817 return -1;
818 }
819 memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
820
821
822 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
823 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
824 else
825 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
826 if ( err != CL_SUCCESS ){
827 print_error(err, " clCreateBuffer failed\n" );
828 align_free( outptr[i] );
829 align_free( inptr[i] );
830 return -1;
831 }
832
833 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i]);
834 if ( err ){
835 log_error( " Error creating program for %s\n", type );
836 clReleaseMemObject( buffers[i] );
837 align_free( outptr[i] );
838 align_free( inptr[i] );
839 return -1;
840 }
841
842 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
843 if ( err != CL_SUCCESS ){
844 print_error( err, "clSetKernelArg failed" );
845 clReleaseMemObject( buffers[i] );
846 clReleaseKernel( kernel[i] );
847 clReleaseProgram( program[i] );
848 align_free( outptr[i] );
849 align_free( inptr[i] );
850 return -1;
851 }
852
853 #ifdef USE_LOCAL_WORK_GROUP
854 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
855 test_error( err, "Unable to get work group size to use" );
856
857 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
858 #else
859 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
860 #endif
861 if ( err != CL_SUCCESS ){
862 print_error( err, "clEnqueueNDRangeKernel failed" );
863 clReleaseMemObject( buffers[i] );
864 clReleaseKernel( kernel[i] );
865 clReleaseProgram( program[i] );
866 align_free( outptr[i] );
867 align_free( inptr[i] );
868 return -1;
869 }
870
871 lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
872 err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &event );
873 #ifdef CHECK_FOR_NON_WAIT
874 if ( ((uchar *)outptr[i])[lastIndex] ){
875 log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
876 log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
877 }
878 #endif
879 if ( err != CL_SUCCESS ){
880 print_error( err, "clEnqueueReadBuffer failed" );
881 clReleaseMemObject( buffers[i] );
882 clReleaseKernel( kernel[i] );
883 clReleaseProgram( program[i] );
884 align_free( outptr[i] );
885 align_free( inptr[i] );
886 return -1;
887 }
888 err = clWaitForEvents(1, &event );
889 if ( err != CL_SUCCESS ){
890 print_error( err, "clWaitForEvents() failed" );
891 clReleaseMemObject( buffers[i] );
892 clReleaseKernel( kernel[i] );
893 clReleaseProgram( program[i] );
894 align_free( outptr[i] );
895 align_free( inptr[i] );
896 return -1;
897 }
898
899 if ( fn(outptr[i], num_elements*(1<<i)) ){
900 log_error( " %s%d test failed\n", type, 1<<i );
901 total_errors++;
902 }
903 else{
904 log_info( " %s%d test passed\n", type, 1<<i );
905 }
906
907 // cleanup
908 clReleaseEvent( event );
909 clReleaseMemObject( buffers[i] );
910 clReleaseKernel( kernel[i] );
911 clReleaseProgram( program[i] );
912 align_free( outptr[i] );
913 align_free( inptr[i] );
914 }
915 } // mem flags
916
917
918 return total_errors;
919
920 } // end test_buffer_read_array_async()
921
922
test_buffer_read_array_barrier(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))923 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
924 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
925 {
926 cl_mem buffers[5];
927 cl_program program[5];
928 cl_kernel kernel[5];
929 cl_event event;
930 void *outptr[5], *inptr[5];
931 size_t global_work_size[3];
932 #ifdef USE_LOCAL_WORK_GROUP
933 size_t local_work_size[3];
934 #endif
935 cl_int err;
936 int i;
937 size_t lastIndex;
938 size_t ptrSizes[5];
939 int src_flag_id;
940 int total_errors = 0;
941
942 size_t min_alignment = get_min_alignment(context);
943
944 global_work_size[0] = (cl_uint)num_elements;
945
946 ptrSizes[0] = size;
947 ptrSizes[1] = ptrSizes[0] << 1;
948 ptrSizes[2] = ptrSizes[1] << 1;
949 ptrSizes[3] = ptrSizes[2] << 1;
950 ptrSizes[4] = ptrSizes[3] << 1;
951
952 //skip devices that don't support long
953 if (! gHasLong && strstr(type,"long") )
954 {
955 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
956 return CL_SUCCESS;
957 }
958
959 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
960 log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
961
962 for ( i = 0; i < loops; i++ ){
963 outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
964 if ( ! outptr[i] ){
965 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
966 return -1;
967 }
968 memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
969 inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
970 if ( ! inptr[i] ){
971 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
972 return -1;
973 }
974 memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
975
976 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
977 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
978 else
979 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
980 if ( err != CL_SUCCESS ){
981 print_error(err, " clCreateBuffer failed\n" );
982 align_free( outptr[i] );
983 align_free( inptr[i] );
984 return -1;
985 }
986
987 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
988 if ( err ){
989 log_error( " Error creating program for %s\n", type );
990 clReleaseMemObject( buffers[i] );
991 align_free( outptr[i] );
992 align_free( inptr[i] );
993 return -1;
994 }
995
996 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
997 if ( err != CL_SUCCESS ){
998 print_error( err, "clSetKernelArgs failed" );
999 clReleaseMemObject( buffers[i] );
1000 clReleaseKernel( kernel[i] );
1001 clReleaseProgram( program[i] );
1002 align_free( outptr[i] );
1003 align_free( inptr[i] );
1004 return -1;
1005 }
1006
1007 #ifdef USE_LOCAL_WORK_GROUP
1008 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
1009 test_error( err, "Unable to get work group size to use" );
1010
1011 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1012 #else
1013 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1014 #endif
1015 if ( err != CL_SUCCESS ){
1016 print_error( err, "clEnqueueNDRangeKernel failed" );
1017 clReleaseMemObject( buffers[i] );
1018 clReleaseKernel( kernel[i] );
1019 clReleaseProgram( program[i] );
1020 align_free( outptr[i] );
1021 align_free( inptr[i] );
1022 return -1;
1023 }
1024
1025 lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
1026 err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, (void *)(outptr[i]), 0, NULL, &event );
1027 #ifdef CHECK_FOR_NON_WAIT
1028 if ( ((uchar *)outptr[i])[lastIndex] ){
1029 log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
1030 log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
1031 }
1032 #endif
1033 if ( err != CL_SUCCESS ){
1034 print_error( err, "clEnqueueReadBuffer failed" );
1035 clReleaseMemObject( buffers[i] );
1036 clReleaseKernel( kernel[i] );
1037 clReleaseProgram( program[i] );
1038 align_free( outptr[i] );
1039 align_free( inptr[i] );
1040 return -1;
1041 }
1042 err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
1043 if ( err != CL_SUCCESS ){
1044 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
1045 clReleaseMemObject( buffers[i] );
1046 clReleaseKernel( kernel[i] );
1047 clReleaseProgram( program[i] );
1048 align_free( outptr[i] );
1049 return -1;
1050 }
1051
1052 err = clWaitForEvents(1, &event);
1053 if ( err != CL_SUCCESS ){
1054 print_error( err, "clWaitForEvents() failed" );
1055 clReleaseMemObject( buffers[i] );
1056 clReleaseKernel( kernel[i] );
1057 clReleaseProgram( program[i] );
1058 align_free( outptr[i] );
1059 align_free( inptr[i] );
1060 return -1;
1061 }
1062
1063 if ( fn(outptr[i], num_elements*(1<<i)) ){
1064 log_error(" %s%d test failed\n", type, 1<<i);
1065 total_errors++;
1066 }
1067 else{
1068 log_info(" %s%d test passed\n", type, 1<<i);
1069 }
1070
1071 // cleanup
1072 clReleaseEvent( event );
1073 clReleaseMemObject( buffers[i] );
1074 clReleaseKernel( kernel[i] );
1075 clReleaseProgram( program[i] );
1076 align_free( outptr[i] );
1077 align_free( inptr[i] );
1078 }
1079 } // cl_mem flags
1080 return total_errors;
1081
1082 } // end test_buffer_read_array_barrier()
1083
1084
1085 #define DECLARE_READ_TEST(type, realType) \
1086 int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1087 { \
1088 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1089 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1090 }
1091
DECLARE_READ_TEST(int,cl_int)1092 DECLARE_READ_TEST(int, cl_int)
1093 DECLARE_READ_TEST(uint, cl_uint)
1094 DECLARE_READ_TEST(long, cl_long)
1095 DECLARE_READ_TEST(ulong, cl_ulong)
1096 DECLARE_READ_TEST(short, cl_short)
1097 DECLARE_READ_TEST(ushort, cl_ushort)
1098 DECLARE_READ_TEST(float, cl_float)
1099 DECLARE_READ_TEST(char, cl_char)
1100 DECLARE_READ_TEST(uchar, cl_uchar)
1101
1102 int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1103 {
1104 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
1105 buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
1106 }
1107
1108
1109 #define DECLARE_ASYNC_TEST(type, realType) \
1110 int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1111 { \
1112 return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1113 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1114 }
1115
DECLARE_ASYNC_TEST(char,cl_char)1116 DECLARE_ASYNC_TEST(char, cl_char)
1117 DECLARE_ASYNC_TEST(uchar, cl_uchar)
1118 DECLARE_ASYNC_TEST(short, cl_short)
1119 DECLARE_ASYNC_TEST(ushort, cl_ushort)
1120 DECLARE_ASYNC_TEST(int, cl_int)
1121 DECLARE_ASYNC_TEST(uint, cl_uint)
1122 DECLARE_ASYNC_TEST(long, cl_long)
1123 DECLARE_ASYNC_TEST(ulong, cl_ulong)
1124 DECLARE_ASYNC_TEST(float, cl_float)
1125
1126
1127 #define DECLARE_BARRIER_TEST(type, realType) \
1128 int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1129 { \
1130 return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1131 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1132 }
1133
1134 DECLARE_BARRIER_TEST(int, cl_int)
1135 DECLARE_BARRIER_TEST(uint, cl_uint)
1136 DECLARE_BARRIER_TEST(long, cl_long)
1137 DECLARE_BARRIER_TEST(ulong, cl_ulong)
1138 DECLARE_BARRIER_TEST(short, cl_short)
1139 DECLARE_BARRIER_TEST(ushort, cl_ushort)
1140 DECLARE_BARRIER_TEST(char, cl_char)
1141 DECLARE_BARRIER_TEST(uchar, cl_uchar)
1142 DECLARE_BARRIER_TEST(float, cl_float)
1143
1144 /*
1145 int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
1146 {
1147 cl_mem buffers[1];
1148 float *outptr;
1149 cl_program program[1];
1150 cl_kernel kernel[1];
1151 void *values[1];
1152 size_t sizes[1] = { sizeof(cl_buffer) };
1153 uint threads[1];
1154 int err;
1155 int i;
1156 size_t ptrSize; // sizeof(half)
1157
1158 ptrSize = sizeof(cl_float)/2;
1159 outptr = (float *)malloc(ptrSize * num_elements);
1160 buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSize * num_elements, NULL);
1161 if( !buffers[0] ){
1162 log_error("clCreateBuffer failed\n");
1163 return -1;
1164 }
1165
1166 err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
1167 if( err ){
1168 log_error( " Error creating program for half\n" );
1169 clReleaseMemObject(buffers[0]);
1170 free( (void *)outptr );
1171 return -1;
1172 }
1173
1174 values[0] = buffers[0];
1175 err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
1176 if( err != CL_SUCCESS ){
1177 log_error("clSetKernelArgs failed\n");
1178 return -1;
1179 }
1180
1181 global_work_size[0] = (cl_uint)num_elements;
1182 err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
1183 if( err != CL_SUCCESS ){
1184 log_error("clEnqueueNDRangeKernel failed\n");
1185 return -1;
1186 }
1187
1188 err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
1189 if( err != CL_SUCCESS ){
1190 log_error("clEnqueueReadBuffer failed: %d\n", err);
1191 return -1;
1192 }
1193
1194 if( verify_read_half( outptr, num_elements >> 1 ) ){
1195 log_error( "buffer_READ half test failed\n" );
1196 err = -1;
1197 }
1198 else{
1199 log_info( "buffer_READ half test passed\n" );
1200 err = 0;
1201 }
1202
1203 // cleanup
1204 clReleaseMemObject( buffers[0] );
1205 clReleaseKernel( kernel[0] );
1206 clReleaseProgram( program[0] );
1207 free( (void *)outptr );
1208
1209 return err;
1210
1211 } // end test_buffer_half_read()
1212 */
1213
1214 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1215 {
1216 cl_mem buffers[1];
1217 TestStruct *output_ptr;
1218 cl_program program[1];
1219 cl_kernel kernel[1];
1220 size_t global_work_size[3];
1221 #ifdef USE_LOCAL_WORK_GROUP
1222 size_t local_work_size[3];
1223 #endif
1224 cl_int err;
1225 size_t objSize = sizeof(TestStruct);
1226
1227 size_t min_alignment = get_min_alignment(context);
1228
1229 global_work_size[0] = (cl_uint)num_elements;
1230
1231 output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
1232 if ( ! output_ptr ){
1233 log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
1234 return -1;
1235 }
1236 buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL , &err);
1237 if ( err != CL_SUCCESS ){
1238 print_error( err, " clCreateBuffer failed\n" );
1239 align_free( output_ptr );
1240 return -1;
1241 }
1242
1243 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
1244 if ( err ){
1245 clReleaseProgram( program[0] );
1246 align_free( output_ptr );
1247 return -1;
1248 }
1249
1250 err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
1251 if ( err != CL_SUCCESS){
1252 print_error( err, "clSetKernelArg failed" );
1253 clReleaseMemObject( buffers[0] );
1254 clReleaseKernel( kernel[0] );
1255 clReleaseProgram( program[0] );
1256 align_free( output_ptr );
1257 return -1;
1258 }
1259
1260 #ifdef USE_LOCAL_WORK_GROUP
1261 err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
1262 test_error( err, "Unable to get work group size to use" );
1263
1264 err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1265 #else
1266 err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1267 #endif
1268 if ( err != CL_SUCCESS ){
1269 print_error( err, "clEnqueueNDRangeKernel failed" );
1270 clReleaseMemObject( buffers[0] );
1271 clReleaseKernel( kernel[0] );
1272 clReleaseProgram( program[0] );
1273 align_free( output_ptr );
1274 return -1;
1275 }
1276
1277 err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
1278 if ( err != CL_SUCCESS){
1279 print_error( err, "clEnqueueReadBuffer failed" );
1280 clReleaseMemObject( buffers[0] );
1281 clReleaseKernel( kernel[0] );
1282 clReleaseProgram( program[0] );
1283 align_free( output_ptr );
1284 return -1;
1285 }
1286
1287 if (verify_read_struct(output_ptr, num_elements)){
1288 log_error(" struct test failed\n");
1289 err = -1;
1290 }
1291 else{
1292 log_info(" struct test passed\n");
1293 err = 0;
1294 }
1295
1296 // cleanup
1297 clReleaseMemObject( buffers[0] );
1298 clReleaseKernel( kernel[0] );
1299 clReleaseProgram( program[0] );
1300 align_free( output_ptr );
1301
1302 return err;
1303 }
1304
1305
testRandomReadSize(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_uint startOfRead,size_t sizeOfRead)1306 static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
1307 {
1308 cl_mem buffers[3];
1309 int *outptr[3];
1310 cl_program program[3];
1311 cl_kernel kernel[3];
1312 size_t global_work_size[3];
1313 #ifdef USE_LOCAL_WORK_GROUP
1314 size_t local_work_size[3];
1315 #endif
1316 cl_int err;
1317 int i, j;
1318 size_t ptrSizes[3]; // sizeof(int), sizeof(int2), sizeof(int4)
1319 int total_errors = 0;
1320 size_t min_alignment = get_min_alignment(context);
1321
1322 global_work_size[0] = (cl_uint)num_elements;
1323
1324 ptrSizes[0] = sizeof(cl_int);
1325 ptrSizes[1] = ptrSizes[0] << 1;
1326 ptrSizes[2] = ptrSizes[1] << 1;
1327 for ( i = 0; i < 3; i++ ){
1328 outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
1329 if ( ! outptr[i] ){
1330 log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
1331 for ( j = 0; j < i; j++ ){
1332 clReleaseMemObject( buffers[j] );
1333 align_free( outptr[j] );
1334 }
1335 return -1;
1336 }
1337 buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err);
1338 if ( err != CL_SUCCESS ){
1339 print_error(err, " clCreateBuffer failed\n" );
1340 for ( j = 0; j < i; j++ ){
1341 clReleaseMemObject( buffers[j] );
1342 align_free( outptr[j] );
1343 }
1344 align_free( outptr[i] );
1345 return -1;
1346 }
1347 }
1348
1349 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
1350 if ( err ){
1351 log_error( " Error creating program for int\n" );
1352 for ( i = 0; i < 3; i++ ){
1353 clReleaseMemObject( buffers[i] );
1354 align_free( outptr[i] );
1355 }
1356 return -1;
1357 }
1358
1359 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
1360 if ( err ){
1361 log_error( " Error creating program for int2\n" );
1362 clReleaseKernel( kernel[0] );
1363 clReleaseProgram( program[0] );
1364 for ( i = 0; i < 3; i++ ){
1365 clReleaseMemObject( buffers[i] );
1366 align_free( outptr[i] );
1367 }
1368 return -1;
1369 }
1370
1371 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
1372 if ( err ){
1373 log_error( " Error creating program for int4\n" );
1374 clReleaseKernel( kernel[0] );
1375 clReleaseProgram( program[0] );
1376 clReleaseKernel( kernel[1] );
1377 clReleaseProgram( program[1] );
1378 for ( i = 0; i < 3; i++ ){
1379 clReleaseMemObject( buffers[i] );
1380 align_free( outptr[i] );
1381 }
1382 return -1;
1383 }
1384
1385 for (i=0; i<3; i++){
1386 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
1387 if ( err != CL_SUCCESS ){
1388 print_error( err, "clSetKernelArgs failed" );
1389 clReleaseMemObject( buffers[i] );
1390 clReleaseKernel( kernel[i] );
1391 clReleaseProgram( program[i] );
1392 align_free( outptr[i] );
1393 return -1;
1394 }
1395
1396 #ifdef USE_LOCAL_WORK_GROUP
1397 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
1398 test_error( err, "Unable to get work group size to use" );
1399
1400 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1401 #else
1402 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1403 #endif
1404 if ( err != CL_SUCCESS ){
1405 print_error( err, "clEnqueueNDRangeKernel failed" );
1406 clReleaseMemObject( buffers[i] );
1407 clReleaseKernel( kernel[i] );
1408 clReleaseProgram( program[i] );
1409 align_free( outptr[i] );
1410 return -1;
1411 }
1412
1413 err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
1414 if ( err != CL_SUCCESS ){
1415 print_error( err, "clEnqueueReadBuffer failed" );
1416 clReleaseMemObject( buffers[i] );
1417 clReleaseKernel( kernel[i] );
1418 clReleaseProgram( program[i] );
1419 align_free( outptr[i] );
1420 return -1;
1421 }
1422
1423 if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
1424 log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1425 total_errors++;
1426 }
1427 else{
1428 log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1429 }
1430
1431 // cleanup
1432 clReleaseMemObject( buffers[i] );
1433 clReleaseKernel( kernel[i] );
1434 clReleaseProgram( program[i] );
1435 align_free( outptr[i] );
1436 }
1437
1438 return total_errors;
1439
1440 } // end testRandomReadSize()
1441
1442
test_buffer_read_random_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1443 int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1444 {
1445 int err = 0;
1446 int i;
1447 cl_uint start;
1448 size_t size;
1449 MTdata d = init_genrand( gRandomSeed );
1450
1451 // now test for random sizes of array being read
1452 for ( i = 0; i < 8; i++ ){
1453 start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
1454 size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
1455 if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
1456 err++;
1457 }
1458
1459 free_mtdata(d);
1460
1461 return err;
1462 }
1463
1464