• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 
25 #include "procs.h"
26 
27 //#define HK_DO_NOT_RUN_SHORT_ASYNC    1
28 //#define HK_DO_NOT_RUN_USHORT_ASYNC    1
29 //#define HK_DO_NOT_RUN_CHAR_ASYNC    1
30 //#define HK_DO_NOT_RUN_UCHAR_ASYNC    1
31 
32 #define TEST_PRIME_INT        ((1<<16)+1)
33 #define TEST_PRIME_UINT        ((1U<<16)+1U)
34 #define TEST_PRIME_LONG        ((1LL<<32)+1LL)
35 #define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
36 #define TEST_PRIME_SHORT    ((1S<<8)+1S)
37 #define TEST_PRIME_FLOAT    (float)3.40282346638528860e+38
38 #define TEST_PRIME_HALF        119.f
39 #define TEST_BOOL            true
40 #define TEST_PRIME_CHAR        0x77
41 
42 #ifndef ulong
43 typedef unsigned long ulong;
44 #endif
45 
46 #ifndef uchar
47 typedef unsigned char uchar;
48 #endif
49 
50 #ifndef TestStruct
51 typedef struct{
52     int     a;
53     float   b;
54 } TestStruct;
55 #endif
56 
57 //--- the code for the kernel executables
58 static const char *buffer_read_int_kernel_code[] = {
59     "__kernel void test_buffer_read_int(__global int *dst)\n"
60     "{\n"
61     "    int  tid = get_global_id(0);\n"
62     "\n"
63     "    dst[tid] = ((1<<16)+1);\n"
64     "}\n",
65 
66     "__kernel void test_buffer_read_int2(__global int2 *dst)\n"
67     "{\n"
68     "    int  tid = get_global_id(0);\n"
69     "\n"
70     "    dst[tid] = ((1<<16)+1);\n"
71     "}\n",
72 
73     "__kernel void test_buffer_read_int4(__global int4 *dst)\n"
74     "{\n"
75     "    int  tid = get_global_id(0);\n"
76     "\n"
77     "    dst[tid] = ((1<<16)+1);\n"
78     "}\n",
79 
80     "__kernel void test_buffer_read_int8(__global int8 *dst)\n"
81     "{\n"
82     "    int  tid = get_global_id(0);\n"
83     "\n"
84     "    dst[tid] = ((1<<16)+1);\n"
85     "}\n",
86 
87     "__kernel void test_buffer_read_int16(__global int16 *dst)\n"
88     "{\n"
89     "    int  tid = get_global_id(0);\n"
90     "\n"
91     "    dst[tid] = ((1<<16)+1);\n"
92     "}\n" };
93 
94 static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
95 
96 static const char *buffer_read_uint_kernel_code[] = {
97     "__kernel void test_buffer_read_uint(__global uint *dst)\n"
98     "{\n"
99     "    int  tid = get_global_id(0);\n"
100     "\n"
101     "    dst[tid] = ((1U<<16)+1U);\n"
102     "}\n",
103 
104     "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
105     "{\n"
106     "    int  tid = get_global_id(0);\n"
107     "\n"
108     "    dst[tid] = ((1U<<16)+1U);\n"
109     "}\n",
110 
111     "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
112     "{\n"
113     "    int  tid = get_global_id(0);\n"
114     "\n"
115     "    dst[tid] = ((1U<<16)+1U);\n"
116     "}\n",
117 
118     "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
119     "{\n"
120     "    int  tid = get_global_id(0);\n"
121     "\n"
122     "    dst[tid] = ((1U<<16)+1U);\n"
123     "}\n",
124 
125     "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
126     "{\n"
127     "    int  tid = get_global_id(0);\n"
128     "\n"
129     "    dst[tid] = ((1U<<16)+1U);\n"
130     "}\n" };
131 
132 static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
133 
134 static const char *buffer_read_long_kernel_code[] = {
135     "__kernel void test_buffer_read_long(__global long *dst)\n"
136     "{\n"
137     "    int  tid = get_global_id(0);\n"
138     "\n"
139     "    dst[tid] = ((1L<<32)+1L);\n"
140     "}\n",
141 
142     "__kernel void test_buffer_read_long2(__global long2 *dst)\n"
143     "{\n"
144     "    int  tid = get_global_id(0);\n"
145     "\n"
146     "    dst[tid] = ((1L<<32)+1L);\n"
147     "}\n",
148 
149     "__kernel void test_buffer_read_long4(__global long4 *dst)\n"
150     "{\n"
151     "    int  tid = get_global_id(0);\n"
152     "\n"
153     "    dst[tid] = ((1L<<32)+1L);\n"
154     "}\n",
155 
156     "__kernel void test_buffer_read_long8(__global long8 *dst)\n"
157     "{\n"
158     "    int  tid = get_global_id(0);\n"
159     "\n"
160     "    dst[tid] = ((1L<<32)+1L);\n"
161     "}\n",
162 
163     "__kernel void test_buffer_read_long16(__global long16 *dst)\n"
164     "{\n"
165     "    int  tid = get_global_id(0);\n"
166     "\n"
167     "    dst[tid] = ((1L<<32)+1L);\n"
168     "}\n" };
169 
170 static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
171 
172 static const char *buffer_read_ulong_kernel_code[] = {
173     "__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
174     "{\n"
175     "    int  tid = get_global_id(0);\n"
176     "\n"
177     "    dst[tid] = ((1UL<<32)+1UL);\n"
178     "}\n",
179 
180     "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
181     "{\n"
182     "    int  tid = get_global_id(0);\n"
183     "\n"
184     "    dst[tid] = ((1UL<<32)+1UL);\n"
185     "}\n",
186 
187     "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
188     "{\n"
189     "    int  tid = get_global_id(0);\n"
190     "\n"
191     "    dst[tid] = ((1UL<<32)+1UL);\n"
192     "}\n",
193 
194     "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
195     "{\n"
196     "    int  tid = get_global_id(0);\n"
197     "\n"
198     "    dst[tid] = ((1UL<<32)+1UL);\n"
199     "}\n",
200 
201     "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
202     "{\n"
203     "    int  tid = get_global_id(0);\n"
204     "\n"
205     "    dst[tid] = ((1UL<<32)+1UL);\n"
206     "}\n" };
207 
208 static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
209 
210 static const char *buffer_read_short_kernel_code[] = {
211     "__kernel void test_buffer_read_short(__global short *dst)\n"
212     "{\n"
213     "    int  tid = get_global_id(0);\n"
214     "\n"
215     "    dst[tid] = (short)((1<<8)+1);\n"
216     "}\n",
217 
218     "__kernel void test_buffer_read_short2(__global short2 *dst)\n"
219     "{\n"
220     "    int  tid = get_global_id(0);\n"
221     "\n"
222     "    dst[tid] = (short)((1<<8)+1);\n"
223     "}\n",
224 
225     "__kernel void test_buffer_read_short4(__global short4 *dst)\n"
226     "{\n"
227     "    int  tid = get_global_id(0);\n"
228     "\n"
229     "    dst[tid] = (short)((1<<8)+1);\n"
230     "}\n",
231 
232     "__kernel void test_buffer_read_short8(__global short8 *dst)\n"
233     "{\n"
234     "    int  tid = get_global_id(0);\n"
235     "\n"
236     "    dst[tid] = (short)((1<<8)+1);\n"
237     "}\n",
238 
239     "__kernel void test_buffer_read_short16(__global short16 *dst)\n"
240     "{\n"
241     "    int  tid = get_global_id(0);\n"
242     "\n"
243     "    dst[tid] = (short)((1<<8)+1);\n"
244     "}\n" };
245 
246 static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
247 
248 
249 static const char *buffer_read_ushort_kernel_code[] = {
250     "__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
251     "{\n"
252     "    int  tid = get_global_id(0);\n"
253     "\n"
254     "    dst[tid] = (ushort)((1<<8)+1);\n"
255     "}\n",
256 
257     "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
258     "{\n"
259     "    int  tid = get_global_id(0);\n"
260     "\n"
261     "    dst[tid] = (ushort)((1<<8)+1);\n"
262     "}\n",
263 
264     "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
265     "{\n"
266     "    int  tid = get_global_id(0);\n"
267     "\n"
268     "    dst[tid] = (ushort)((1<<8)+1);\n"
269     "}\n",
270 
271     "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
272     "{\n"
273     "    int  tid = get_global_id(0);\n"
274     "\n"
275     "    dst[tid] = (ushort)((1<<8)+1);\n"
276     "}\n",
277 
278     "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
279     "{\n"
280     "    int  tid = get_global_id(0);\n"
281     "\n"
282     "    dst[tid] = (ushort)((1<<8)+1);\n"
283     "}\n" };
284 
285 static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
286 
287 
288 static const char *buffer_read_float_kernel_code[] = {
289     "__kernel void test_buffer_read_float(__global float *dst)\n"
290     "{\n"
291     "    int  tid = get_global_id(0);\n"
292     "\n"
293     "    dst[tid] = (float)3.40282346638528860e+38;\n"
294     "}\n",
295 
296     "__kernel void test_buffer_read_float2(__global float2 *dst)\n"
297     "{\n"
298     "    int  tid = get_global_id(0);\n"
299     "\n"
300     "    dst[tid] = (float)3.40282346638528860e+38;\n"
301     "}\n",
302 
303     "__kernel void test_buffer_read_float4(__global float4 *dst)\n"
304     "{\n"
305     "    int  tid = get_global_id(0);\n"
306     "\n"
307     "    dst[tid] = (float)3.40282346638528860e+38;\n"
308     "}\n",
309 
310     "__kernel void test_buffer_read_float8(__global float8 *dst)\n"
311     "{\n"
312     "    int  tid = get_global_id(0);\n"
313     "\n"
314     "    dst[tid] = (float)3.40282346638528860e+38;\n"
315     "}\n",
316 
317     "__kernel void test_buffer_read_float16(__global float16 *dst)\n"
318     "{\n"
319     "    int  tid = get_global_id(0);\n"
320     "\n"
321     "    dst[tid] = (float)3.40282346638528860e+38;\n"
322     "}\n" };
323 
324 static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
325 
326 
327 static const char *buffer_read_half_kernel_code[] = {
328     "__kernel void test_buffer_read_half(__global half *dst)\n"
329     "{\n"
330     "    int  tid = get_global_id(0);\n"
331     "\n"
332     "    dst[tid] = (half)119;\n"
333     "}\n",
334 
335     "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
336     "{\n"
337     "    int  tid = get_global_id(0);\n"
338     "\n"
339     "    dst[tid] = (half)119;\n"
340     "}\n",
341 
342     "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
343     "{\n"
344     "    int  tid = get_global_id(0);\n"
345     "\n"
346     "    dst[tid] = (half)119;\n"
347     "}\n",
348 
349     "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
350     "{\n"
351     "    int  tid = get_global_id(0);\n"
352     "\n"
353     "    dst[tid] = (half)119;\n"
354     "}\n",
355 
356     "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
357     "{\n"
358     "    int  tid = get_global_id(0);\n"
359     "\n"
360     "    dst[tid] = (half)119;\n"
361     "}\n" };
362 
363 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
364 
365 
366 static const char *buffer_read_char_kernel_code[] = {
367     "__kernel void test_buffer_read_char(__global char *dst)\n"
368     "{\n"
369     "    int  tid = get_global_id(0);\n"
370     "\n"
371     "    dst[tid] = (char)'w';\n"
372     "}\n",
373 
374     "__kernel void test_buffer_read_char2(__global char2 *dst)\n"
375     "{\n"
376     "    int  tid = get_global_id(0);\n"
377     "\n"
378     "    dst[tid] = (char)'w';\n"
379     "}\n",
380 
381     "__kernel void test_buffer_read_char4(__global char4 *dst)\n"
382     "{\n"
383     "    int  tid = get_global_id(0);\n"
384     "\n"
385     "    dst[tid] = (char)'w';\n"
386     "}\n",
387 
388     "__kernel void test_buffer_read_char8(__global char8 *dst)\n"
389     "{\n"
390     "    int  tid = get_global_id(0);\n"
391     "\n"
392     "    dst[tid] = (char)'w';\n"
393     "}\n",
394 
395     "__kernel void test_buffer_read_char16(__global char16 *dst)\n"
396     "{\n"
397     "    int  tid = get_global_id(0);\n"
398     "\n"
399     "    dst[tid] = (char)'w';\n"
400     "}\n" };
401 
402 static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
403 
404 
405 static const char *buffer_read_uchar_kernel_code[] = {
406     "__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
407     "{\n"
408     "    int  tid = get_global_id(0);\n"
409     "\n"
410     "    dst[tid] = 'w';\n"
411     "}\n",
412 
413     "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
414     "{\n"
415     "    int  tid = get_global_id(0);\n"
416     "\n"
417     "    dst[tid] = (uchar)'w';\n"
418     "}\n",
419 
420     "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
421     "{\n"
422     "    int  tid = get_global_id(0);\n"
423     "\n"
424     "    dst[tid] = (uchar)'w';\n"
425     "}\n",
426 
427     "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
428     "{\n"
429     "    int  tid = get_global_id(0);\n"
430     "\n"
431     "    dst[tid] = (uchar)'w';\n"
432     "}\n",
433 
434     "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
435     "{\n"
436     "    int  tid = get_global_id(0);\n"
437     "\n"
438     "    dst[tid] = (uchar)'w';\n"
439     "}\n" };
440 
441 static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
442 
443 
444 static const char *buffer_read_struct_kernel_code =
445 "typedef struct{\n"
446 "int    a;\n"
447 "float    b;\n"
448 "} TestStruct;\n"
449 "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
450 "{\n"
451 "    int  tid = get_global_id(0);\n"
452 "\n"
453 "    dst[tid].a = ((1<<16)+1);\n"
454 "     dst[tid].b = (float)3.40282346638528860e+38;\n"
455 "}\n";
456 
457 
458 //--- the verify functions
verify_read_int(void * ptr,int n)459 static int verify_read_int(void *ptr, int n)
460 {
461     int     i;
462     cl_int  *outptr = (cl_int *)ptr;
463 
464     for (i=0; i<n; i++){
465         if ( outptr[i] != TEST_PRIME_INT )
466             return -1;
467     }
468 
469     return 0;
470 }
471 
472 
verify_read_uint(void * ptr,int n)473 static int verify_read_uint(void *ptr, int n)
474 {
475     int     i;
476     cl_uint *outptr = (cl_uint *)ptr;
477 
478     for (i=0; i<n; i++){
479         if ( outptr[i] != TEST_PRIME_UINT )
480             return -1;
481     }
482 
483     return 0;
484 }
485 
486 
verify_read_long(void * ptr,int n)487 static int verify_read_long(void *ptr, int n)
488 {
489     int     i;
490     cl_long *outptr = (cl_long *)ptr;
491 
492     for (i=0; i<n; i++){
493         if ( outptr[i] != TEST_PRIME_LONG )
494             return -1;
495     }
496 
497     return 0;
498 }
499 
500 
verify_read_ulong(void * ptr,int n)501 static int verify_read_ulong(void *ptr, int n)
502 {
503     int      i;
504     cl_ulong *outptr = (cl_ulong *)ptr;
505 
506     for (i=0; i<n; i++){
507         if ( outptr[i] != TEST_PRIME_ULONG )
508             return -1;
509     }
510 
511     return 0;
512 }
513 
514 
verify_read_short(void * ptr,int n)515 static int verify_read_short(void *ptr, int n)
516 {
517     int      i;
518     cl_short *outptr = (cl_short *)ptr;
519 
520     for (i=0; i<n; i++){
521         if ( outptr[i] != (cl_short)((1<<8)+1) )
522             return -1;
523     }
524 
525     return 0;
526 }
527 
528 
verify_read_ushort(void * ptr,int n)529 static int verify_read_ushort(void *ptr, int n)
530 {
531     int       i;
532     cl_ushort *outptr = (cl_ushort *)ptr;
533 
534     for (i=0; i<n; i++){
535         if ( outptr[i] != (cl_ushort)((1<<8)+1) )
536             return -1;
537     }
538 
539     return 0;
540 }
541 
542 
verify_read_float(void * ptr,int n)543 static int verify_read_float( void *ptr, int n )
544 {
545     int      i;
546     cl_float *outptr = (cl_float *)ptr;
547 
548     for (i=0; i<n; i++){
549         if ( outptr[i] != TEST_PRIME_FLOAT )
550             return -1;
551     }
552 
553     return 0;
554 }
555 
556 
verify_read_half(void * ptr,int n)557 static int verify_read_half( void *ptr, int n )
558 {
559     int     i;
560     float   *outptr = (float *)ptr; // FIXME: should this be cl_half_float?
561 
562     for ( i = 0; i < n / 2; i++ ){
563         if ( outptr[i] != TEST_PRIME_HALF )
564             return -1;
565     }
566 
567     return 0;
568 }
569 
570 
verify_read_char(void * ptr,int n)571 static int verify_read_char(void *ptr, int n)
572 {
573     int     i;
574     cl_char *outptr = (cl_char *)ptr;
575 
576     for (i=0; i<n; i++){
577         if ( outptr[i] != TEST_PRIME_CHAR )
578             return -1;
579     }
580 
581     return 0;
582 }
583 
584 
verify_read_uchar(void * ptr,int n)585 static int verify_read_uchar(void *ptr, int n)
586 {
587     int      i;
588     cl_uchar *outptr = (cl_uchar *)ptr;
589 
590     for (i=0; i<n; i++){
591         if ( outptr[i] != TEST_PRIME_CHAR )
592             return -1;
593     }
594 
595     return 0;
596 }
597 
598 
verify_read_struct(TestStruct * outptr,int n)599 static int verify_read_struct(TestStruct *outptr, int n)
600 {
601     int     i;
602 
603     for (i=0; i<n; i++)
604     {
605         if ( ( outptr[i].a != TEST_PRIME_INT ) ||
606              ( outptr[i].b != TEST_PRIME_FLOAT ) )
607             return -1;
608     }
609 
610     return 0;
611 }
612 
613 //----- the test functions
test_buffer_read(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))614 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
615                       const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
616 {
617     cl_mem      buffers[5];
618     void        *outptr[5];
619     void        *inptr[5];
620     cl_program  program[5];
621     cl_kernel   kernel[5];
622     size_t      global_work_size[3];
623 #ifdef USE_LOCAL_WORK_GROUP
624     size_t      local_work_size[3];
625 #endif
626     cl_int      err;
627     int         i;
628     size_t      ptrSizes[5];
629     int         src_flag_id;
630     int         total_errors = 0;
631 
632     size_t      min_alignment = get_min_alignment(context);
633 
634     global_work_size[0] = (cl_uint)num_elements;
635 
636     ptrSizes[0] = size;
637     ptrSizes[1] = ptrSizes[0] << 1;
638     ptrSizes[2] = ptrSizes[1] << 1;
639     ptrSizes[3] = ptrSizes[2] << 1;
640     ptrSizes[4] = ptrSizes[3] << 1;
641 
642     //skip devices that don't support long
643     if (! gHasLong && strstr(type,"long") )
644     {
645         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
646         return CL_SUCCESS;
647     }
648 
649     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
650         log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
651 
652         for ( i = 0; i < loops; i++ ){
653             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
654             if ( ! outptr[i] ){
655                 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
656                 return -1;
657             }
658             inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
659             if ( ! inptr[i] ){
660                 log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
661                 return -1;
662             }
663 
664 
665             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
666                 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
667             else
668                 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
669             if ( err != CL_SUCCESS ){
670                 print_error(err, " clCreateBuffer failed\n" );
671                 align_free( outptr[i] );
672                 align_free( inptr[i] );
673                 return -1;
674             }
675 
676             err = create_single_kernel_helper(  context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
677             if ( err ){
678                 log_error("Creating program for %s\n", type);
679                 print_error(err,  " Error creating program " );
680                 clReleaseMemObject(buffers[i]);
681                 align_free( outptr[i] );
682                 align_free( inptr[i] );
683                 return -1;
684             }
685 
686             err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
687             if ( err != CL_SUCCESS ){
688                 print_error( err, "clSetKernelArg failed" );
689                 clReleaseMemObject( buffers[i] );
690                 clReleaseKernel( kernel[i] );
691                 clReleaseProgram( program[i] );
692                 align_free( outptr[i] );
693                 align_free( inptr[i] );
694                 return -1;
695             }
696 
697 #ifdef USE_LOCAL_WORK_GROUP
698             err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
699             test_error( err, "Unable to get work group size to use" );
700 
701             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
702 #else
703             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
704 #endif
705             if ( err != CL_SUCCESS ){
706                 print_error( err, "clEnqueueNDRangeKernel failed" );
707                 clReleaseMemObject( buffers[i] );
708                 clReleaseKernel( kernel[i] );
709                 clReleaseProgram( program[i] );
710                 align_free( outptr[i] );
711                 align_free( inptr[i] );
712                 return -1;
713             }
714 
715             err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
716             if ( err != CL_SUCCESS ){
717                 print_error( err, "clEnqueueReadBuffer failed" );
718                 clReleaseMemObject( buffers[i] );
719                 clReleaseKernel( kernel[i] );
720                 clReleaseProgram( program[i] );
721                 align_free( outptr[i] );
722                 align_free( inptr[i] );
723                 return -1;
724             }
725 
726             if (fn(outptr[i], num_elements*(1<<i))){
727                 log_error( " %s%d test failed\n", type, 1<<i );
728                 total_errors++;
729             }
730             else{
731                 log_info( " %s%d test passed\n", type, 1<<i );
732             }
733 
734             err = clEnqueueReadBuffer( queue, buffers[i], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL );
735             if ( err != CL_SUCCESS ){
736                 print_error( err, "clEnqueueReadBuffer failed" );
737                 clReleaseMemObject( buffers[i] );
738                 clReleaseKernel( kernel[i] );
739                 clReleaseProgram( program[i] );
740                 align_free( outptr[i] );
741                 align_free( inptr[i] );
742                 return -1;
743             }
744 
745             if (fn(inptr[i], num_elements*(1<<i))){
746                 log_error( " %s%d test failed in-place readback\n", type, 1<<i );
747                 total_errors++;
748             }
749             else{
750                 log_info( " %s%d test passed in-place readback\n", type, 1<<i );
751             }
752 
753 
754             // cleanup
755             clReleaseMemObject( buffers[i] );
756             clReleaseKernel( kernel[i] );
757             clReleaseProgram( program[i] );
758             align_free( outptr[i] );
759             align_free( inptr[i] );
760         }
761     } // mem flag
762 
763     return total_errors;
764 
765 }   // end test_buffer_read()
766 
test_buffer_read_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))767 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
768                             const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
769 {
770     cl_mem      buffers[5];
771     cl_program  program[5];
772     cl_kernel   kernel[5];
773     cl_event    event;
774     void        *outptr[5];
775     void        *inptr[5];
776     size_t      global_work_size[3];
777 #ifdef USE_LOCAL_WORK_GROUP
778     size_t      local_work_size[3];
779 #endif
780     cl_int      err;
781     int         i;
782     size_t      lastIndex;
783     size_t      ptrSizes[5];
784     int         src_flag_id;
785     int         total_errors = 0;
786 
787     size_t      min_alignment = get_min_alignment(context);
788 
789     global_work_size[0] = (cl_uint)num_elements;
790 
791     ptrSizes[0] = size;
792     ptrSizes[1] = ptrSizes[0] << 1;
793     ptrSizes[2] = ptrSizes[1] << 1;
794     ptrSizes[3] = ptrSizes[2] << 1;
795     ptrSizes[4] = ptrSizes[3] << 1;
796 
797     //skip devices that don't support long
798     if (! gHasLong && strstr(type,"long") )
799     {
800         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
801         return CL_SUCCESS;
802     }
803 
804     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
805         log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
806 
807         for ( i = 0; i < loops; i++ ){
808             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
809             if ( ! outptr[i] ){
810                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
811                 return -1;
812             }
813             memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
814             inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
815             if ( ! inptr[i] ){
816                 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
817                 return -1;
818             }
819             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
820 
821 
822             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
823                 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
824             else
825                 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
826             if ( err != CL_SUCCESS ){
827                 print_error(err, " clCreateBuffer failed\n" );
828                 align_free( outptr[i] );
829                 align_free( inptr[i] );
830                 return -1;
831             }
832 
833             err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i]);
834             if ( err ){
835                 log_error( " Error creating program for %s\n", type );
836                 clReleaseMemObject( buffers[i] );
837                 align_free( outptr[i] );
838                 align_free( inptr[i] );
839                 return -1;
840             }
841 
842             err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
843             if ( err != CL_SUCCESS ){
844                 print_error( err, "clSetKernelArg failed" );
845                 clReleaseMemObject( buffers[i] );
846                 clReleaseKernel( kernel[i] );
847                 clReleaseProgram( program[i] );
848                 align_free( outptr[i] );
849                 align_free( inptr[i] );
850                 return -1;
851             }
852 
853 #ifdef USE_LOCAL_WORK_GROUP
854             err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
855             test_error( err, "Unable to get work group size to use" );
856 
857             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
858 #else
859             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
860 #endif
861             if ( err != CL_SUCCESS ){
862                 print_error( err, "clEnqueueNDRangeKernel failed" );
863                 clReleaseMemObject( buffers[i] );
864                 clReleaseKernel( kernel[i] );
865                 clReleaseProgram( program[i] );
866                 align_free( outptr[i] );
867                 align_free( inptr[i] );
868                 return -1;
869             }
870 
871             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
872             err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &event );
873 #ifdef CHECK_FOR_NON_WAIT
874             if ( ((uchar *)outptr[i])[lastIndex] ){
875                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
876                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
877             }
878 #endif
879             if ( err != CL_SUCCESS ){
880                 print_error( err, "clEnqueueReadBuffer failed" );
881                 clReleaseMemObject( buffers[i] );
882                 clReleaseKernel( kernel[i] );
883                 clReleaseProgram( program[i] );
884                 align_free( outptr[i] );
885                 align_free( inptr[i] );
886                 return -1;
887             }
888             err = clWaitForEvents(1, &event );
889             if ( err != CL_SUCCESS ){
890                 print_error( err, "clWaitForEvents() failed" );
891                 clReleaseMemObject( buffers[i] );
892                 clReleaseKernel( kernel[i] );
893                 clReleaseProgram( program[i] );
894                 align_free( outptr[i] );
895                 align_free( inptr[i] );
896                 return -1;
897             }
898 
899             if ( fn(outptr[i], num_elements*(1<<i)) ){
900                 log_error( " %s%d test failed\n", type, 1<<i );
901                 total_errors++;
902             }
903             else{
904                 log_info( " %s%d test passed\n", type, 1<<i );
905             }
906 
907             // cleanup
908             clReleaseEvent( event );
909             clReleaseMemObject( buffers[i] );
910             clReleaseKernel( kernel[i] );
911             clReleaseProgram( program[i] );
912             align_free( outptr[i] );
913             align_free( inptr[i] );
914         }
915     } // mem flags
916 
917 
918     return total_errors;
919 
920 }   // end test_buffer_read_array_async()
921 
922 
test_buffer_read_array_barrier(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))923 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
924                                     const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
925 {
926     cl_mem      buffers[5];
927     cl_program  program[5];
928     cl_kernel   kernel[5];
929     cl_event    event;
930     void        *outptr[5], *inptr[5];
931     size_t      global_work_size[3];
932 #ifdef USE_LOCAL_WORK_GROUP
933     size_t      local_work_size[3];
934 #endif
935     cl_int      err;
936     int         i;
937     size_t      lastIndex;
938     size_t      ptrSizes[5];
939     int         src_flag_id;
940     int         total_errors = 0;
941 
942     size_t min_alignment = get_min_alignment(context);
943 
944     global_work_size[0] = (cl_uint)num_elements;
945 
946     ptrSizes[0] = size;
947     ptrSizes[1] = ptrSizes[0] << 1;
948     ptrSizes[2] = ptrSizes[1] << 1;
949     ptrSizes[3] = ptrSizes[2] << 1;
950     ptrSizes[4] = ptrSizes[3] << 1;
951 
952     //skip devices that don't support long
953     if (! gHasLong && strstr(type,"long") )
954     {
955         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
956         return CL_SUCCESS;
957     }
958 
959     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
960         log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
961 
962         for ( i = 0; i < loops; i++ ){
963             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
964             if ( ! outptr[i] ){
965                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
966                 return -1;
967             }
968             memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
969             inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
970             if ( ! inptr[i] ){
971                 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
972                 return -1;
973             }
974             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
975 
976             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
977                 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
978             else
979                 buffers[i] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
980             if ( err != CL_SUCCESS ){
981                 print_error(err, " clCreateBuffer failed\n" );
982                 align_free( outptr[i] );
983                 align_free( inptr[i] );
984                 return -1;
985             }
986 
987             err = create_single_kernel_helper(  context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
988             if ( err ){
989                 log_error( " Error creating program for %s\n", type );
990                 clReleaseMemObject( buffers[i] );
991                 align_free( outptr[i] );
992                 align_free( inptr[i] );
993                 return -1;
994             }
995 
996             err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
997             if ( err != CL_SUCCESS ){
998                 print_error( err, "clSetKernelArgs failed" );
999                 clReleaseMemObject( buffers[i] );
1000                 clReleaseKernel( kernel[i] );
1001                 clReleaseProgram( program[i] );
1002                 align_free( outptr[i] );
1003                 align_free( inptr[i] );
1004                 return -1;
1005             }
1006 
1007 #ifdef USE_LOCAL_WORK_GROUP
1008             err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
1009             test_error( err, "Unable to get work group size to use" );
1010 
1011             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1012 #else
1013             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1014 #endif
1015             if ( err != CL_SUCCESS ){
1016                 print_error( err, "clEnqueueNDRangeKernel failed" );
1017                 clReleaseMemObject( buffers[i] );
1018                 clReleaseKernel( kernel[i] );
1019                 clReleaseProgram( program[i] );
1020                 align_free( outptr[i] );
1021                 align_free( inptr[i] );
1022                 return -1;
1023             }
1024 
1025             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
1026             err = clEnqueueReadBuffer( queue, buffers[i], false, 0, ptrSizes[i]*num_elements, (void *)(outptr[i]), 0, NULL, &event );
1027 #ifdef CHECK_FOR_NON_WAIT
1028             if ( ((uchar *)outptr[i])[lastIndex] ){
1029                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
1030                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
1031             }
1032 #endif
1033             if ( err != CL_SUCCESS ){
1034                 print_error( err, "clEnqueueReadBuffer failed" );
1035                 clReleaseMemObject( buffers[i] );
1036                 clReleaseKernel( kernel[i] );
1037                 clReleaseProgram( program[i] );
1038                 align_free( outptr[i] );
1039                 align_free( inptr[i] );
1040                 return -1;
1041             }
1042             err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
1043             if ( err != CL_SUCCESS ){
1044                 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
1045                 clReleaseMemObject( buffers[i] );
1046                 clReleaseKernel( kernel[i] );
1047                 clReleaseProgram( program[i] );
1048                 align_free( outptr[i] );
1049                 return -1;
1050             }
1051 
1052             err = clWaitForEvents(1, &event);
1053             if ( err != CL_SUCCESS ){
1054                 print_error( err, "clWaitForEvents() failed" );
1055                 clReleaseMemObject( buffers[i] );
1056                 clReleaseKernel( kernel[i] );
1057                 clReleaseProgram( program[i] );
1058                 align_free( outptr[i] );
1059                 align_free( inptr[i] );
1060                 return -1;
1061             }
1062 
1063             if ( fn(outptr[i], num_elements*(1<<i)) ){
1064                 log_error(" %s%d test failed\n", type, 1<<i);
1065                 total_errors++;
1066             }
1067             else{
1068                 log_info(" %s%d test passed\n", type, 1<<i);
1069             }
1070 
1071             // cleanup
1072             clReleaseEvent( event );
1073             clReleaseMemObject( buffers[i] );
1074             clReleaseKernel( kernel[i] );
1075             clReleaseProgram( program[i] );
1076             align_free( outptr[i] );
1077             align_free( inptr[i] );
1078         }
1079     } // cl_mem flags
1080     return total_errors;
1081 
1082 }   // end test_buffer_read_array_barrier()
1083 
1084 
1085 #define DECLARE_READ_TEST(type, realType) \
1086 int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1087 { \
1088 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1089 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1090 }
1091 
DECLARE_READ_TEST(int,cl_int)1092 DECLARE_READ_TEST(int, cl_int)
1093 DECLARE_READ_TEST(uint, cl_uint)
1094 DECLARE_READ_TEST(long, cl_long)
1095 DECLARE_READ_TEST(ulong, cl_ulong)
1096 DECLARE_READ_TEST(short, cl_short)
1097 DECLARE_READ_TEST(ushort, cl_ushort)
1098 DECLARE_READ_TEST(float, cl_float)
1099 DECLARE_READ_TEST(char, cl_char)
1100 DECLARE_READ_TEST(uchar, cl_uchar)
1101 
1102 int test_buffer_half_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1103 {
1104     return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
1105                              buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
1106 }
1107 
1108 
1109 #define DECLARE_ASYNC_TEST(type, realType) \
1110 int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1111 { \
1112 return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1113 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1114 }
1115 
DECLARE_ASYNC_TEST(char,cl_char)1116 DECLARE_ASYNC_TEST(char, cl_char)
1117 DECLARE_ASYNC_TEST(uchar, cl_uchar)
1118 DECLARE_ASYNC_TEST(short, cl_short)
1119 DECLARE_ASYNC_TEST(ushort, cl_ushort)
1120 DECLARE_ASYNC_TEST(int, cl_int)
1121 DECLARE_ASYNC_TEST(uint, cl_uint)
1122 DECLARE_ASYNC_TEST(long, cl_long)
1123 DECLARE_ASYNC_TEST(ulong, cl_ulong)
1124 DECLARE_ASYNC_TEST(float, cl_float)
1125 
1126 
1127 #define DECLARE_BARRIER_TEST(type, realType) \
1128 int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1129 { \
1130 return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1131 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1132 }
1133 
1134 DECLARE_BARRIER_TEST(int, cl_int)
1135 DECLARE_BARRIER_TEST(uint, cl_uint)
1136 DECLARE_BARRIER_TEST(long, cl_long)
1137 DECLARE_BARRIER_TEST(ulong, cl_ulong)
1138 DECLARE_BARRIER_TEST(short, cl_short)
1139 DECLARE_BARRIER_TEST(ushort, cl_ushort)
1140 DECLARE_BARRIER_TEST(char, cl_char)
1141 DECLARE_BARRIER_TEST(uchar, cl_uchar)
1142 DECLARE_BARRIER_TEST(float, cl_float)
1143 
1144 /*
1145  int test_buffer_half_read(cl_device_group device, cl_device id, cl_context context, int num_elements)
1146  {
1147  cl_mem        buffers[1];
1148  float        *outptr;
1149  cl_program program[1];
1150  cl_kernel    kernel[1];
1151  void        *values[1];
1152  size_t        sizes[1] = { sizeof(cl_buffer) };
1153  uint        threads[1];
1154  int        err;
1155  int        i;
1156  size_t        ptrSize;    // sizeof(half)
1157 
1158  ptrSize = sizeof(cl_float)/2;
1159  outptr = (float *)malloc(ptrSize * num_elements);
1160  buffers[0] = clCreateBuffer(device, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSize * num_elements, NULL);
1161  if( !buffers[0] ){
1162  log_error("clCreateBuffer failed\n");
1163  return -1;
1164  }
1165 
1166  err = create_program_and_kernel(device, buffer_read_half_kernel_code, "test_buffer_read_half", &program[0], &kernel[0]);
1167  if( err ){
1168  log_error( " Error creating program for half\n" );
1169  clReleaseMemObject(buffers[0]);
1170  free( (void *)outptr );
1171  return -1;
1172  }
1173 
1174  values[0] = buffers[0];
1175  err = clSetKernelArgs(context, kernel[0], 1, NULL, &(values[i]), sizes);
1176  if( err != CL_SUCCESS ){
1177  log_error("clSetKernelArgs failed\n");
1178  return -1;
1179  }
1180 
1181  global_work_size[0] = (cl_uint)num_elements;
1182  err = clEnqueueNDRangeKernel(queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
1183  if( err != CL_SUCCESS ){
1184  log_error("clEnqueueNDRangeKernel failed\n");
1185  return -1;
1186  }
1187 
1188  err = clEnqueueReadBuffer( queue, buffers[0], true, 0, ptrSize*num_elements, (void *)outptr, 0, NULL, NULL );
1189  if( err != CL_SUCCESS ){
1190  log_error("clEnqueueReadBuffer failed: %d\n", err);
1191  return -1;
1192  }
1193 
1194  if( verify_read_half( outptr, num_elements >> 1 ) ){
1195  log_error( "buffer_READ half test failed\n" );
1196  err = -1;
1197  }
1198  else{
1199  log_info( "buffer_READ half test passed\n" );
1200  err = 0;
1201  }
1202 
1203  // cleanup
1204  clReleaseMemObject( buffers[0] );
1205  clReleaseKernel( kernel[0] );
1206  clReleaseProgram( program[0] );
1207  free( (void *)outptr );
1208 
1209  return err;
1210 
1211  }    // end test_buffer_half_read()
1212  */
1213 
1214 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1215 {
1216     cl_mem      buffers[1];
1217     TestStruct  *output_ptr;
1218     cl_program  program[1];
1219     cl_kernel   kernel[1];
1220     size_t      global_work_size[3];
1221 #ifdef USE_LOCAL_WORK_GROUP
1222     size_t      local_work_size[3];
1223 #endif
1224     cl_int      err;
1225     size_t      objSize = sizeof(TestStruct);
1226 
1227     size_t      min_alignment = get_min_alignment(context);
1228 
1229     global_work_size[0] = (cl_uint)num_elements;
1230 
1231     output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
1232     if ( ! output_ptr ){
1233         log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
1234         return -1;
1235     }
1236     buffers[0] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  objSize * num_elements, NULL , &err);
1237     if ( err != CL_SUCCESS ){
1238         print_error( err, " clCreateBuffer failed\n" );
1239         align_free( output_ptr );
1240         return -1;
1241     }
1242 
1243     err = create_single_kernel_helper(  context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
1244     if ( err ){
1245         clReleaseProgram( program[0] );
1246         align_free( output_ptr );
1247         return -1;
1248     }
1249 
1250     err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
1251     if ( err != CL_SUCCESS){
1252         print_error( err, "clSetKernelArg failed" );
1253         clReleaseMemObject( buffers[0] );
1254         clReleaseKernel( kernel[0] );
1255         clReleaseProgram( program[0] );
1256         align_free( output_ptr );
1257         return -1;
1258     }
1259 
1260 #ifdef USE_LOCAL_WORK_GROUP
1261     err = get_max_common_work_group_size( context, kernel[0], global_work_size[0], &local_work_size[0] );
1262     test_error( err, "Unable to get work group size to use" );
1263 
1264     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1265 #else
1266     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1267 #endif
1268     if ( err != CL_SUCCESS ){
1269         print_error( err, "clEnqueueNDRangeKernel failed" );
1270         clReleaseMemObject( buffers[0] );
1271         clReleaseKernel( kernel[0] );
1272         clReleaseProgram( program[0] );
1273         align_free( output_ptr );
1274         return -1;
1275     }
1276 
1277     err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
1278     if ( err != CL_SUCCESS){
1279         print_error( err, "clEnqueueReadBuffer failed" );
1280         clReleaseMemObject( buffers[0] );
1281         clReleaseKernel( kernel[0] );
1282         clReleaseProgram( program[0] );
1283         align_free( output_ptr );
1284         return -1;
1285     }
1286 
1287     if (verify_read_struct(output_ptr, num_elements)){
1288         log_error(" struct test failed\n");
1289         err = -1;
1290     }
1291     else{
1292         log_info(" struct test passed\n");
1293         err = 0;
1294     }
1295 
1296     // cleanup
1297     clReleaseMemObject( buffers[0] );
1298     clReleaseKernel( kernel[0] );
1299     clReleaseProgram( program[0] );
1300     align_free( output_ptr );
1301 
1302     return err;
1303 }
1304 
1305 
testRandomReadSize(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_uint startOfRead,size_t sizeOfRead)1306 static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
1307 {
1308     cl_mem      buffers[3];
1309     int         *outptr[3];
1310     cl_program  program[3];
1311     cl_kernel   kernel[3];
1312     size_t      global_work_size[3];
1313 #ifdef USE_LOCAL_WORK_GROUP
1314     size_t      local_work_size[3];
1315 #endif
1316     cl_int      err;
1317     int         i, j;
1318     size_t      ptrSizes[3];    // sizeof(int), sizeof(int2), sizeof(int4)
1319     int         total_errors = 0;
1320     size_t      min_alignment = get_min_alignment(context);
1321 
1322     global_work_size[0] = (cl_uint)num_elements;
1323 
1324     ptrSizes[0] = sizeof(cl_int);
1325     ptrSizes[1] = ptrSizes[0] << 1;
1326     ptrSizes[2] = ptrSizes[1] << 1;
1327     for ( i = 0; i < 3; i++ ){
1328         outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
1329         if ( ! outptr[i] ){
1330             log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
1331             for ( j = 0; j < i; j++ ){
1332                 clReleaseMemObject( buffers[j] );
1333                 align_free( outptr[j] );
1334             }
1335             return -1;
1336         }
1337         buffers[i] = clCreateBuffer(context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err);
1338         if ( err != CL_SUCCESS ){
1339             print_error(err, " clCreateBuffer failed\n" );
1340             for ( j = 0; j < i; j++ ){
1341                 clReleaseMemObject( buffers[j] );
1342                 align_free( outptr[j] );
1343             }
1344             align_free( outptr[i] );
1345             return -1;
1346         }
1347     }
1348 
1349     err = create_single_kernel_helper(  context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
1350     if ( err ){
1351         log_error( " Error creating program for int\n" );
1352         for ( i = 0; i < 3; i++ ){
1353             clReleaseMemObject( buffers[i] );
1354             align_free( outptr[i] );
1355         }
1356         return -1;
1357     }
1358 
1359     err = create_single_kernel_helper(  context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
1360     if ( err ){
1361         log_error( " Error creating program for int2\n" );
1362         clReleaseKernel( kernel[0] );
1363         clReleaseProgram( program[0] );
1364         for ( i = 0; i < 3; i++ ){
1365             clReleaseMemObject( buffers[i] );
1366             align_free( outptr[i] );
1367         }
1368         return -1;
1369     }
1370 
1371     err = create_single_kernel_helper(  context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
1372     if ( err ){
1373         log_error( " Error creating program for int4\n" );
1374         clReleaseKernel( kernel[0] );
1375         clReleaseProgram( program[0] );
1376         clReleaseKernel( kernel[1] );
1377         clReleaseProgram( program[1] );
1378         for ( i = 0; i < 3; i++ ){
1379             clReleaseMemObject( buffers[i] );
1380             align_free( outptr[i] );
1381         }
1382         return -1;
1383     }
1384 
1385     for (i=0; i<3; i++){
1386         err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
1387         if ( err != CL_SUCCESS ){
1388             print_error( err, "clSetKernelArgs failed" );
1389             clReleaseMemObject( buffers[i] );
1390             clReleaseKernel( kernel[i] );
1391             clReleaseProgram( program[i] );
1392             align_free( outptr[i] );
1393             return -1;
1394         }
1395 
1396 #ifdef USE_LOCAL_WORK_GROUP
1397         err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
1398         test_error( err, "Unable to get work group size to use" );
1399 
1400         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1401 #else
1402         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1403 #endif
1404         if ( err != CL_SUCCESS ){
1405             print_error( err, "clEnqueueNDRangeKernel failed" );
1406             clReleaseMemObject( buffers[i] );
1407             clReleaseKernel( kernel[i] );
1408             clReleaseProgram( program[i] );
1409             align_free( outptr[i] );
1410             return -1;
1411         }
1412 
1413         err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
1414         if ( err != CL_SUCCESS ){
1415             print_error( err, "clEnqueueReadBuffer failed" );
1416             clReleaseMemObject( buffers[i] );
1417             clReleaseKernel( kernel[i] );
1418             clReleaseProgram( program[i] );
1419             align_free( outptr[i] );
1420             return -1;
1421         }
1422 
1423         if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
1424             log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1425             total_errors++;
1426         }
1427         else{
1428             log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1429         }
1430 
1431         // cleanup
1432         clReleaseMemObject( buffers[i] );
1433         clReleaseKernel( kernel[i] );
1434         clReleaseProgram( program[i] );
1435         align_free( outptr[i] );
1436     }
1437 
1438     return total_errors;
1439 
1440 }   // end testRandomReadSize()
1441 
1442 
test_buffer_read_random_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1443 int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1444 {
1445     int     err = 0;
1446     int     i;
1447     cl_uint start;
1448     size_t  size;
1449     MTdata  d = init_genrand( gRandomSeed );
1450 
1451     // now test for random sizes of array being read
1452     for ( i = 0; i < 8; i++ ){
1453         start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
1454         size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
1455         if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
1456             err++;
1457     }
1458 
1459     free_mtdata(d);
1460 
1461     return err;
1462 }
1463 
1464