• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <CL/cl_half.h>
25 
26 #include "procs.h"
27 
28 //#define HK_DO_NOT_RUN_SHORT_ASYNC    1
29 //#define HK_DO_NOT_RUN_USHORT_ASYNC    1
30 //#define HK_DO_NOT_RUN_CHAR_ASYNC    1
31 //#define HK_DO_NOT_RUN_UCHAR_ASYNC    1
32 
33 #define TEST_PRIME_INT        ((1<<16)+1)
34 #define TEST_PRIME_UINT        ((1U<<16)+1U)
35 #define TEST_PRIME_LONG        ((1LL<<32)+1LL)
36 #define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
37 #define TEST_PRIME_SHORT    ((1S<<8)+1S)
38 #define TEST_PRIME_FLOAT    (float)3.40282346638528860e+38
39 #define TEST_PRIME_HALF        119.f
40 #define TEST_BOOL            true
41 #define TEST_PRIME_CHAR        0x77
42 
43 #ifndef ulong
44 typedef unsigned long ulong;
45 #endif
46 
47 #ifndef uchar
48 typedef unsigned char uchar;
49 #endif
50 
51 #ifndef TestStruct
52 typedef struct{
53     int     a;
54     float   b;
55 } TestStruct;
56 #endif
57 
58 //--- the code for the kernel executables
59 static const char *buffer_read_int_kernel_code[] = {
60     "__kernel void test_buffer_read_int(__global int *dst)\n"
61     "{\n"
62     "    int  tid = get_global_id(0);\n"
63     "\n"
64     "    dst[tid] = ((1<<16)+1);\n"
65     "}\n",
66 
67     "__kernel void test_buffer_read_int2(__global int2 *dst)\n"
68     "{\n"
69     "    int  tid = get_global_id(0);\n"
70     "\n"
71     "    dst[tid] = ((1<<16)+1);\n"
72     "}\n",
73 
74     "__kernel void test_buffer_read_int4(__global int4 *dst)\n"
75     "{\n"
76     "    int  tid = get_global_id(0);\n"
77     "\n"
78     "    dst[tid] = ((1<<16)+1);\n"
79     "}\n",
80 
81     "__kernel void test_buffer_read_int8(__global int8 *dst)\n"
82     "{\n"
83     "    int  tid = get_global_id(0);\n"
84     "\n"
85     "    dst[tid] = ((1<<16)+1);\n"
86     "}\n",
87 
88     "__kernel void test_buffer_read_int16(__global int16 *dst)\n"
89     "{\n"
90     "    int  tid = get_global_id(0);\n"
91     "\n"
92     "    dst[tid] = ((1<<16)+1);\n"
93     "}\n" };
94 
95 static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
96 
97 static const char *buffer_read_uint_kernel_code[] = {
98     "__kernel void test_buffer_read_uint(__global uint *dst)\n"
99     "{\n"
100     "    int  tid = get_global_id(0);\n"
101     "\n"
102     "    dst[tid] = ((1U<<16)+1U);\n"
103     "}\n",
104 
105     "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
106     "{\n"
107     "    int  tid = get_global_id(0);\n"
108     "\n"
109     "    dst[tid] = ((1U<<16)+1U);\n"
110     "}\n",
111 
112     "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
113     "{\n"
114     "    int  tid = get_global_id(0);\n"
115     "\n"
116     "    dst[tid] = ((1U<<16)+1U);\n"
117     "}\n",
118 
119     "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
120     "{\n"
121     "    int  tid = get_global_id(0);\n"
122     "\n"
123     "    dst[tid] = ((1U<<16)+1U);\n"
124     "}\n",
125 
126     "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
127     "{\n"
128     "    int  tid = get_global_id(0);\n"
129     "\n"
130     "    dst[tid] = ((1U<<16)+1U);\n"
131     "}\n" };
132 
133 static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
134 
135 static const char *buffer_read_long_kernel_code[] = {
136     "__kernel void test_buffer_read_long(__global long *dst)\n"
137     "{\n"
138     "    int  tid = get_global_id(0);\n"
139     "\n"
140     "    dst[tid] = ((1L<<32)+1L);\n"
141     "}\n",
142 
143     "__kernel void test_buffer_read_long2(__global long2 *dst)\n"
144     "{\n"
145     "    int  tid = get_global_id(0);\n"
146     "\n"
147     "    dst[tid] = ((1L<<32)+1L);\n"
148     "}\n",
149 
150     "__kernel void test_buffer_read_long4(__global long4 *dst)\n"
151     "{\n"
152     "    int  tid = get_global_id(0);\n"
153     "\n"
154     "    dst[tid] = ((1L<<32)+1L);\n"
155     "}\n",
156 
157     "__kernel void test_buffer_read_long8(__global long8 *dst)\n"
158     "{\n"
159     "    int  tid = get_global_id(0);\n"
160     "\n"
161     "    dst[tid] = ((1L<<32)+1L);\n"
162     "}\n",
163 
164     "__kernel void test_buffer_read_long16(__global long16 *dst)\n"
165     "{\n"
166     "    int  tid = get_global_id(0);\n"
167     "\n"
168     "    dst[tid] = ((1L<<32)+1L);\n"
169     "}\n" };
170 
171 static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
172 
173 static const char *buffer_read_ulong_kernel_code[] = {
174     "__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
175     "{\n"
176     "    int  tid = get_global_id(0);\n"
177     "\n"
178     "    dst[tid] = ((1UL<<32)+1UL);\n"
179     "}\n",
180 
181     "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
182     "{\n"
183     "    int  tid = get_global_id(0);\n"
184     "\n"
185     "    dst[tid] = ((1UL<<32)+1UL);\n"
186     "}\n",
187 
188     "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
189     "{\n"
190     "    int  tid = get_global_id(0);\n"
191     "\n"
192     "    dst[tid] = ((1UL<<32)+1UL);\n"
193     "}\n",
194 
195     "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
196     "{\n"
197     "    int  tid = get_global_id(0);\n"
198     "\n"
199     "    dst[tid] = ((1UL<<32)+1UL);\n"
200     "}\n",
201 
202     "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
203     "{\n"
204     "    int  tid = get_global_id(0);\n"
205     "\n"
206     "    dst[tid] = ((1UL<<32)+1UL);\n"
207     "}\n" };
208 
209 static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
210 
211 static const char *buffer_read_short_kernel_code[] = {
212     "__kernel void test_buffer_read_short(__global short *dst)\n"
213     "{\n"
214     "    int  tid = get_global_id(0);\n"
215     "\n"
216     "    dst[tid] = (short)((1<<8)+1);\n"
217     "}\n",
218 
219     "__kernel void test_buffer_read_short2(__global short2 *dst)\n"
220     "{\n"
221     "    int  tid = get_global_id(0);\n"
222     "\n"
223     "    dst[tid] = (short)((1<<8)+1);\n"
224     "}\n",
225 
226     "__kernel void test_buffer_read_short4(__global short4 *dst)\n"
227     "{\n"
228     "    int  tid = get_global_id(0);\n"
229     "\n"
230     "    dst[tid] = (short)((1<<8)+1);\n"
231     "}\n",
232 
233     "__kernel void test_buffer_read_short8(__global short8 *dst)\n"
234     "{\n"
235     "    int  tid = get_global_id(0);\n"
236     "\n"
237     "    dst[tid] = (short)((1<<8)+1);\n"
238     "}\n",
239 
240     "__kernel void test_buffer_read_short16(__global short16 *dst)\n"
241     "{\n"
242     "    int  tid = get_global_id(0);\n"
243     "\n"
244     "    dst[tid] = (short)((1<<8)+1);\n"
245     "}\n" };
246 
247 static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
248 
249 
250 static const char *buffer_read_ushort_kernel_code[] = {
251     "__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
252     "{\n"
253     "    int  tid = get_global_id(0);\n"
254     "\n"
255     "    dst[tid] = (ushort)((1<<8)+1);\n"
256     "}\n",
257 
258     "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
259     "{\n"
260     "    int  tid = get_global_id(0);\n"
261     "\n"
262     "    dst[tid] = (ushort)((1<<8)+1);\n"
263     "}\n",
264 
265     "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
266     "{\n"
267     "    int  tid = get_global_id(0);\n"
268     "\n"
269     "    dst[tid] = (ushort)((1<<8)+1);\n"
270     "}\n",
271 
272     "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
273     "{\n"
274     "    int  tid = get_global_id(0);\n"
275     "\n"
276     "    dst[tid] = (ushort)((1<<8)+1);\n"
277     "}\n",
278 
279     "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
280     "{\n"
281     "    int  tid = get_global_id(0);\n"
282     "\n"
283     "    dst[tid] = (ushort)((1<<8)+1);\n"
284     "}\n" };
285 
286 static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
287 
288 
289 static const char *buffer_read_float_kernel_code[] = {
290     "__kernel void test_buffer_read_float(__global float *dst)\n"
291     "{\n"
292     "    int  tid = get_global_id(0);\n"
293     "\n"
294     "    dst[tid] = (float)3.40282346638528860e+38;\n"
295     "}\n",
296 
297     "__kernel void test_buffer_read_float2(__global float2 *dst)\n"
298     "{\n"
299     "    int  tid = get_global_id(0);\n"
300     "\n"
301     "    dst[tid] = (float)3.40282346638528860e+38;\n"
302     "}\n",
303 
304     "__kernel void test_buffer_read_float4(__global float4 *dst)\n"
305     "{\n"
306     "    int  tid = get_global_id(0);\n"
307     "\n"
308     "    dst[tid] = (float)3.40282346638528860e+38;\n"
309     "}\n",
310 
311     "__kernel void test_buffer_read_float8(__global float8 *dst)\n"
312     "{\n"
313     "    int  tid = get_global_id(0);\n"
314     "\n"
315     "    dst[tid] = (float)3.40282346638528860e+38;\n"
316     "}\n",
317 
318     "__kernel void test_buffer_read_float16(__global float16 *dst)\n"
319     "{\n"
320     "    int  tid = get_global_id(0);\n"
321     "\n"
322     "    dst[tid] = (float)3.40282346638528860e+38;\n"
323     "}\n" };
324 
325 static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
326 
327 
328 static const char *buffer_read_half_kernel_code[] = {
329     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
330     "__kernel void test_buffer_read_half(__global half *dst)\n"
331     "{\n"
332     "    int  tid = get_global_id(0);\n"
333     "\n"
334     "    dst[tid] = (half)119;\n"
335     "}\n",
336 
337     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
338     "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
339     "{\n"
340     "    int  tid = get_global_id(0);\n"
341     "\n"
342     "    dst[tid] = (half)119;\n"
343     "}\n",
344 
345     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
346     "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
347     "{\n"
348     "    int  tid = get_global_id(0);\n"
349     "\n"
350     "    dst[tid] = (half)119;\n"
351     "}\n",
352 
353     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
354     "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
355     "{\n"
356     "    int  tid = get_global_id(0);\n"
357     "\n"
358     "    dst[tid] = (half)119;\n"
359     "}\n",
360 
361     "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
362     "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
363     "{\n"
364     "    int  tid = get_global_id(0);\n"
365     "\n"
366     "    dst[tid] = (half)119;\n"
367     "}\n"
368 };
369 
370 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
371 
372 
373 static const char *buffer_read_char_kernel_code[] = {
374     "__kernel void test_buffer_read_char(__global char *dst)\n"
375     "{\n"
376     "    int  tid = get_global_id(0);\n"
377     "\n"
378     "    dst[tid] = (char)'w';\n"
379     "}\n",
380 
381     "__kernel void test_buffer_read_char2(__global char2 *dst)\n"
382     "{\n"
383     "    int  tid = get_global_id(0);\n"
384     "\n"
385     "    dst[tid] = (char)'w';\n"
386     "}\n",
387 
388     "__kernel void test_buffer_read_char4(__global char4 *dst)\n"
389     "{\n"
390     "    int  tid = get_global_id(0);\n"
391     "\n"
392     "    dst[tid] = (char)'w';\n"
393     "}\n",
394 
395     "__kernel void test_buffer_read_char8(__global char8 *dst)\n"
396     "{\n"
397     "    int  tid = get_global_id(0);\n"
398     "\n"
399     "    dst[tid] = (char)'w';\n"
400     "}\n",
401 
402     "__kernel void test_buffer_read_char16(__global char16 *dst)\n"
403     "{\n"
404     "    int  tid = get_global_id(0);\n"
405     "\n"
406     "    dst[tid] = (char)'w';\n"
407     "}\n" };
408 
409 static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
410 
411 
412 static const char *buffer_read_uchar_kernel_code[] = {
413     "__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
414     "{\n"
415     "    int  tid = get_global_id(0);\n"
416     "\n"
417     "    dst[tid] = 'w';\n"
418     "}\n",
419 
420     "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
421     "{\n"
422     "    int  tid = get_global_id(0);\n"
423     "\n"
424     "    dst[tid] = (uchar)'w';\n"
425     "}\n",
426 
427     "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
428     "{\n"
429     "    int  tid = get_global_id(0);\n"
430     "\n"
431     "    dst[tid] = (uchar)'w';\n"
432     "}\n",
433 
434     "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
435     "{\n"
436     "    int  tid = get_global_id(0);\n"
437     "\n"
438     "    dst[tid] = (uchar)'w';\n"
439     "}\n",
440 
441     "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
442     "{\n"
443     "    int  tid = get_global_id(0);\n"
444     "\n"
445     "    dst[tid] = (uchar)'w';\n"
446     "}\n" };
447 
448 static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
449 
450 
451 static const char *buffer_read_struct_kernel_code =
452 "typedef struct{\n"
453 "int    a;\n"
454 "float    b;\n"
455 "} TestStruct;\n"
456 "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
457 "{\n"
458 "    int  tid = get_global_id(0);\n"
459 "\n"
460 "    dst[tid].a = ((1<<16)+1);\n"
461 "     dst[tid].b = (float)3.40282346638528860e+38;\n"
462 "}\n";
463 
464 
465 //--- the verify functions
verify_read_int(void * ptr,int n)466 static int verify_read_int(void *ptr, int n)
467 {
468     int     i;
469     cl_int  *outptr = (cl_int *)ptr;
470 
471     for (i=0; i<n; i++){
472         if ( outptr[i] != TEST_PRIME_INT )
473             return -1;
474     }
475 
476     return 0;
477 }
478 
479 
verify_read_uint(void * ptr,int n)480 static int verify_read_uint(void *ptr, int n)
481 {
482     int     i;
483     cl_uint *outptr = (cl_uint *)ptr;
484 
485     for (i=0; i<n; i++){
486         if ( outptr[i] != TEST_PRIME_UINT )
487             return -1;
488     }
489 
490     return 0;
491 }
492 
493 
verify_read_long(void * ptr,int n)494 static int verify_read_long(void *ptr, int n)
495 {
496     int     i;
497     cl_long *outptr = (cl_long *)ptr;
498 
499     for (i=0; i<n; i++){
500         if ( outptr[i] != TEST_PRIME_LONG )
501             return -1;
502     }
503 
504     return 0;
505 }
506 
507 
verify_read_ulong(void * ptr,int n)508 static int verify_read_ulong(void *ptr, int n)
509 {
510     int      i;
511     cl_ulong *outptr = (cl_ulong *)ptr;
512 
513     for (i=0; i<n; i++){
514         if ( outptr[i] != TEST_PRIME_ULONG )
515             return -1;
516     }
517 
518     return 0;
519 }
520 
521 
verify_read_short(void * ptr,int n)522 static int verify_read_short(void *ptr, int n)
523 {
524     int      i;
525     cl_short *outptr = (cl_short *)ptr;
526 
527     for (i=0; i<n; i++){
528         if ( outptr[i] != (cl_short)((1<<8)+1) )
529             return -1;
530     }
531 
532     return 0;
533 }
534 
535 
verify_read_ushort(void * ptr,int n)536 static int verify_read_ushort(void *ptr, int n)
537 {
538     int       i;
539     cl_ushort *outptr = (cl_ushort *)ptr;
540 
541     for (i=0; i<n; i++){
542         if ( outptr[i] != (cl_ushort)((1<<8)+1) )
543             return -1;
544     }
545 
546     return 0;
547 }
548 
549 
verify_read_float(void * ptr,int n)550 static int verify_read_float( void *ptr, int n )
551 {
552     int      i;
553     cl_float *outptr = (cl_float *)ptr;
554 
555     for (i=0; i<n; i++){
556         if ( outptr[i] != TEST_PRIME_FLOAT )
557             return -1;
558     }
559 
560     return 0;
561 }
562 
563 
verify_read_half(void * ptr,int n)564 static int verify_read_half( void *ptr, int n )
565 {
566     int     i;
567     cl_half *outptr = (cl_half *)ptr;
568 
569     for (i = 0; i < n; i++)
570     {
571         if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
572     }
573 
574     return 0;
575 }
576 
577 
verify_read_char(void * ptr,int n)578 static int verify_read_char(void *ptr, int n)
579 {
580     int     i;
581     cl_char *outptr = (cl_char *)ptr;
582 
583     for (i=0; i<n; i++){
584         if ( outptr[i] != TEST_PRIME_CHAR )
585             return -1;
586     }
587 
588     return 0;
589 }
590 
591 
verify_read_uchar(void * ptr,int n)592 static int verify_read_uchar(void *ptr, int n)
593 {
594     int      i;
595     cl_uchar *outptr = (cl_uchar *)ptr;
596 
597     for (i=0; i<n; i++){
598         if ( outptr[i] != TEST_PRIME_CHAR )
599             return -1;
600     }
601 
602     return 0;
603 }
604 
605 
verify_read_struct(TestStruct * outptr,int n)606 static int verify_read_struct(TestStruct *outptr, int n)
607 {
608     int     i;
609 
610     for (i=0; i<n; i++)
611     {
612         if ( ( outptr[i].a != TEST_PRIME_INT ) ||
613              ( outptr[i].b != TEST_PRIME_FLOAT ) )
614             return -1;
615     }
616 
617     return 0;
618 }
619 
620 //----- the test functions
test_buffer_read(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))621 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
622                       const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
623 {
624     void        *outptr[5];
625     void        *inptr[5];
626     clProgramWrapper program[5];
627     clKernelWrapper kernel[5];
628     size_t      global_work_size[3];
629     cl_int      err;
630     int         i;
631     size_t      ptrSizes[5];
632     int         src_flag_id;
633     int         total_errors = 0;
634 
635     size_t      min_alignment = get_min_alignment(context);
636 
637     global_work_size[0] = (cl_uint)num_elements;
638 
639     ptrSizes[0] = size;
640     ptrSizes[1] = ptrSizes[0] << 1;
641     ptrSizes[2] = ptrSizes[1] << 1;
642     ptrSizes[3] = ptrSizes[2] << 1;
643     ptrSizes[4] = ptrSizes[3] << 1;
644 
645     //skip devices that don't support long
646     if (! gHasLong && strstr(type,"long") )
647     {
648         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
649         return CL_SUCCESS;
650     }
651 
652     for (i = 0; i < loops; i++)
653     {
654 
655         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
656                                           &kernelCode[i], kernelName[i]);
657         if (err)
658         {
659             log_error("Creating program for %s\n", type);
660             print_error(err, " Error creating program ");
661             return -1;
662         }
663 
664         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
665         {
666             clMemWrapper buffer;
667             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
668             if ( ! outptr[i] ){
669                 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
670                 return -1;
671             }
672             inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
673             if ( ! inptr[i] ){
674                 log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
675                 return -1;
676             }
677 
678 
679             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
680                 buffer =
681                     clCreateBuffer(context, flag_set[src_flag_id],
682                                    ptrSizes[i] * num_elements, inptr[i], &err);
683             else
684                 buffer = clCreateBuffer(context, flag_set[src_flag_id],
685                                         ptrSizes[i] * num_elements, NULL, &err);
686             if (err != CL_SUCCESS)
687             {
688                 print_error(err, " clCreateBuffer failed\n" );
689                 align_free( outptr[i] );
690                 align_free( inptr[i] );
691                 return -1;
692             }
693 
694             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
695             if ( err != CL_SUCCESS ){
696                 print_error( err, "clSetKernelArg failed" );
697                 align_free( outptr[i] );
698                 align_free( inptr[i] );
699                 return -1;
700             }
701 
702             err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL,
703                                          global_work_size, NULL, 0, NULL, NULL);
704             if ( err != CL_SUCCESS ){
705                 print_error( err, "clEnqueueNDRangeKernel failed" );
706                 align_free( outptr[i] );
707                 align_free( inptr[i] );
708                 return -1;
709             }
710 
711             err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
712                                       ptrSizes[i] * num_elements, outptr[i], 0,
713                                       NULL, NULL);
714             if ( err != CL_SUCCESS ){
715                 print_error( err, "clEnqueueReadBuffer failed" );
716                 align_free( outptr[i] );
717                 align_free( inptr[i] );
718                 return -1;
719             }
720 
721             if (fn(outptr[i], num_elements*(1<<i))){
722                 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
723                           1 << i, flag_set_names[src_flag_id]);
724                 total_errors++;
725             }
726             else{
727                 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
728                          1 << i, flag_set_names[src_flag_id]);
729             }
730 
731             err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
732                                       ptrSizes[i] * num_elements, inptr[i], 0,
733                                       NULL, NULL);
734             if (err != CL_SUCCESS)
735             {
736                 print_error( err, "clEnqueueReadBuffer failed" );
737                 align_free( outptr[i] );
738                 align_free( inptr[i] );
739                 return -1;
740             }
741 
742             if (fn(inptr[i], num_elements*(1<<i))){
743                 log_error( " %s%d test failed in-place readback\n", type, 1<<i );
744                 total_errors++;
745             }
746             else{
747                 log_info( " %s%d test passed in-place readback\n", type, 1<<i );
748             }
749 
750 
751             // cleanup
752             align_free( outptr[i] );
753             align_free( inptr[i] );
754         }
755     } // mem flag
756 
757     return total_errors;
758 
759 }   // end test_buffer_read()
760 
test_buffer_read_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))761 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
762                             const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
763 {
764     clProgramWrapper program[5];
765     clKernelWrapper kernel[5];
766     clEventWrapper event;
767     void        *outptr[5];
768     void        *inptr[5];
769     size_t      global_work_size[3];
770     cl_int      err;
771     int         i;
772     size_t      lastIndex;
773     size_t      ptrSizes[5];
774     int         src_flag_id;
775     int         total_errors = 0;
776 
777     size_t      min_alignment = get_min_alignment(context);
778 
779     global_work_size[0] = (cl_uint)num_elements;
780 
781     ptrSizes[0] = size;
782     ptrSizes[1] = ptrSizes[0] << 1;
783     ptrSizes[2] = ptrSizes[1] << 1;
784     ptrSizes[3] = ptrSizes[2] << 1;
785     ptrSizes[4] = ptrSizes[3] << 1;
786 
787     //skip devices that don't support long
788     if (! gHasLong && strstr(type,"long") )
789     {
790         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
791         return CL_SUCCESS;
792     }
793 
794     for (i = 0; i < loops; i++)
795     {
796 
797         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
798                                           &kernelCode[i], kernelName[i]);
799         if (err)
800         {
801             log_error(" Error creating program for %s\n", type);
802             return -1;
803         }
804 
805         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
806         {
807             clMemWrapper buffer;
808             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
809             if ( ! outptr[i] ){
810                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
811                 return -1;
812             }
813             memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
814             inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
815             if ( ! inptr[i] ){
816                 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
817                 return -1;
818             }
819             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
820 
821 
822             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
823                 buffer =
824                     clCreateBuffer(context, flag_set[src_flag_id],
825                                    ptrSizes[i] * num_elements, inptr[i], &err);
826             else
827                 buffer = clCreateBuffer(context, flag_set[src_flag_id],
828                                         ptrSizes[i] * num_elements, NULL, &err);
829             if ( err != CL_SUCCESS ){
830                 print_error(err, " clCreateBuffer failed\n" );
831                 align_free( outptr[i] );
832                 align_free( inptr[i] );
833                 return -1;
834             }
835 
836             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
837             if ( err != CL_SUCCESS ){
838                 print_error( err, "clSetKernelArg failed" );
839                 align_free( outptr[i] );
840                 align_free( inptr[i] );
841                 return -1;
842             }
843 
844             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
845             if ( err != CL_SUCCESS ){
846                 print_error( err, "clEnqueueNDRangeKernel failed" );
847                 align_free( outptr[i] );
848                 align_free( inptr[i] );
849                 return -1;
850             }
851 
852             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
853             err = clEnqueueReadBuffer(queue, buffer, false, 0,
854                                       ptrSizes[i] * num_elements, outptr[i], 0,
855                                       NULL, &event);
856 #ifdef CHECK_FOR_NON_WAIT
857             if ( ((uchar *)outptr[i])[lastIndex] ){
858                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
859                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
860             }
861 #endif
862             if ( err != CL_SUCCESS ){
863                 print_error( err, "clEnqueueReadBuffer failed" );
864                 align_free( outptr[i] );
865                 align_free( inptr[i] );
866                 return -1;
867             }
868             err = clWaitForEvents(1, &event );
869             if ( err != CL_SUCCESS ){
870                 print_error( err, "clWaitForEvents() failed" );
871                 align_free( outptr[i] );
872                 align_free( inptr[i] );
873                 return -1;
874             }
875 
876             if ( fn(outptr[i], num_elements*(1<<i)) ){
877                 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
878                           1 << i, flag_set_names[src_flag_id]);
879                 total_errors++;
880             }
881             else{
882                 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
883                          1 << i, flag_set_names[src_flag_id]);
884             }
885 
886             // cleanup
887             align_free( outptr[i] );
888             align_free( inptr[i] );
889         }
890     } // mem flags
891 
892 
893     return total_errors;
894 
895 }   // end test_buffer_read_array_async()
896 
897 
test_buffer_read_array_barrier(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))898 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
899                                     const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
900 {
901     clProgramWrapper program[5];
902     clKernelWrapper kernel[5];
903     clEventWrapper event;
904     void        *outptr[5], *inptr[5];
905     size_t      global_work_size[3];
906     cl_int      err;
907     int         i;
908     size_t      lastIndex;
909     size_t      ptrSizes[5];
910     int         src_flag_id;
911     int         total_errors = 0;
912 
913     size_t min_alignment = get_min_alignment(context);
914 
915     global_work_size[0] = (cl_uint)num_elements;
916 
917     ptrSizes[0] = size;
918     ptrSizes[1] = ptrSizes[0] << 1;
919     ptrSizes[2] = ptrSizes[1] << 1;
920     ptrSizes[3] = ptrSizes[2] << 1;
921     ptrSizes[4] = ptrSizes[3] << 1;
922 
923     //skip devices that don't support long
924     if (! gHasLong && strstr(type,"long") )
925     {
926         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
927         return CL_SUCCESS;
928     }
929 
930     for (i = 0; i < loops; i++)
931     {
932 
933         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
934                                           &kernelCode[i], kernelName[i]);
935         if (err)
936         {
937             log_error(" Error creating program for %s\n", type);
938             return -1;
939         }
940 
941         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
942         {
943             clMemWrapper buffer;
944             outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
945             if ( ! outptr[i] ){
946                 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
947                 return -1;
948             }
949             memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
950             inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
951             if ( ! inptr[i] ){
952                 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
953                 return -1;
954             }
955             memset( inptr[i], 0, ptrSizes[i] * num_elements );  // initialize to zero to tell difference
956 
957             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
958                 buffer =
959                     clCreateBuffer(context, flag_set[src_flag_id],
960                                    ptrSizes[i] * num_elements, inptr[i], &err);
961             else
962                 buffer = clCreateBuffer(context, flag_set[src_flag_id],
963                                         ptrSizes[i] * num_elements, NULL, &err);
964             if ( err != CL_SUCCESS ){
965                 print_error(err, " clCreateBuffer failed\n" );
966                 align_free( outptr[i] );
967                 align_free( inptr[i] );
968                 return -1;
969             }
970 
971             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
972             if ( err != CL_SUCCESS ){
973                 print_error( err, "clSetKernelArgs failed" );
974                 align_free( outptr[i] );
975                 align_free( inptr[i] );
976                 return -1;
977             }
978 
979             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
980             if ( err != CL_SUCCESS ){
981                 print_error( err, "clEnqueueNDRangeKernel failed" );
982                 align_free( outptr[i] );
983                 align_free( inptr[i] );
984                 return -1;
985             }
986 
987             lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
988             err = clEnqueueReadBuffer(queue, buffer, false, 0,
989                                       ptrSizes[i] * num_elements,
990                                       (void *)(outptr[i]), 0, NULL, &event);
991 #ifdef CHECK_FOR_NON_WAIT
992             if ( ((uchar *)outptr[i])[lastIndex] ){
993                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
994                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
995             }
996 #endif
997             if ( err != CL_SUCCESS ){
998                 print_error( err, "clEnqueueReadBuffer failed" );
999                 align_free( outptr[i] );
1000                 align_free( inptr[i] );
1001                 return -1;
1002             }
1003             err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
1004             if ( err != CL_SUCCESS ){
1005                 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
1006                 align_free( outptr[i] );
1007                 return -1;
1008             }
1009 
1010             err = clWaitForEvents(1, &event);
1011             if ( err != CL_SUCCESS ){
1012                 print_error( err, "clWaitForEvents() failed" );
1013                 align_free( outptr[i] );
1014                 align_free( inptr[i] );
1015                 return -1;
1016             }
1017 
1018             if ( fn(outptr[i], num_elements*(1<<i)) ){
1019                 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
1020                           1 << i, flag_set_names[src_flag_id]);
1021                 total_errors++;
1022             }
1023             else{
1024                 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
1025                          1 << i, flag_set_names[src_flag_id]);
1026             }
1027 
1028             // cleanup
1029             align_free( outptr[i] );
1030             align_free( inptr[i] );
1031         }
1032     } // cl_mem flags
1033     return total_errors;
1034 
1035 }   // end test_buffer_read_array_barrier()
1036 
1037 
1038 #define DECLARE_READ_TEST(type, realType) \
1039 int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1040 { \
1041 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1042 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1043 }
1044 
DECLARE_READ_TEST(int,cl_int)1045 DECLARE_READ_TEST(int, cl_int)
1046 DECLARE_READ_TEST(uint, cl_uint)
1047 DECLARE_READ_TEST(long, cl_long)
1048 DECLARE_READ_TEST(ulong, cl_ulong)
1049 DECLARE_READ_TEST(short, cl_short)
1050 DECLARE_READ_TEST(ushort, cl_ushort)
1051 DECLARE_READ_TEST(float, cl_float)
1052 DECLARE_READ_TEST(char, cl_char)
1053 DECLARE_READ_TEST(uchar, cl_uchar)
1054 
1055 int test_buffer_read_half(cl_device_id deviceID, cl_context context,
1056                           cl_command_queue queue, int num_elements)
1057 {
1058     PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
1059     return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
1060                              buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
1061 }
1062 
1063 
1064 #define DECLARE_ASYNC_TEST(type, realType) \
1065 int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1066 { \
1067 return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1068 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1069 }
1070 
DECLARE_ASYNC_TEST(char,cl_char)1071 DECLARE_ASYNC_TEST(char, cl_char)
1072 DECLARE_ASYNC_TEST(uchar, cl_uchar)
1073 DECLARE_ASYNC_TEST(short, cl_short)
1074 DECLARE_ASYNC_TEST(ushort, cl_ushort)
1075 DECLARE_ASYNC_TEST(int, cl_int)
1076 DECLARE_ASYNC_TEST(uint, cl_uint)
1077 DECLARE_ASYNC_TEST(long, cl_long)
1078 DECLARE_ASYNC_TEST(ulong, cl_ulong)
1079 DECLARE_ASYNC_TEST(float, cl_float)
1080 
1081 
1082 #define DECLARE_BARRIER_TEST(type, realType) \
1083 int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )    \
1084 { \
1085 return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1086 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1087 }
1088 
1089 DECLARE_BARRIER_TEST(int, cl_int)
1090 DECLARE_BARRIER_TEST(uint, cl_uint)
1091 DECLARE_BARRIER_TEST(long, cl_long)
1092 DECLARE_BARRIER_TEST(ulong, cl_ulong)
1093 DECLARE_BARRIER_TEST(short, cl_short)
1094 DECLARE_BARRIER_TEST(ushort, cl_ushort)
1095 DECLARE_BARRIER_TEST(char, cl_char)
1096 DECLARE_BARRIER_TEST(uchar, cl_uchar)
1097 DECLARE_BARRIER_TEST(float, cl_float)
1098 
1099 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1100 {
1101     cl_mem      buffers[1];
1102     TestStruct  *output_ptr;
1103     cl_program  program[1];
1104     cl_kernel   kernel[1];
1105     size_t      global_work_size[3];
1106     cl_int      err;
1107     size_t      objSize = sizeof(TestStruct);
1108 
1109     size_t      min_alignment = get_min_alignment(context);
1110 
1111     global_work_size[0] = (cl_uint)num_elements;
1112 
1113     output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
1114     if ( ! output_ptr ){
1115         log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
1116         return -1;
1117     }
1118     buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1119                                 objSize * num_elements, NULL, &err);
1120     if ( err != CL_SUCCESS ){
1121         print_error( err, " clCreateBuffer failed\n" );
1122         align_free( output_ptr );
1123         return -1;
1124     }
1125 
1126     err = create_single_kernel_helper(  context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
1127     if ( err ){
1128         clReleaseProgram( program[0] );
1129         align_free( output_ptr );
1130         return -1;
1131     }
1132 
1133     err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
1134     if ( err != CL_SUCCESS){
1135         print_error( err, "clSetKernelArg failed" );
1136         clReleaseMemObject( buffers[0] );
1137         clReleaseKernel( kernel[0] );
1138         clReleaseProgram( program[0] );
1139         align_free( output_ptr );
1140         return -1;
1141     }
1142 
1143     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1144     if ( err != CL_SUCCESS ){
1145         print_error( err, "clEnqueueNDRangeKernel failed" );
1146         clReleaseMemObject( buffers[0] );
1147         clReleaseKernel( kernel[0] );
1148         clReleaseProgram( program[0] );
1149         align_free( output_ptr );
1150         return -1;
1151     }
1152 
1153     err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
1154     if ( err != CL_SUCCESS){
1155         print_error( err, "clEnqueueReadBuffer failed" );
1156         clReleaseMemObject( buffers[0] );
1157         clReleaseKernel( kernel[0] );
1158         clReleaseProgram( program[0] );
1159         align_free( output_ptr );
1160         return -1;
1161     }
1162 
1163     if (verify_read_struct(output_ptr, num_elements)){
1164         log_error(" struct test failed\n");
1165         err = -1;
1166     }
1167     else{
1168         log_info(" struct test passed\n");
1169         err = 0;
1170     }
1171 
1172     // cleanup
1173     clReleaseMemObject( buffers[0] );
1174     clReleaseKernel( kernel[0] );
1175     clReleaseProgram( program[0] );
1176     align_free( output_ptr );
1177 
1178     return err;
1179 }
1180 
1181 
testRandomReadSize(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_uint startOfRead,size_t sizeOfRead)1182 static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
1183 {
1184     cl_mem      buffers[3];
1185     int         *outptr[3];
1186     cl_program  program[3];
1187     cl_kernel   kernel[3];
1188     size_t      global_work_size[3];
1189     cl_int      err;
1190     int         i, j;
1191     size_t      ptrSizes[3];    // sizeof(int), sizeof(int2), sizeof(int4)
1192     int         total_errors = 0;
1193     size_t      min_alignment = get_min_alignment(context);
1194 
1195     global_work_size[0] = (cl_uint)num_elements;
1196 
1197     ptrSizes[0] = sizeof(cl_int);
1198     ptrSizes[1] = ptrSizes[0] << 1;
1199     ptrSizes[2] = ptrSizes[1] << 1;
1200     for ( i = 0; i < 3; i++ ){
1201         outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
1202         if ( ! outptr[i] ){
1203             log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
1204             for ( j = 0; j < i; j++ ){
1205                 clReleaseMemObject( buffers[j] );
1206                 align_free( outptr[j] );
1207             }
1208             return -1;
1209         }
1210         buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1211                                     ptrSizes[i] * num_elements, NULL, &err);
1212         if ( err != CL_SUCCESS ){
1213             print_error(err, " clCreateBuffer failed\n" );
1214             for ( j = 0; j < i; j++ ){
1215                 clReleaseMemObject( buffers[j] );
1216                 align_free( outptr[j] );
1217             }
1218             align_free( outptr[i] );
1219             return -1;
1220         }
1221     }
1222 
1223     err = create_single_kernel_helper(  context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
1224     if ( err ){
1225         log_error( " Error creating program for int\n" );
1226         for ( i = 0; i < 3; i++ ){
1227             clReleaseMemObject( buffers[i] );
1228             align_free( outptr[i] );
1229         }
1230         return -1;
1231     }
1232 
1233     err = create_single_kernel_helper(  context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
1234     if ( err ){
1235         log_error( " Error creating program for int2\n" );
1236         clReleaseKernel( kernel[0] );
1237         clReleaseProgram( program[0] );
1238         for ( i = 0; i < 3; i++ ){
1239             clReleaseMemObject( buffers[i] );
1240             align_free( outptr[i] );
1241         }
1242         return -1;
1243     }
1244 
1245     err = create_single_kernel_helper(  context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
1246     if ( err ){
1247         log_error( " Error creating program for int4\n" );
1248         clReleaseKernel( kernel[0] );
1249         clReleaseProgram( program[0] );
1250         clReleaseKernel( kernel[1] );
1251         clReleaseProgram( program[1] );
1252         for ( i = 0; i < 3; i++ ){
1253             clReleaseMemObject( buffers[i] );
1254             align_free( outptr[i] );
1255         }
1256         return -1;
1257     }
1258 
1259     for (i=0; i<3; i++){
1260         err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
1261         if ( err != CL_SUCCESS ){
1262             print_error( err, "clSetKernelArgs failed" );
1263             clReleaseMemObject( buffers[i] );
1264             clReleaseKernel( kernel[i] );
1265             clReleaseProgram( program[i] );
1266             align_free( outptr[i] );
1267             return -1;
1268         }
1269 
1270         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1271         if ( err != CL_SUCCESS ){
1272             print_error( err, "clEnqueueNDRangeKernel failed" );
1273             clReleaseMemObject( buffers[i] );
1274             clReleaseKernel( kernel[i] );
1275             clReleaseProgram( program[i] );
1276             align_free( outptr[i] );
1277             return -1;
1278         }
1279 
1280         err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
1281         if ( err != CL_SUCCESS ){
1282             print_error( err, "clEnqueueReadBuffer failed" );
1283             clReleaseMemObject( buffers[i] );
1284             clReleaseKernel( kernel[i] );
1285             clReleaseProgram( program[i] );
1286             align_free( outptr[i] );
1287             return -1;
1288         }
1289 
1290         if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
1291             log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1292             total_errors++;
1293         }
1294         else{
1295             log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1296         }
1297 
1298         // cleanup
1299         clReleaseMemObject( buffers[i] );
1300         clReleaseKernel( kernel[i] );
1301         clReleaseProgram( program[i] );
1302         align_free( outptr[i] );
1303     }
1304 
1305     return total_errors;
1306 
1307 }   // end testRandomReadSize()
1308 
1309 
test_buffer_read_random_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1310 int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1311 {
1312     int     err = 0;
1313     int     i;
1314     cl_uint start;
1315     size_t  size;
1316     MTdata  d = init_genrand( gRandomSeed );
1317 
1318     // now test for random sizes of array being read
1319     for ( i = 0; i < 8; i++ ){
1320         start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
1321         size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
1322         if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
1323             err++;
1324     }
1325 
1326     free_mtdata(d);
1327 
1328     return err;
1329 }
1330 
1331