• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include "harness/errorHelpers.h"
26 
27 #define USE_LOCAL_WORK_GROUP    1
28 
29 #ifndef uchar
30 typedef unsigned char uchar;
31 #endif
32 
33 #ifndef TestStruct
34 typedef struct{
35     int     a;
36     float   b;
37 } TestStruct;
38 #endif
39 
40 // If this is set to 1 the writes are done via map/unmap
41 static int gTestMap = 0;
42 
43 const char *buffer_write_int_kernel_code[] = {
44     "__kernel void test_buffer_write_int(__global int *src, __global int *dst)\n"
45     "{\n"
46     "    int  tid = get_global_id(0);\n"
47     "\n"
48     "    dst[tid] = src[tid];\n"
49     "}\n",
50 
51     "__kernel void test_buffer_write_int2(__global int2 *src, __global int2 *dst)\n"
52     "{\n"
53     "    int  tid = get_global_id(0);\n"
54     "\n"
55     "    dst[tid] = src[tid];\n"
56     "}\n",
57 
58     "__kernel void test_buffer_write_int4(__global int4 *src, __global int4 *dst)\n"
59     "{\n"
60     "    int  tid = get_global_id(0);\n"
61     "\n"
62     "    dst[tid] = src[tid];\n"
63     "}\n",
64 
65     "__kernel void test_buffer_write_int8(__global int8 *src, __global int8 *dst)\n"
66     "{\n"
67     "    int  tid = get_global_id(0);\n"
68     "\n"
69     "    dst[tid] = src[tid];\n"
70     "}\n",
71 
72     "__kernel void test_buffer_write_int16(__global int16 *src, __global int16 *dst)\n"
73     "{\n"
74     "    int  tid = get_global_id(0);\n"
75     "\n"
76     "    dst[tid] = src[tid];\n"
77     "}\n" };
78 
79 static const char *int_kernel_name[] = { "test_buffer_write_int", "test_buffer_write_int2", "test_buffer_write_int4", "test_buffer_write_int8", "test_buffer_write_int16" };
80 
81 
82 const char *buffer_write_uint_kernel_code[] = {
83     "__kernel void test_buffer_write_uint(__global uint *src, __global uint *dst)\n"
84     "{\n"
85     "    int  tid = get_global_id(0);\n"
86     "\n"
87     "    dst[tid] = src[tid];\n"
88     "}\n",
89 
90     "__kernel void test_buffer_write_uint2(__global uint2 *src, __global uint2 *dst)\n"
91     "{\n"
92     "    int  tid = get_global_id(0);\n"
93     "\n"
94     "    dst[tid] = src[tid];\n"
95     "}\n",
96 
97     "__kernel void test_buffer_write_uint4(__global uint4 *src, __global uint4 *dst)\n"
98     "{\n"
99     "    int  tid = get_global_id(0);\n"
100     "\n"
101     "    dst[tid] = src[tid];\n"
102     "}\n",
103 
104     "__kernel void test_buffer_write_uint8(__global uint8 *src, __global uint8 *dst)\n"
105     "{\n"
106     "    int  tid = get_global_id(0);\n"
107     "\n"
108     "    dst[tid] = src[tid];\n"
109     "}\n",
110 
111     "__kernel void test_buffer_write_uint16(__global uint16 *src, __global uint16 *dst)\n"
112     "{\n"
113     "    int  tid = get_global_id(0);\n"
114     "\n"
115     "    dst[tid] = src[tid];\n"
116     "}\n" };
117 
118 static const char *uint_kernel_name[] = { "test_buffer_write_uint", "test_buffer_write_uint2", "test_buffer_write_uint4", "test_buffer_write_uint8", "test_buffer_write_uint16" };
119 
120 
121 const char *buffer_write_ushort_kernel_code[] = {
122     "__kernel void test_buffer_write_ushort(__global ushort *src, __global ushort *dst)\n"
123     "{\n"
124     "    int  tid = get_global_id(0);\n"
125     "\n"
126     "    dst[tid] = src[tid];\n"
127     "}\n",
128 
129     "__kernel void test_buffer_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
130     "{\n"
131     "    int  tid = get_global_id(0);\n"
132     "\n"
133     "    dst[tid] = src[tid];\n"
134     "}\n",
135 
136     "__kernel void test_buffer_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
137     "{\n"
138     "    int  tid = get_global_id(0);\n"
139     "\n"
140     "    dst[tid] = src[tid];\n"
141     "}\n",
142 
143     "__kernel void test_buffer_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
144     "{\n"
145     "    int  tid = get_global_id(0);\n"
146     "\n"
147     "    dst[tid] = src[tid];\n"
148     "}\n",
149 
150     "__kernel void test_buffer_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
151     "{\n"
152     "    int  tid = get_global_id(0);\n"
153     "\n"
154     "    dst[tid] = src[tid];\n"
155     "}\n" };
156 
157 static const char *ushort_kernel_name[] = { "test_buffer_write_ushort", "test_buffer_write_ushort2", "test_buffer_write_ushort4", "test_buffer_write_ushort8", "test_buffer_write_ushort16" };
158 
159 
160 
161 const char *buffer_write_short_kernel_code[] = {
162     "__kernel void test_buffer_write_short(__global short *src, __global short *dst)\n"
163     "{\n"
164     "    int  tid = get_global_id(0);\n"
165     "\n"
166     "    dst[tid] = src[tid];\n"
167     "}\n",
168 
169     "__kernel void test_buffer_write_short2(__global short2 *src, __global short2 *dst)\n"
170     "{\n"
171     "    int  tid = get_global_id(0);\n"
172     "\n"
173     "    dst[tid] = src[tid];\n"
174     "}\n",
175 
176     "__kernel void test_buffer_write_short4(__global short4 *src, __global short4 *dst)\n"
177     "{\n"
178     "    int  tid = get_global_id(0);\n"
179     "\n"
180     "    dst[tid] = src[tid];\n"
181     "}\n",
182 
183     "__kernel void test_buffer_write_short8(__global short8 *src, __global short8 *dst)\n"
184     "{\n"
185     "    int  tid = get_global_id(0);\n"
186     "\n"
187     "    dst[tid] = src[tid];\n"
188     "}\n",
189 
190     "__kernel void test_buffer_write_short16(__global short16 *src, __global short16 *dst)\n"
191     "{\n"
192     "    int  tid = get_global_id(0);\n"
193     "\n"
194     "    dst[tid] = src[tid];\n"
195     "}\n" };
196 
197 static const char *short_kernel_name[] = { "test_buffer_write_short", "test_buffer_write_short2", "test_buffer_write_short4", "test_buffer_write_short8", "test_buffer_write_short16" };
198 
199 
200 const char *buffer_write_char_kernel_code[] = {
201     "__kernel void test_buffer_write_char(__global char *src, __global char *dst)\n"
202     "{\n"
203     "    int  tid = get_global_id(0);\n"
204     "\n"
205     "    dst[tid] = src[tid];\n"
206     "}\n",
207 
208     "__kernel void test_buffer_write_char2(__global char2 *src, __global char2 *dst)\n"
209     "{\n"
210     "    int  tid = get_global_id(0);\n"
211     "\n"
212     "    dst[tid] = src[tid];\n"
213     "}\n",
214 
215     "__kernel void test_buffer_write_char4(__global char4 *src, __global char4 *dst)\n"
216     "{\n"
217     "    int  tid = get_global_id(0);\n"
218     "\n"
219     "    dst[tid] = src[tid];\n"
220     "}\n",
221 
222     "__kernel void test_buffer_write_char8(__global char8 *src, __global char8 *dst)\n"
223     "{\n"
224     "    int  tid = get_global_id(0);\n"
225     "\n"
226     "    dst[tid] = src[tid];\n"
227     "}\n",
228 
229     "__kernel void test_buffer_write_char16(__global char16 *src, __global char16 *dst)\n"
230     "{\n"
231     "    int  tid = get_global_id(0);\n"
232     "\n"
233     "    dst[tid] = src[tid];\n"
234     "}\n" };
235 
236 static const char *char_kernel_name[] = { "test_buffer_write_char", "test_buffer_write_char2", "test_buffer_write_char4", "test_buffer_write_char8", "test_buffer_write_char16" };
237 
238 
239 const char *buffer_write_uchar_kernel_code[] = {
240     "__kernel void test_buffer_write_uchar(__global uchar *src, __global uchar *dst)\n"
241     "{\n"
242     "    int  tid = get_global_id(0);\n"
243     "\n"
244     "    dst[tid] = src[tid];\n"
245     "}\n",
246 
247     "__kernel void test_buffer_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
248     "{\n"
249     "    int  tid = get_global_id(0);\n"
250     "\n"
251     "    dst[tid] = src[tid];\n"
252     "}\n",
253 
254     "__kernel void test_buffer_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
255     "{\n"
256     "    int  tid = get_global_id(0);\n"
257     "\n"
258     "    dst[tid] = src[tid];\n"
259     "}\n",
260 
261     "__kernel void test_buffer_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
262     "{\n"
263     "    int  tid = get_global_id(0);\n"
264     "\n"
265     "    dst[tid] = src[tid];\n"
266     "}\n",
267 
268     "__kernel void test_buffer_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
269     "{\n"
270     "    int  tid = get_global_id(0);\n"
271     "\n"
272     "    dst[tid] = src[tid];\n"
273     "}\n" };
274 
275 static const char *uchar_kernel_name[] = { "test_buffer_write_uchar", "test_buffer_write_uchar2", "test_buffer_write_uchar4", "test_buffer_write_uchar8", "test_buffer_write_uchar16" };
276 
277 
278 const char *buffer_write_float_kernel_code[] = {
279     "__kernel void test_buffer_write_float(__global float *src, __global float *dst)\n"
280     "{\n"
281     "    int  tid = get_global_id(0);\n"
282     "\n"
283     "    dst[tid] = src[tid];\n"
284     "}\n",
285 
286     "__kernel void test_buffer_write_float2(__global float2 *src, __global float2 *dst)\n"
287     "{\n"
288     "    int  tid = get_global_id(0);\n"
289     "\n"
290     "    dst[tid] = src[tid];\n"
291     "}\n",
292 
293     "__kernel void test_buffer_write_float4(__global float4 *src, __global float4 *dst)\n"
294     "{\n"
295     "    int  tid = get_global_id(0);\n"
296     "\n"
297     "    dst[tid] = src[tid];\n"
298     "}\n",
299 
300     "__kernel void test_buffer_write_float8(__global float8 *src, __global float8 *dst)\n"
301     "{\n"
302     "    int  tid = get_global_id(0);\n"
303     "\n"
304     "    dst[tid] = src[tid];\n"
305     "}\n",
306 
307     "__kernel void test_buffer_write_float16(__global float16 *src, __global float16 *dst)\n"
308     "{\n"
309     "    int  tid = get_global_id(0);\n"
310     "\n"
311     "    dst[tid] = src[tid];\n"
312     "}\n" };
313 
314 static const char *float_kernel_name[] = { "test_buffer_write_float", "test_buffer_write_float2", "test_buffer_write_float4", "test_buffer_write_float8", "test_buffer_write_float16" };
315 
316 
317 const char *buffer_write_half_kernel_code[] = {
318     "__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n"
319     "{\n"
320     "    int  tid = get_global_id(0);\n"
321     "\n"
322     "    dst[tid] = vload_half( tid * 2, src );\n"
323     "}\n",
324 
325     "__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n"
326     "{\n"
327     "    int  tid = get_global_id(0);\n"
328     "\n"
329     "    dst[tid] = vload_half2( tid * 2, src );\n"
330     "}\n",
331 
332     "__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n"
333     "{\n"
334     "    int  tid = get_global_id(0);\n"
335     "\n"
336     "    dst[tid] = vload_half4( tid * 2, src );\n"
337     "}\n",
338 
339     "__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n"
340     "{\n"
341     "    int  tid = get_global_id(0);\n"
342     "\n"
343     "    dst[tid] = vload_half8( tid * 2, src );\n"
344     "}\n",
345 
346     "__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n"
347     "{\n"
348     "    int  tid = get_global_id(0);\n"
349     "\n"
350     "    dst[tid] = vload_half16( tid * 2, src );\n"
351     "}\n" };
352 
353 static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
354 
355 
356 const char *buffer_write_long_kernel_code[] = {
357     "__kernel void test_buffer_write_long(__global long *src, __global long *dst)\n"
358     "{\n"
359     "    int  tid = get_global_id(0);\n"
360     "\n"
361     "    dst[tid] = src[tid];\n"
362     "}\n",
363 
364     "__kernel void test_buffer_write_long2(__global long2 *src, __global long2 *dst)\n"
365     "{\n"
366     "    int  tid = get_global_id(0);\n"
367     "\n"
368     "    dst[tid] = src[tid];\n"
369     "}\n",
370 
371     "__kernel void test_buffer_write_long4(__global long4 *src, __global long4 *dst)\n"
372     "{\n"
373     "    int  tid = get_global_id(0);\n"
374     "\n"
375     "    dst[tid] = src[tid];\n"
376     "}\n",
377 
378     "__kernel void test_buffer_write_long8(__global long8 *src, __global long8 *dst)\n"
379     "{\n"
380     "    int  tid = get_global_id(0);\n"
381     "\n"
382     "    dst[tid] = src[tid];\n"
383     "}\n",
384 
385     "__kernel void test_buffer_write_long16(__global long16 *src, __global long16 *dst)\n"
386     "{\n"
387     "    int  tid = get_global_id(0);\n"
388     "\n"
389     "    dst[tid] = src[tid];\n"
390     "}\n" };
391 
392 static const char *long_kernel_name[] = { "test_buffer_write_long", "test_buffer_write_long2", "test_buffer_write_long4", "test_buffer_write_long8", "test_buffer_write_long16" };
393 
394 
395 const char *buffer_write_ulong_kernel_code[] = {
396     "__kernel void test_buffer_write_ulong(__global ulong *src, __global ulong *dst)\n"
397     "{\n"
398     "    int  tid = get_global_id(0);\n"
399     "\n"
400     "    dst[tid] = src[tid];\n"
401     "}\n",
402 
403     "__kernel void test_buffer_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
404     "{\n"
405     "    int  tid = get_global_id(0);\n"
406     "\n"
407     "    dst[tid] = src[tid];\n"
408     "}\n",
409 
410     "__kernel void test_buffer_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
411     "{\n"
412     "    int  tid = get_global_id(0);\n"
413     "\n"
414     "    dst[tid] = src[tid];\n"
415     "}\n",
416 
417     "__kernel void test_buffer_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
418     "{\n"
419     "    int  tid = get_global_id(0);\n"
420     "\n"
421     "    dst[tid] = src[tid];\n"
422     "}\n",
423 
424     "__kernel void test_buffer_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
425     "{\n"
426     "    int  tid = get_global_id(0);\n"
427     "\n"
428     "    dst[tid] = src[tid];\n"
429     "}\n" };
430 
431 static const char *ulong_kernel_name[] = { "test_buffer_write_ulong", "test_buffer_write_ulong2", "test_buffer_write_ulong4", "test_buffer_write_ulong8", "test_buffer_write_ulong16" };
432 
433 
434 static const char *struct_kernel_code =
435 "typedef struct{\n"
436 "int    a;\n"
437 "float    b;\n"
438 "} TestStruct;\n"
439 "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n"
440 "{\n"
441 "    int  tid = get_global_id(0);\n"
442 "\n"
443 "    dst[tid].a = src[tid].a;\n"
444 "     dst[tid].b = src[tid].b;\n"
445 "}\n";
446 
447 
448 
verify_write_int(void * ptr1,void * ptr2,int n)449 static int verify_write_int( void *ptr1, void *ptr2, int n )
450 {
451     int     i;
452     int     *inptr = (int *)ptr1;
453     int     *outptr = (int *)ptr2;
454 
455     for (i=0; i<n; i++){
456         if ( outptr[i] != inptr[i] )
457             return -1;
458     }
459 
460     return 0;
461 }
462 
463 
verify_write_uint(void * ptr1,void * ptr2,int n)464 static int verify_write_uint( void *ptr1, void *ptr2, int n )
465 {
466     int     i;
467     cl_uint *inptr = (cl_uint *)ptr1;
468     cl_uint *outptr = (cl_uint *)ptr2;
469 
470     for (i=0; i<n; i++){
471         if ( outptr[i] != inptr[i] )
472             return -1;
473     }
474 
475     return 0;
476 }
477 
478 
verify_write_short(void * ptr1,void * ptr2,int n)479 static int verify_write_short( void *ptr1, void *ptr2, int n )
480 {
481     int     i;
482     short   *inptr = (short *)ptr1;
483     short   *outptr = (short *)ptr2;
484 
485     for (i=0; i<n; i++){
486         if ( outptr[i] != inptr[i] )
487             return -1;
488     }
489 
490     return 0;
491 }
492 
493 
verify_write_ushort(void * ptr1,void * ptr2,int n)494 static int verify_write_ushort( void *ptr1, void *ptr2, int n )
495 {
496     int     i;
497     cl_ushort   *inptr = (cl_ushort *)ptr1;
498     cl_ushort   *outptr = (cl_ushort *)ptr2;
499 
500     for (i=0; i<n; i++){
501         if ( outptr[i] != inptr[i] )
502             return -1;
503     }
504 
505     return 0;
506 }
507 
508 
verify_write_char(void * ptr1,void * ptr2,int n)509 static int verify_write_char( void *ptr1, void *ptr2, int n )
510 {
511     int     i;
512     char    *inptr = (char *)ptr1;
513     char    *outptr = (char *)ptr2;
514 
515     for (i=0; i<n; i++){
516         if ( outptr[i] != inptr[i] )
517             return -1;
518     }
519 
520     return 0;
521 }
522 
523 
verify_write_uchar(void * ptr1,void * ptr2,int n)524 static int verify_write_uchar( void *ptr1, void *ptr2, int n )
525 {
526     int     i;
527     uchar   *inptr = (uchar *)ptr1;
528     uchar   *outptr = (uchar *)ptr2;
529 
530     for (i=0; i<n; i++){
531         if ( outptr[i] != inptr[i] )
532             return -1;
533     }
534 
535     return 0;
536 }
537 
538 
verify_write_float(void * ptr1,void * ptr2,int n)539 static int verify_write_float( void *ptr1, void *ptr2, int n )
540 {
541     int     i;
542     float   *inptr = (float *)ptr1;
543     float   *outptr = (float *)ptr2;
544 
545     for (i=0; i<n; i++){
546         if ( outptr[i] != inptr[i] )
547             return -1;
548     }
549 
550     return 0;
551 }
552 
553 
verify_write_half(void * ptr1,void * ptr2,int n)554 static int verify_write_half( void *ptr1, void *ptr2, int n )
555 {
556     int     i;
557     cl_ushort   *inptr = (cl_ushort *)ptr1;
558     cl_ushort   *outptr = (cl_ushort *)ptr2;
559 
560     for ( i = 0; i < n; i++ ){
561         if ( outptr[i] != inptr[i] )
562             return -1;
563     }
564 
565     return 0;
566 }
567 
568 
verify_write_long(void * ptr1,void * ptr2,int n)569 static int verify_write_long( void *ptr1, void *ptr2, int n )
570 {
571     int     i;
572     cl_long *inptr = (cl_long *)ptr1;
573     cl_long *outptr = (cl_long *)ptr2;
574 
575     for (i=0; i<n; i++){
576         if ( outptr[i] != inptr[i] )
577             return -1;
578     }
579 
580     return 0;
581 }
582 
583 
verify_write_ulong(void * ptr1,void * ptr2,int n)584 static int verify_write_ulong( void *ptr1, void *ptr2, int n )
585 {
586     int     i;
587     cl_ulong    *inptr = (cl_ulong *)ptr1;
588     cl_ulong    *outptr = (cl_ulong *)ptr2;
589 
590     for (i=0; i<n; i++){
591         if ( outptr[i] != inptr[i] )
592             return -1;
593     }
594 
595     return 0;
596 }
597 
598 
verify_write_struct(void * ptr1,void * ptr2,int n)599 static int verify_write_struct( void *ptr1, void *ptr2, int n )
600 {
601     int         i;
602     TestStruct  *inptr = (TestStruct *)ptr1;
603     TestStruct  *outptr = (TestStruct *)ptr2;
604 
605     for (i=0; i<n; i++){
606         if ( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
607             return -1;
608     }
609 
610     return 0;
611 }
612 
613 
test_buffer_write(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int),MTdata d)614 int test_buffer_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
615                        void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
616 {
617     cl_mem      buffers[10];
618     void        *outptr[5];
619     cl_program  program[5];
620     cl_kernel   kernel[5];
621     size_t      ptrSizes[5];
622     size_t      global_work_size[3];
623 #ifdef USE_LOCAL_WORK_GROUP
624     size_t      local_work_size[3];
625 #endif
626     cl_int      err;
627     int         i, ii;
628     int         src_flag_id, dst_flag_id;
629     int         total_errors = 0;
630 
631     size_t      min_alignment = get_min_alignment(context);
632 
633     global_work_size[0] = (size_t)num_elements;
634 
635     ptrSizes[0] = size;
636     ptrSizes[1] = ptrSizes[0] << 1;
637     ptrSizes[2] = ptrSizes[1] << 1;
638     ptrSizes[3] = ptrSizes[2] << 1;
639     ptrSizes[4] = ptrSizes[3] << 1;
640 
641     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
642         for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
643             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
644 
645             loops = ( loops < 5 ? loops : 5 );
646             for ( i = 0; i < loops; i++ ){
647                 ii = i << 1;
648                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
649                     buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
650                 else
651                     buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
652 
653                 if ( ! buffers[ii] || err){
654                     align_free( outptr[i] );
655                     print_error(err, " clCreateBuffer failed\n" );
656                     return -1;
657                 }
658                 if ( ! strcmp( type, "half" ) ){
659                     outptr[i] = align_malloc( ptrSizes[i] * (num_elements * 2 ), min_alignment);
660                     if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
661                         buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * 2 * num_elements, outptr[i], &err);
662                     else
663                         buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * 2 * num_elements, NULL, &err);
664                 }
665                 else{
666                     outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
667                     if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
668                         buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
669                     else
670                         buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
671                 }
672                 if ( err ){
673                     clReleaseMemObject(buffers[ii]);
674                     align_free( outptr[i] );
675                     print_error(err, " clCreateBuffer failed\n" );
676                     return -1;
677                 }
678 
679                 if (gTestMap) {
680                     void *dataPtr;
681                     dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
682                     if (err) {
683                         print_error(err, "clEnqueueMapBuffer failed");
684                         clReleaseMemObject(buffers[ii]);
685                         clReleaseMemObject(buffers[ii+1]);
686                         align_free( outptr[i] );
687                         return -1;
688                     }
689 
690                     memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
691 
692                     err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
693                     if (err) {
694                         print_error(err, "clEnqueueUnmapMemObject failed");
695                         clReleaseMemObject(buffers[ii]);
696                         clReleaseMemObject(buffers[ii+1]);
697                         align_free( outptr[i] );
698                         return -1;
699                     }
700                 }
701                 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
702                     err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
703                     if ( err != CL_SUCCESS ){
704                         clReleaseMemObject(buffers[ii]);
705                         clReleaseMemObject(buffers[ii+1]);
706                         align_free( outptr[i] );
707                         print_error( err, " clWriteBuffer failed" );
708                         return -1;
709                     }
710                 }
711 
712                 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
713                 if ( err ){
714                     clReleaseMemObject(buffers[ii]);
715                     clReleaseMemObject(buffers[ii+1]);
716                     align_free( outptr[i] );
717                     log_error( " Error creating program for %s\n", type );
718                     return -1;
719                 }
720 
721 #ifdef USE_LOCAL_WORK_GROUP
722                 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
723                 test_error( err, "Unable to get work group size to use" );
724 #endif
725 
726                 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
727                 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
728                 if ( err != CL_SUCCESS ){
729                     clReleaseMemObject( buffers[ii] );
730                     clReleaseMemObject( buffers[ii+1] );
731                     clReleaseKernel( kernel[i] );
732                     clReleaseProgram( program[i] );
733                     align_free( outptr[i] );
734                     print_error( err, " clSetKernelArg failed" );
735                     return -1;
736                 }
737 
738 #ifdef USE_LOCAL_WORK_GROUP
739                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
740 #else
741                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
742 #endif
743                 if ( err != CL_SUCCESS ){
744                     print_error( err, " clEnqueueNDRangeKernel failed" );
745                     clReleaseMemObject( buffers[ii] );
746                     clReleaseMemObject( buffers[ii+1] );
747                     clReleaseKernel( kernel[i] );
748                     clReleaseProgram( program[i] );
749                     align_free( outptr[i] );
750                     return -1;
751                 }
752 
753                 if ( ! strcmp( type, "half" ) ){
754                     err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
755                 }
756                 else{
757                     err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
758                 }
759                 if ( err != CL_SUCCESS ){
760                     clReleaseMemObject( buffers[ii] );
761                     clReleaseMemObject( buffers[ii+1] );
762                     clReleaseKernel( kernel[i] );
763                     clReleaseProgram( program[i] );
764                     align_free( outptr[i] );
765                     print_error( err, " clEnqueueReadBuffer failed" );
766                     return -1;
767                 }
768 
769                 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
770                     log_error( " %s%d test failed\n", type, 1<<i );
771                     total_errors++;
772                 }
773                 else{
774                     log_info( " %s%d test passed\n", type, 1<<i );
775                 }
776                 // cleanup
777                 clReleaseMemObject( buffers[ii] );
778                 clReleaseMemObject( buffers[ii+1] );
779                 clReleaseKernel( kernel[i] );
780                 clReleaseProgram( program[i] );
781                 align_free( outptr[i] );
782             }
783         } // dst cl_mem_flag
784     } // src cl_mem_flag
785 
786     return total_errors;
787 
788 }   // end test_buffer_write()
789 
790 
791 
792 
test_buffer_write_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)793 int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
794 {
795     cl_mem      buffers[10];
796     void        *outptr[5];
797     TestStruct  *inptr[5];
798     cl_program  program[5];
799     cl_kernel   kernel[5];
800     size_t      ptrSizes[5];
801     size_t      size = sizeof( TestStruct );
802     size_t      global_work_size[3];
803 #ifdef USE_LOCAL_WORK_GROUP
804     size_t      local_work_size[3];
805 #endif
806     cl_int      err;
807     int         i, ii;
808     cl_uint     j;
809     int         loops = 1;      // no vector for structs
810     int         src_flag_id, dst_flag_id;
811     int         total_errors = 0;
812     MTdata      d = init_genrand( gRandomSeed );
813 
814     size_t      min_alignment = get_min_alignment(context);
815 
816     global_work_size[0] = (size_t)num_elements;
817 
818     ptrSizes[0] = size;
819     ptrSizes[1] = ptrSizes[0] << 1;
820     ptrSizes[2] = ptrSizes[1] << 1;
821     ptrSizes[3] = ptrSizes[2] << 1;
822     ptrSizes[4] = ptrSizes[3] << 1;
823 
824     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
825         for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
826             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
827 
828             loops = ( loops < 5 ? loops : 5 );
829             for ( i = 0; i < loops; i++ ){
830 
831                 inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
832 
833                 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){
834                     inptr[i][j].a = (int)genrand_int32(d);
835                     inptr[i][j].b = get_random_float( -FLT_MAX, FLT_MAX, d );
836                 }
837 
838                 ii = i << 1;
839                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
840                     buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
841                 else
842                     buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
843                 if ( err ){
844                     align_free( outptr[i] );
845                     print_error(err, " clCreateBuffer failed\n" );
846                     free_mtdata(d);
847                     return -1;
848                 }
849                 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
850                 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
851                     buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
852                 else
853                     buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
854                 if ( ! buffers[ii+1] || err){
855                     clReleaseMemObject(buffers[ii]);
856                     align_free( outptr[i] );
857                     print_error(err, " clCreateBuffer failed\n" );
858                     free_mtdata(d);
859                     return -1;
860                 }
861 
862                 if (gTestMap) {
863                     void *dataPtr;
864                     dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
865                     if (err) {
866                         print_error(err, "clEnqueueMapBuffer failed");
867                         clReleaseMemObject(buffers[ii]);
868                         clReleaseMemObject(buffers[ii+1]);
869                         align_free( outptr[i] );
870                         free_mtdata(d);
871                         return -1;
872                     }
873 
874                     memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
875 
876                     err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
877                     if (err) {
878                         print_error(err, "clEnqueueUnmapMemObject failed");
879                         clReleaseMemObject(buffers[ii]);
880                         clReleaseMemObject(buffers[ii+1]);
881                         align_free( outptr[i] );
882                         free_mtdata(d);
883                         return -1;
884                     }
885                 }
886                 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
887                     err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
888                     if ( err != CL_SUCCESS ){
889                         clReleaseMemObject(buffers[ii]);
890                         clReleaseMemObject(buffers[ii+1]);
891                         align_free( outptr[i] );
892                         print_error( err, " clWriteBuffer failed" );
893                         free_mtdata(d);
894                         return -1;
895                     }
896                 }
897 
898                 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &struct_kernel_code, "read_write_struct" );
899                 if ( err ){
900                     clReleaseMemObject(buffers[ii]);
901                     clReleaseMemObject(buffers[ii+1]);
902                     align_free( outptr[i] );
903                     log_error( " Error creating program for struct\n" );
904                     free_mtdata(d);
905                     return -1;
906                 }
907 
908 #ifdef USE_LOCAL_WORK_GROUP
909                 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
910                 test_error( err, "Unable to get work group size to use" );
911 #endif
912 
913                 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
914                 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
915                 if ( err != CL_SUCCESS ){
916                     clReleaseMemObject( buffers[ii] );
917                     clReleaseMemObject( buffers[ii+1] );
918                     clReleaseKernel( kernel[i] );
919                     clReleaseProgram( program[i] );
920                     align_free( outptr[i] );
921                     print_error( err, " clSetKernelArg failed" );
922                     free_mtdata(d);
923                     return -1;
924                 }
925 
926 #ifdef USE_LOCAL_WORK_GROUP
927                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
928 #else
929                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
930 #endif
931                 if ( err != CL_SUCCESS ){
932                     print_error( err, " clEnqueueNDRangeKernel failed" );
933                     clReleaseMemObject( buffers[ii] );
934                     clReleaseMemObject( buffers[ii+1] );
935                     clReleaseKernel( kernel[i] );
936                     clReleaseProgram( program[i] );
937                     align_free( outptr[i] );
938                     free_mtdata(d);
939                     return -1;
940                 }
941 
942                 err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
943                 if ( err != CL_SUCCESS ){
944                     clReleaseMemObject( buffers[ii] );
945                     clReleaseMemObject( buffers[ii+1] );
946                     clReleaseKernel( kernel[i] );
947                     clReleaseProgram( program[i] );
948                     align_free( outptr[i] );
949                     print_error( err, " clEnqueueReadBuffer failed" );
950                     free_mtdata(d);
951                     return -1;
952                 }
953 
954                 if ( verify_write_struct( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
955                     log_error( " buffer_WRITE struct%d test failed\n", 1<<i );
956                     total_errors++;
957                 }
958                 else{
959                     log_info( " buffer_WRITE struct%d test passed\n", 1<<i );
960                 }
961                 // cleanup
962                 clReleaseMemObject( buffers[ii] );
963                 clReleaseMemObject( buffers[ii+1] );
964                 clReleaseKernel( kernel[i] );
965                 clReleaseProgram( program[i] );
966                 align_free( outptr[i] );
967                 align_free( (void *)inptr[i] );
968             }
969         } // dst cl_mem_flag
970     } // src cl_mem_flag
971 
972     free_mtdata(d);
973 
974     return total_errors;
975 
976 }   // end test_buffer_struct_write()
977 
978 
test_buffer_write_array_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int))979 int test_buffer_write_array_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
980                                    void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
981 {
982     cl_mem      buffers[10];
983     void        *outptr[5];
984     cl_program  program[5];
985     cl_kernel   kernel[5];
986     cl_event    event[2];
987     size_t      ptrSizes[5];
988     size_t      global_work_size[3];
989 #ifdef USE_LOCAL_WORK_GROUP
990     size_t      local_work_size[3];
991 #endif
992     cl_int      err;
993     int         i, ii;
994     int         src_flag_id, dst_flag_id;
995     int         total_errors = 0;
996 
997     size_t      min_alignment = get_min_alignment(context);
998 
999     global_work_size[0] = (size_t)num_elements;
1000 
1001     ptrSizes[0] = size;
1002     ptrSizes[1] = ptrSizes[0] << 1;
1003     ptrSizes[2] = ptrSizes[1] << 1;
1004     ptrSizes[3] = ptrSizes[2] << 1;
1005     ptrSizes[4] = ptrSizes[3] << 1;
1006 
1007     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
1008         for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
1009             log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
1010 
1011             loops = ( loops < 5 ? loops : 5 );
1012             for ( i = 0; i < loops; i++ ){
1013                 ii = i << 1;
1014                 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
1015                     buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, inptr[i], &err);
1016                 else
1017                     buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
1018                 if ( !buffers[ii] || err){
1019                     print_error(err, "clCreateBuffer failed\n" );
1020                     return -1;
1021                 }
1022 
1023                 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
1024                 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
1025                     buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, outptr[i], &err);
1026                 else
1027                     buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
1028                 if ( !buffers[ii+1] || err){
1029                     print_error(err, "clCreateBuffer failed\n" );
1030                     return -1;
1031                 }
1032 
1033                 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0]));
1034                 if ( err != CL_SUCCESS ){
1035                     print_error( err, "clEnqueueWriteBuffer failed" );
1036                     return -1;
1037                 }
1038 
1039                 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
1040                 if ( err ){
1041                     log_error( " Error creating program for %s\n", type );
1042                     clReleaseMemObject( buffers[ii] );
1043                     clReleaseMemObject( buffers[ii+1] );
1044                     align_free( outptr[i] );
1045                     return -1;
1046                 }
1047 
1048 #ifdef USE_LOCAL_WORK_GROUP
1049                 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
1050                 test_error( err, "Unable to get work group size to use" );
1051 #endif
1052 
1053                 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
1054                 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
1055                 if ( err != CL_SUCCESS ){
1056                     print_error( err, "clSetKernelArg failed" );
1057                     clReleaseKernel( kernel[i] );
1058                     clReleaseProgram( program[i] );
1059                     clReleaseMemObject( buffers[ii] );
1060                     clReleaseMemObject( buffers[ii+1] );
1061                     align_free( outptr[i] );
1062                     return -1;
1063                 }
1064 
1065                 err = clWaitForEvents(  1, &(event[0]) );
1066                 if ( err != CL_SUCCESS ){
1067                     print_error( err, "clWaitForEvents() failed" );
1068                     clReleaseKernel( kernel[i] );
1069                     clReleaseProgram( program[i] );
1070                     clReleaseMemObject( buffers[ii] );
1071                     clReleaseMemObject( buffers[ii+1] );
1072                     align_free( outptr[i] );
1073                     return -1;
1074                 }
1075 
1076 #ifdef USE_LOCAL_WORK_GROUP
1077                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1078 #else
1079                 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1080 #endif
1081                 if (err != CL_SUCCESS){
1082                     print_error( err, "clEnqueueNDRangeKernel failed" );
1083                     return -1;
1084                 }
1085 
1086                 err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) );
1087                 if (err != CL_SUCCESS){
1088                     print_error( err, "clEnqueueReadBuffer failed" );
1089                     return -1;
1090                 }
1091 
1092                 err = clWaitForEvents( 1, &(event[1]) );
1093                 if ( err != CL_SUCCESS ){
1094                     print_error( err, "clWaitForEvents() failed" );
1095                 }
1096 
1097                 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
1098                     log_error( " %s%d test failed\n", type, 1<<i );
1099                     total_errors++;
1100                 }
1101                 else{
1102                     log_info( " %s%d test passed\n", type, 1<<i );
1103                 }
1104 
1105                 // cleanup
1106                 clReleaseEvent( event[0] );
1107                 clReleaseEvent( event[1] );
1108                 clReleaseMemObject( buffers[ii] );
1109                 clReleaseMemObject( buffers[ii+1] );
1110                 clReleaseKernel( kernel[i] );
1111                 clReleaseProgram( program[i] );
1112                 align_free( outptr[i] );
1113             }
1114         } // dst cl_mem_flag
1115     } // src cl_mem_flag
1116 
1117     return total_errors;
1118 
1119 }   // end test_buffer_write_array_async()
1120 
1121 
test_buffer_write_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1122 int test_buffer_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1123 {
1124     int     *inptr[5];
1125     size_t  ptrSizes[5];
1126     int     i, err;
1127     cl_uint j;
1128     int     (*foo)(void *,void *,int);
1129     MTdata  d = init_genrand( gRandomSeed );
1130 
1131     size_t  min_alignment = get_min_alignment(context);
1132 
1133     foo = verify_write_int;
1134 
1135     ptrSizes[0] = sizeof(cl_int);
1136     ptrSizes[1] = ptrSizes[0] << 1;
1137     ptrSizes[2] = ptrSizes[1] << 1;
1138     ptrSizes[3] = ptrSizes[2] << 1;
1139     ptrSizes[4] = ptrSizes[3] << 1;
1140 
1141     for ( i = 0; i < 5; i++ ){
1142         inptr[i] = (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1143 
1144         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1145             inptr[i][j] = (int)genrand_int32(d);
1146     }
1147 
1148     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int", 5, (void**)inptr,
1149                              buffer_write_int_kernel_code, int_kernel_name, foo, d );
1150 
1151     for ( i = 0; i < 5; i++ ){
1152         align_free( (void *)inptr[i] );
1153     }
1154     free_mtdata(d);
1155 
1156     return err;
1157 
1158 }   // end test_buffer_int_write()
1159 
1160 
test_buffer_write_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1161 int test_buffer_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1162 {
1163     cl_uint *inptr[5];
1164     size_t  ptrSizes[5];
1165     int     i, err;
1166     cl_uint j;
1167     MTdata  d = init_genrand( gRandomSeed );
1168     int     (*foo)(void *,void *,int);
1169 
1170     size_t  min_alignment = get_min_alignment(context);
1171 
1172     foo = verify_write_uint;
1173 
1174     ptrSizes[0] = sizeof(cl_uint);
1175     ptrSizes[1] = ptrSizes[0] << 1;
1176     ptrSizes[2] = ptrSizes[1] << 1;
1177     ptrSizes[3] = ptrSizes[2] << 1;
1178     ptrSizes[4] = ptrSizes[3] << 1;
1179 
1180     for ( i = 0; i < 5; i++ ){
1181         inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1182 
1183         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1184             inptr[i][j] = genrand_int32(d);
1185     }
1186 
1187     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint", 5, (void**)inptr,
1188                              buffer_write_uint_kernel_code, uint_kernel_name, foo, d );
1189 
1190     for ( i = 0; i < 5; i++ ){
1191         align_free( (void *)inptr[i] );
1192     }
1193 
1194     free_mtdata(d);
1195     return err;
1196 
1197 }   // end test_buffer_uint_write()
1198 
1199 
test_buffer_write_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1200 int test_buffer_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1201 {
1202     short   *inptr[5];
1203     size_t  ptrSizes[5];
1204     int     i, err;
1205     cl_uint j;
1206     MTdata  d = init_genrand( gRandomSeed );
1207     int     (*foo)(void *,void *,int);
1208 
1209     size_t  min_alignment = get_min_alignment(context);
1210 
1211     foo = verify_write_short;
1212 
1213     ptrSizes[0] = sizeof(cl_short);
1214     ptrSizes[1] = ptrSizes[0] << 1;
1215     ptrSizes[2] = ptrSizes[1] << 1;
1216     ptrSizes[3] = ptrSizes[2] << 1;
1217     ptrSizes[4] = ptrSizes[3] << 1;
1218 
1219     for ( i = 0; i < 5; i++ ){
1220         inptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1221 
1222         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1223             inptr[i][j] = (cl_short)genrand_int32(d);
1224     }
1225 
1226     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short", 5, (void**)inptr,
1227                              buffer_write_short_kernel_code, short_kernel_name, foo, d );
1228 
1229     for ( i = 0; i < 5; i++ ){
1230         align_free( (void *)inptr[i] );
1231 
1232     }
1233 
1234     free_mtdata(d);
1235     return err;
1236 
1237 }   // end test_buffer_short_write()
1238 
1239 
test_buffer_write_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1240 int test_buffer_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1241 {
1242     cl_ushort *inptr[5];
1243     size_t    ptrSizes[5];
1244     int       i, err;
1245     cl_uint   j;
1246     MTdata    d = init_genrand( gRandomSeed );
1247     int       (*foo)(void *,void *,int);
1248 
1249     size_t    min_alignment = get_min_alignment(context);
1250 
1251     foo = verify_write_ushort;
1252 
1253     ptrSizes[0] = sizeof(cl_ushort);
1254     ptrSizes[1] = ptrSizes[0] << 1;
1255     ptrSizes[2] = ptrSizes[1] << 1;
1256     ptrSizes[3] = ptrSizes[2] << 1;
1257     ptrSizes[4] = ptrSizes[3] << 1;
1258 
1259     for ( i = 0; i < 5; i++ ){
1260         inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1261 
1262         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1263             inptr[i][j] = (cl_ushort)genrand_int32(d);
1264     }
1265 
1266     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort", 5, (void**)inptr,
1267                              buffer_write_ushort_kernel_code, ushort_kernel_name, foo, d );
1268 
1269     for ( i = 0; i < 5; i++ ){
1270         align_free( (void *)inptr[i] );
1271 
1272     }
1273 
1274     free_mtdata(d);
1275     return err;
1276 
1277 }   // end test_buffer_ushort_write()
1278 
1279 
test_buffer_write_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1280 int test_buffer_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1281 {
1282     char    *inptr[5];
1283     size_t  ptrSizes[5];
1284     int     i,  err;
1285     cl_uint j;
1286     MTdata  d = init_genrand( gRandomSeed );
1287     int     (*foo)(void *,void *,int);
1288 
1289     size_t  min_alignment = get_min_alignment(context);
1290 
1291     foo = verify_write_char;
1292 
1293     ptrSizes[0] = sizeof(cl_char);
1294     ptrSizes[1] = ptrSizes[0] << 1;
1295     ptrSizes[2] = ptrSizes[1] << 1;
1296     ptrSizes[3] = ptrSizes[2] << 1;
1297     ptrSizes[4] = ptrSizes[3] << 1;
1298 
1299     for ( i = 0; i < 5; i++ ){
1300         inptr[i] = (char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1301 
1302         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1303             inptr[i][j] = (char)genrand_int32(d);
1304     }
1305 
1306     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char", 5, (void**)inptr,
1307                              buffer_write_char_kernel_code, char_kernel_name, foo, d );
1308 
1309     for ( i = 0; i < 5; i++ ){
1310         align_free( (void *)inptr[i] );
1311 
1312     }
1313 
1314     free_mtdata(d);
1315     return err;
1316 
1317 }   // end test_buffer_char_write()
1318 
1319 
test_buffer_write_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1320 int test_buffer_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1321 {
1322     uchar   *inptr[5];
1323     size_t  ptrSizes[5];
1324     int     i, err;
1325     cl_uint j;
1326     MTdata  d = init_genrand( gRandomSeed );
1327     int     (*foo)(void *,void *,int);
1328 
1329     size_t  min_alignment = get_min_alignment(context);
1330 
1331     foo = verify_write_uchar;
1332 
1333     ptrSizes[0] = sizeof(cl_uchar);
1334     ptrSizes[1] = ptrSizes[0] << 1;
1335     ptrSizes[2] = ptrSizes[1] << 1;
1336     ptrSizes[3] = ptrSizes[2] << 1;
1337     ptrSizes[4] = ptrSizes[3] << 1;
1338 
1339     for ( i = 0; i < 5; i++ ){
1340         inptr[i] = (uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1341 
1342         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1343             inptr[i][j] = (uchar)genrand_int32(d);
1344     }
1345 
1346     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar", 5, (void**)inptr,
1347                              buffer_write_uchar_kernel_code, uchar_kernel_name, foo, d );
1348 
1349     for ( i = 0; i < 5; i++ ){
1350         align_free( (void *)inptr[i] );
1351 
1352     }
1353 
1354     free_mtdata(d);
1355     return err;
1356 
1357 }   // end test_buffer_uchar_write()
1358 
1359 
test_buffer_write_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1360 int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1361 {
1362     float   *inptr[5];
1363     size_t  ptrSizes[5];
1364     int     i, err;
1365     cl_uint j;
1366     MTdata  d = init_genrand( gRandomSeed );
1367     int     (*foo)(void *,void *,int);
1368 
1369     size_t  min_alignment = get_min_alignment(context);
1370 
1371     foo = verify_write_float;
1372 
1373     ptrSizes[0] = sizeof(cl_float);
1374     ptrSizes[1] = ptrSizes[0] << 1;
1375     ptrSizes[2] = ptrSizes[1] << 1;
1376     ptrSizes[3] = ptrSizes[2] << 1;
1377     ptrSizes[4] = ptrSizes[3] << 1;
1378 
1379     for ( i = 0; i < 5; i++ ){
1380         inptr[i] = (float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1381 
1382         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1383             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1384     }
1385 
1386     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float", 5, (void**)inptr,
1387                              buffer_write_float_kernel_code, float_kernel_name, foo, d );
1388 
1389     for ( i = 0; i < 5; i++ ){
1390         align_free( (void *)inptr[i] );
1391     }
1392 
1393     free_mtdata(d);
1394     return err;
1395 
1396 }   // end test_buffer_float_write()
1397 
1398 
test_buffer_write_half(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1399 int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1400 {
1401     float   *inptr[5];
1402     size_t  ptrSizes[5];
1403     int     i, err;
1404     cl_uint j;
1405     MTdata  d = init_genrand( gRandomSeed );
1406     int     (*foo)(void *,void *,int);
1407 
1408     size_t  min_alignment = get_min_alignment(context);
1409 
1410     foo = verify_write_half;
1411 
1412     ptrSizes[0] = sizeof( cl_float ) / 2;
1413     ptrSizes[1] = ptrSizes[0] << 1;
1414     ptrSizes[2] = ptrSizes[1] << 1;
1415     ptrSizes[3] = ptrSizes[2] << 1;
1416     ptrSizes[4] = ptrSizes[3] << 1;
1417 
1418     for ( i = 0; i < 5; i++ ){
1419         inptr[i] = (float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1420 
1421         for ( j = 0; j < ptrSizes[i] * num_elements / ( ptrSizes[0] * 2 ); j++ )
1422             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1423     }
1424 
1425     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr,
1426                              buffer_write_half_kernel_code, half_kernel_name, foo, d );
1427 
1428     for ( i = 0; i < 5; i++ ){
1429         align_free( (void *)inptr[i] );
1430     }
1431 
1432     free_mtdata(d);
1433     return err;
1434 
1435 }   // end test_buffer_half_write()
1436 
1437 
test_buffer_write_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1438 int test_buffer_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1439 {
1440     cl_long *inptr[5];
1441     size_t  ptrSizes[5];
1442     int     i, err;
1443     cl_uint j;
1444     MTdata  d = init_genrand( gRandomSeed );
1445     int     (*foo)(void *,void *,int);
1446 
1447     size_t  min_alignment = get_min_alignment(context);
1448 
1449     foo = verify_write_long;
1450 
1451     ptrSizes[0] = sizeof(cl_long);
1452     ptrSizes[1] = ptrSizes[0] << 1;
1453     ptrSizes[2] = ptrSizes[1] << 1;
1454     ptrSizes[3] = ptrSizes[2] << 1;
1455     ptrSizes[4] = ptrSizes[3] << 1;
1456 
1457     //skip devices that don't support long
1458     if (! gHasLong )
1459     {
1460         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1461         return CL_SUCCESS;
1462     }
1463 
1464     for ( i = 0; i < 5; i++ ){
1465         inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1466 
1467         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1468             inptr[i][j] = (cl_long) genrand_int32(d) ^ ((cl_long) genrand_int32(d) << 32);
1469     }
1470 
1471     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"cl_long", 5, (void**)inptr,
1472                              buffer_write_long_kernel_code, long_kernel_name, foo, d );
1473 
1474     for ( i = 0; i < 5; i++ ){
1475         align_free( (void *)inptr[i] );
1476     }
1477 
1478     free_mtdata(d);
1479     return err;
1480 
1481 }   // end test_buffer_long_write()
1482 
1483 
test_buffer_write_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1484 int test_buffer_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1485 {
1486     cl_ulong *inptr[5];
1487     size_t   ptrSizes[5];
1488     int      i, err;
1489     cl_uint  j;
1490     MTdata   d = init_genrand( gRandomSeed );
1491     int      (*foo)(void *,void *,int);
1492 
1493     size_t   min_alignment = get_min_alignment(context);
1494 
1495     foo = verify_write_ulong;
1496 
1497     ptrSizes[0] = sizeof(cl_ulong);
1498     ptrSizes[1] = ptrSizes[0] << 1;
1499     ptrSizes[2] = ptrSizes[1] << 1;
1500     ptrSizes[3] = ptrSizes[2] << 1;
1501     ptrSizes[4] = ptrSizes[3] << 1;
1502 
1503     if (! gHasLong )
1504     {
1505         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1506         return CL_SUCCESS;
1507     }
1508 
1509     for ( i = 0; i < 5; i++ ){
1510         inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1511 
1512         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1513             inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1514     }
1515 
1516     err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong long", 5, (void**)inptr,
1517                              buffer_write_ulong_kernel_code, ulong_kernel_name, foo, d );
1518 
1519     for ( i = 0; i < 5; i++ ){
1520         align_free( (void *)inptr[i] );
1521     }
1522 
1523     free_mtdata(d);
1524 
1525     return err;
1526 
1527 }   // end test_buffer_ulong_write()
1528 
1529 
test_buffer_map_write_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1530 int test_buffer_map_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1531 {
1532     gTestMap = 1;
1533     return test_buffer_write_int(deviceID, context, queue, num_elements);
1534 }
1535 
test_buffer_map_write_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1536 int test_buffer_map_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1537 {
1538     gTestMap = 1;
1539     return test_buffer_write_uint(deviceID, context, queue, num_elements);
1540 }
1541 
test_buffer_map_write_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1542 int test_buffer_map_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1543 {
1544     gTestMap = 1;
1545     return test_buffer_write_long(deviceID, context, queue, num_elements);
1546 }
1547 
test_buffer_map_write_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1548 int test_buffer_map_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1549 {
1550     gTestMap = 1;
1551     return test_buffer_write_ulong(deviceID, context, queue, num_elements);
1552 }
1553 
test_buffer_map_write_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1554 int test_buffer_map_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1555 {
1556     gTestMap = 1;
1557     return test_buffer_write_short(deviceID, context, queue, num_elements);
1558 }
1559 
test_buffer_map_write_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1560 int test_buffer_map_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1561 {
1562     gTestMap = 1;
1563     return test_buffer_write_ushort(deviceID, context, queue, num_elements);
1564 }
1565 
test_buffer_map_write_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1566 int test_buffer_map_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1567 {
1568     gTestMap = 1;
1569     return test_buffer_write_char(deviceID, context, queue, num_elements);
1570 }
1571 
test_buffer_map_write_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1572 int test_buffer_map_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1573 {
1574     gTestMap = 1;
1575     return test_buffer_write_uchar(deviceID, context, queue, num_elements);
1576 }
1577 
test_buffer_map_write_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1578 int test_buffer_map_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1579 {
1580     gTestMap = 1;
1581     return test_buffer_write_float(deviceID, context, queue, num_elements);
1582 }
1583 
test_buffer_map_write_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1584 int test_buffer_map_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1585 {
1586     gTestMap = 1;
1587     return test_buffer_write_struct(deviceID, context, queue, num_elements);
1588 }
1589 
1590 
test_buffer_write_async_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1591 int test_buffer_write_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1592 {
1593     int     *inptr[5];
1594     size_t  ptrSizes[5];
1595     int     i, err;
1596     cl_uint j;
1597     MTdata  d = init_genrand( gRandomSeed );
1598     int     (*foo)(void *,void *,int);
1599 
1600     size_t  min_alignment = get_min_alignment(context);
1601 
1602     foo = verify_write_int;
1603 
1604     ptrSizes[0] = sizeof(cl_int);
1605     ptrSizes[1] = ptrSizes[0] << 1;
1606     ptrSizes[2] = ptrSizes[1] << 1;
1607     ptrSizes[3] = ptrSizes[2] << 1;
1608     ptrSizes[4] = ptrSizes[3] << 1;
1609 
1610     for ( i = 0; i < 5; i++ ){
1611         inptr[i] = (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1612 
1613         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1614             inptr[i][j] = (int)genrand_int32(d);
1615     }
1616 
1617     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int", 5, (void**)inptr,
1618                                          buffer_write_int_kernel_code, int_kernel_name, foo );
1619 
1620     for ( i = 0; i < 5; i++ ){
1621         align_free( (void *)inptr[i] );
1622     }
1623 
1624     free_mtdata(d);
1625     return err;
1626 
1627 }   // end test_buffer_int_write_array_async()
1628 
1629 
test_buffer_write_async_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1630 int test_buffer_write_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1631 {
1632     cl_uint *inptr[5];
1633     size_t  ptrSizes[5];
1634     int     i, err;
1635     cl_uint j;
1636     MTdata  d = init_genrand( gRandomSeed );
1637     int     (*foo)(void *,void *,int);
1638 
1639     size_t  min_alignment = get_min_alignment(context);
1640 
1641     foo = verify_write_uint;
1642 
1643     ptrSizes[0] = sizeof(cl_uint);
1644     ptrSizes[1] = ptrSizes[0] << 1;
1645     ptrSizes[2] = ptrSizes[1] << 1;
1646     ptrSizes[3] = ptrSizes[2] << 1;
1647     ptrSizes[4] = ptrSizes[3] << 1;
1648 
1649     for ( i = 0; i < 5; i++ ){
1650         inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1651 
1652         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1653             inptr[i][j] = (cl_uint)genrand_int32(d);
1654     }
1655 
1656     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint", 5, (void**)inptr,
1657                                          buffer_write_uint_kernel_code, uint_kernel_name, foo );
1658 
1659     for ( i = 0; i < 5; i++ ){
1660         align_free( (void *)inptr[i] );
1661     }
1662 
1663     free_mtdata(d);
1664     return err;
1665 
1666 }   // end test_buffer_uint_write_array_async()
1667 
1668 
test_buffer_write_async_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1669 int test_buffer_write_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1670 {
1671     short   *inptr[5];
1672     size_t  ptrSizes[5];
1673     int     i, err;
1674     cl_uint j;
1675     MTdata  d = init_genrand( gRandomSeed );
1676     int     (*foo)(void *,void *,int);
1677 
1678     size_t  min_alignment = get_min_alignment(context);
1679 
1680     foo = verify_write_short;
1681 
1682     ptrSizes[0] = sizeof(cl_short);
1683     ptrSizes[1] = ptrSizes[0] << 1;
1684     ptrSizes[2] = ptrSizes[1] << 1;
1685     ptrSizes[3] = ptrSizes[2] << 1;
1686     ptrSizes[4] = ptrSizes[3] << 1;
1687 
1688     for ( i = 0; i < 5; i++ ){
1689         inptr[i] = (short *)align_malloc(ptrSizes[i] * num_elements + min_alignment, min_alignment);
1690 
1691         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1692             inptr[i][j] = (short)genrand_int32(d);
1693     }
1694 
1695     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short", 5, (void**)inptr,
1696                                          buffer_write_short_kernel_code, short_kernel_name, foo );
1697 
1698     for ( i = 0; i < 5; i++ ){
1699         align_free( (void *)inptr[i] );
1700 
1701     }
1702 
1703     free_mtdata(d);
1704     return err;
1705 
1706 }   // end test_buffer_short_write_array_async()
1707 
1708 
test_buffer_write_async_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1709 int test_buffer_write_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1710 {
1711     cl_ushort *inptr[5];
1712     size_t    ptrSizes[5];
1713     int       i, err;
1714     cl_uint   j;
1715     MTdata    d = init_genrand( gRandomSeed );
1716     int       (*foo)(void *,void *,int);
1717 
1718     size_t    min_alignment = get_min_alignment(context);
1719 
1720     foo = verify_write_ushort;
1721 
1722     ptrSizes[0] = sizeof(cl_ushort);
1723     ptrSizes[1] = ptrSizes[0] << 1;
1724     ptrSizes[2] = ptrSizes[1] << 1;
1725     ptrSizes[3] = ptrSizes[2] << 1;
1726     ptrSizes[4] = ptrSizes[3] << 1;
1727 
1728     for ( i = 0; i < 5; i++ ){
1729         inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1730 
1731         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1732             inptr[i][j] = (cl_ushort)genrand_int32(d);
1733     }
1734 
1735     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort", 5, (void**)inptr,
1736                                          buffer_write_ushort_kernel_code, ushort_kernel_name, foo );
1737 
1738     for ( i = 0; i < 5; i++ ){
1739         align_free( (void *)inptr[i] );
1740 
1741     }
1742 
1743     free_mtdata(d);
1744     return err;
1745 
1746 }   // end test_buffer_ushort_write_array_async()
1747 
1748 
test_buffer_write_async_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1749 int test_buffer_write_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1750 {
1751     char    *inptr[5];
1752     size_t  ptrSizes[5];
1753     int     i, err;
1754     cl_uint j;
1755     MTdata  d = init_genrand( gRandomSeed );
1756     int     (*foo)(void *,void *,int);
1757 
1758     size_t  min_alignment = get_min_alignment(context);
1759 
1760     foo = verify_write_char;
1761 
1762     ptrSizes[0] = sizeof(cl_char);
1763     ptrSizes[1] = ptrSizes[0] << 1;
1764     ptrSizes[2] = ptrSizes[1] << 1;
1765     ptrSizes[3] = ptrSizes[2] << 1;
1766     ptrSizes[4] = ptrSizes[3] << 1;
1767 
1768     for ( i = 0; i < 5; i++ ){
1769         inptr[i] = (char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1770 
1771         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1772             inptr[i][j] = (char)genrand_int32(d);
1773     }
1774 
1775     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char", 5, (void**)inptr,
1776                                          buffer_write_char_kernel_code, char_kernel_name, foo );
1777 
1778     for ( i = 0; i < 5; i++ ){
1779         align_free( (void *)inptr[i] );
1780 
1781     }
1782 
1783     free_mtdata(d);
1784     return err;
1785 
1786 }   // end test_buffer_char_write_array_async()
1787 
1788 
test_buffer_write_async_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1789 int test_buffer_write_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1790 {
1791     uchar   *inptr[5];
1792     size_t  ptrSizes[5];
1793     int     i, err;
1794     cl_uint j;
1795     MTdata  d = init_genrand( gRandomSeed );
1796     int     (*foo)(void *,void *,int);
1797 
1798     size_t  min_alignment = get_min_alignment(context);
1799 
1800     foo = verify_write_uchar;
1801 
1802     ptrSizes[0] = sizeof(cl_uchar);
1803     ptrSizes[1] = ptrSizes[0] << 1;
1804     ptrSizes[2] = ptrSizes[1] << 1;
1805     ptrSizes[3] = ptrSizes[2] << 1;
1806     ptrSizes[4] = ptrSizes[3] << 1;
1807 
1808     for ( i = 0; i < 5; i++ ){
1809         inptr[i] = (uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1810 
1811         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1812             inptr[i][j] = (uchar)genrand_int32(d);
1813     }
1814 
1815     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar", 5, (void**)inptr,
1816                                          buffer_write_uchar_kernel_code, uchar_kernel_name, foo );
1817 
1818     for ( i = 0; i < 5; i++ ){
1819         align_free( (void *)inptr[i] );
1820 
1821     }
1822 
1823     free_mtdata(d);
1824     return err;
1825 
1826 }   // end test_buffer_uchar_write_array_async()
1827 
1828 
test_buffer_write_async_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1829 int test_buffer_write_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1830 {
1831     float   *inptr[5];
1832     size_t  ptrSizes[5];
1833     int     i, err;
1834     cl_uint j;
1835     MTdata  d = init_genrand( gRandomSeed );
1836     int     (*foo)(void *,void *,int);
1837 
1838     size_t  min_alignment = get_min_alignment(context);
1839 
1840     foo = verify_write_float;
1841 
1842     ptrSizes[0] = sizeof(cl_float);
1843     ptrSizes[1] = ptrSizes[0] << 1;
1844     ptrSizes[2] = ptrSizes[1] << 1;
1845     ptrSizes[3] = ptrSizes[2] << 1;
1846     ptrSizes[4] = ptrSizes[3] << 1;
1847 
1848     for ( i = 0; i < 5; i++ ){
1849         inptr[i] = (float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1850 
1851         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1852             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1853     }
1854 
1855     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float", 5, (void**)inptr,
1856                                          buffer_write_float_kernel_code, float_kernel_name, foo );
1857 
1858     for ( i = 0; i < 5; i++ ){
1859         align_free( (void *)inptr[i] );
1860     }
1861 
1862     free_mtdata(d);
1863     return err;
1864 
1865 }   // end test_buffer_float_write_array_async()
1866 
1867 
test_buffer_write_async_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1868 int test_buffer_write_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1869 {
1870     cl_long *inptr[5];
1871     size_t  ptrSizes[5];
1872     int     i, err;
1873     cl_uint j;
1874     MTdata  d = init_genrand( gRandomSeed );
1875     int     (*foo)(void *,void *,int);
1876 
1877     size_t  min_alignment = get_min_alignment(context);
1878 
1879     foo = verify_write_long;
1880 
1881     ptrSizes[0] = sizeof(cl_long);
1882     ptrSizes[1] = ptrSizes[0] << 1;
1883     ptrSizes[2] = ptrSizes[1] << 1;
1884     ptrSizes[3] = ptrSizes[2] << 1;
1885     ptrSizes[4] = ptrSizes[3] << 1;
1886 
1887     if (! gHasLong )
1888     {
1889         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1890         return CL_SUCCESS;
1891     }
1892 
1893     for ( i = 0; i < 5; i++ ){
1894         inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1895 
1896         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1897             inptr[i][j] = ((cl_long) genrand_int32(d)) ^ ((cl_long) genrand_int32(d) << 32);
1898     }
1899 
1900     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"cl_long", 5, (void**)inptr,
1901                                          buffer_write_long_kernel_code, long_kernel_name, foo );
1902 
1903     for ( i = 0; i < 5; i++ ){
1904         align_free( (void *)inptr[i] );
1905     }
1906 
1907     free_mtdata(d);
1908     return err;
1909 
1910 }   // end test_buffer_long_write_array_async()
1911 
1912 
test_buffer_write_async_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1913 int test_buffer_write_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1914 {
1915     cl_ulong *inptr[5];
1916     size_t   ptrSizes[5];
1917     int      i, err;
1918     cl_uint  j;
1919     MTdata   d = init_genrand( gRandomSeed );
1920     int      (*foo)(void *,void *,int);
1921 
1922     size_t   min_alignment = get_min_alignment(context);
1923 
1924     foo = verify_write_ulong;
1925 
1926     ptrSizes[0] = sizeof(cl_ulong);
1927     ptrSizes[1] = ptrSizes[0] << 1;
1928     ptrSizes[2] = ptrSizes[1] << 1;
1929     ptrSizes[3] = ptrSizes[2] << 1;
1930     ptrSizes[4] = ptrSizes[3] << 1;
1931 
1932     if (! gHasLong )
1933     {
1934         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1935         return CL_SUCCESS;
1936     }
1937 
1938     for ( i = 0; i < 5; i++ ){
1939         inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1940 
1941         for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1942             inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1943     }
1944 
1945     err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong long", 5, (void**)inptr,
1946                                          buffer_write_ulong_kernel_code, ulong_kernel_name, foo );
1947 
1948     for ( i = 0; i < 5; i++ ){
1949         align_free( (void *)inptr[i] );
1950     }
1951 
1952     free_mtdata(d);
1953     return err;
1954 
1955 }   // end test_buffer_ulong_write_array_async()
1956 
1957