• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include "harness/errorHelpers.h"
26 
27 #define TEST_PRIME_CHAR        0x77
28 #define TEST_PRIME_INT        ((1<<16)+1)
29 #define TEST_PRIME_UINT        ((1U<<16)+1U)
30 #define TEST_PRIME_LONG        ((1LL<<32)+1LL)
31 #define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
32 #define TEST_PRIME_SHORT    (cl_short)((1<<8)+1)
33 #define TEST_PRIME_USHORT   (cl_ushort)((1<<8)+1)
34 #define TEST_PRIME_FLOAT    (cl_float)3.40282346638528860e+38
35 #define TEST_PRIME_HALF        119.f
36 
37 #ifndef TestStruct
38 typedef struct{
39     cl_int     a;
40     cl_float   b;
41 } TestStruct;
42 #endif
43 
44 const char *buffer_fill_int_kernel_code[] = {
45     "__kernel void test_buffer_fill_int(__global int *src, __global int *dst)\n"
46     "{\n"
47     "    int  tid = get_global_id(0);\n"
48     "\n"
49     "    dst[tid] = src[tid];\n"
50     "}\n",
51 
52     "__kernel void test_buffer_fill_int2(__global int2 *src, __global int2 *dst)\n"
53     "{\n"
54     "    int  tid = get_global_id(0);\n"
55     "\n"
56     "    dst[tid] = src[tid];\n"
57     "}\n",
58 
59     "__kernel void test_buffer_fill_int4(__global int4 *src, __global int4 *dst)\n"
60     "{\n"
61     "    int  tid = get_global_id(0);\n"
62     "\n"
63     "    dst[tid] = src[tid];\n"
64     "}\n",
65 
66     "__kernel void test_buffer_fill_int8(__global int8 *src, __global int8 *dst)\n"
67     "{\n"
68     "    int  tid = get_global_id(0);\n"
69     "\n"
70     "    dst[tid] = src[tid];\n"
71     "}\n",
72 
73     "__kernel void test_buffer_fill_int16(__global int16 *src, __global int16 *dst)\n"
74     "{\n"
75     "    int  tid = get_global_id(0);\n"
76     "\n"
77     "    dst[tid] = src[tid];\n"
78     "}\n" };
79 
80 static const char *int_kernel_name[] = { "test_buffer_fill_int", "test_buffer_fill_int2", "test_buffer_fill_int4", "test_buffer_fill_int8", "test_buffer_fill_int16" };
81 
82 
83 const char *buffer_fill_uint_kernel_code[] = {
84     "__kernel void test_buffer_fill_uint(__global uint *src, __global uint *dst)\n"
85     "{\n"
86     "    int  tid = get_global_id(0);\n"
87     "\n"
88     "    dst[tid] = src[tid];\n"
89     "}\n",
90 
91     "__kernel void test_buffer_fill_uint2(__global uint2 *src, __global uint2 *dst)\n"
92     "{\n"
93     "    int  tid = get_global_id(0);\n"
94     "\n"
95     "    dst[tid] = src[tid];\n"
96     "}\n",
97 
98     "__kernel void test_buffer_fill_uint4(__global uint4 *src, __global uint4 *dst)\n"
99     "{\n"
100     "    int  tid = get_global_id(0);\n"
101     "\n"
102     "    dst[tid] = src[tid];\n"
103     "}\n",
104 
105     "__kernel void test_buffer_fill_uint8(__global uint8 *src, __global uint8 *dst)\n"
106     "{\n"
107     "    int  tid = get_global_id(0);\n"
108     "\n"
109     "    dst[tid] = src[tid];\n"
110     "}\n",
111 
112     "__kernel void test_buffer_fill_uint16(__global uint16 *src, __global uint16 *dst)\n"
113     "{\n"
114     "    int  tid = get_global_id(0);\n"
115     "\n"
116     "    dst[tid] = src[tid];\n"
117     "}\n" };
118 
119 static const char *uint_kernel_name[] = { "test_buffer_fill_uint", "test_buffer_fill_uint2", "test_buffer_fill_uint4", "test_buffer_fill_uint8", "test_buffer_fill_uint16" };
120 
121 
122 const char *buffer_fill_short_kernel_code[] = {
123     "__kernel void test_buffer_fill_short(__global short *src, __global short *dst)\n"
124     "{\n"
125     "    int  tid = get_global_id(0);\n"
126     "\n"
127     "    dst[tid] = src[tid];\n"
128     "}\n",
129 
130     "__kernel void test_buffer_fill_short2(__global short2 *src, __global short2 *dst)\n"
131     "{\n"
132     "    int  tid = get_global_id(0);\n"
133     "\n"
134     "    dst[tid] = src[tid];\n"
135     "}\n",
136 
137     "__kernel void test_buffer_fill_short4(__global short4 *src, __global short4 *dst)\n"
138     "{\n"
139     "    int  tid = get_global_id(0);\n"
140     "\n"
141     "    dst[tid] = src[tid];\n"
142     "}\n",
143 
144     "__kernel void test_buffer_fill_short8(__global short8 *src, __global short8 *dst)\n"
145     "{\n"
146     "    int  tid = get_global_id(0);\n"
147     "\n"
148     "    dst[tid] = src[tid];\n"
149     "}\n",
150 
151     "__kernel void test_buffer_fill_short16(__global short16 *src, __global short16 *dst)\n"
152     "{\n"
153     "    int  tid = get_global_id(0);\n"
154     "\n"
155     "    dst[tid] = src[tid];\n"
156     "}\n" };
157 
158 static const char *short_kernel_name[] = { "test_buffer_fill_short", "test_buffer_fill_short2", "test_buffer_fill_short4", "test_buffer_fill_short8", "test_buffer_fill_short16" };
159 
160 
161 const char *buffer_fill_ushort_kernel_code[] = {
162     "__kernel void test_buffer_fill_ushort(__global ushort *src, __global ushort *dst)\n"
163     "{\n"
164     "    int  tid = get_global_id(0);\n"
165     "\n"
166     "    dst[tid] = src[tid];\n"
167     "}\n",
168 
169     "__kernel void test_buffer_fill_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
170     "{\n"
171     "    int  tid = get_global_id(0);\n"
172     "\n"
173     "    dst[tid] = src[tid];\n"
174     "}\n",
175 
176     "__kernel void test_buffer_fill_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
177     "{\n"
178     "    int  tid = get_global_id(0);\n"
179     "\n"
180     "    dst[tid] = src[tid];\n"
181     "}\n",
182 
183     "__kernel void test_buffer_fill_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
184     "{\n"
185     "    int  tid = get_global_id(0);\n"
186     "\n"
187     "    dst[tid] = src[tid];\n"
188     "}\n",
189 
190     "__kernel void test_buffer_fill_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
191     "{\n"
192     "    int  tid = get_global_id(0);\n"
193     "\n"
194     "    dst[tid] = src[tid];\n"
195     "}\n" };
196 
197 static const char *ushort_kernel_name[] = { "test_buffer_fill_ushort", "test_buffer_fill_ushort2", "test_buffer_fill_ushort4", "test_buffer_fill_ushort8", "test_buffer_fill_ushort16" };
198 
199 
200 const char *buffer_fill_char_kernel_code[] = {
201     "__kernel void test_buffer_fill_char(__global char *src, __global char *dst)\n"
202     "{\n"
203     "    int  tid = get_global_id(0);\n"
204     "\n"
205     "    dst[tid] = src[tid];\n"
206     "}\n",
207 
208     "__kernel void test_buffer_fill_char2(__global char2 *src, __global char2 *dst)\n"
209     "{\n"
210     "    int  tid = get_global_id(0);\n"
211     "\n"
212     "    dst[tid] = src[tid];\n"
213     "}\n",
214 
215     "__kernel void test_buffer_fill_char4(__global char4 *src, __global char4 *dst)\n"
216     "{\n"
217     "    int  tid = get_global_id(0);\n"
218     "\n"
219     "    dst[tid] = src[tid];\n"
220     "}\n",
221 
222     "__kernel void test_buffer_fill_char8(__global char8 *src, __global char8 *dst)\n"
223     "{\n"
224     "    int  tid = get_global_id(0);\n"
225     "\n"
226     "    dst[tid] = src[tid];\n"
227     "}\n",
228 
229     "__kernel void test_buffer_fill_char16(__global char16 *src, __global char16 *dst)\n"
230     "{\n"
231     "    int  tid = get_global_id(0);\n"
232     "\n"
233     "    dst[tid] = src[tid];\n"
234     "}\n" };
235 
236 static const char *char_kernel_name[] = { "test_buffer_fill_char", "test_buffer_fill_char2", "test_buffer_fill_char4", "test_buffer_fill_char8", "test_buffer_fill_char16" };
237 
238 
239 const char *buffer_fill_uchar_kernel_code[] = {
240     "__kernel void test_buffer_fill_uchar(__global uchar *src, __global uchar *dst)\n"
241     "{\n"
242     "    int  tid = get_global_id(0);\n"
243     "\n"
244     "    dst[tid] = src[tid];\n"
245     "}\n",
246 
247     "__kernel void test_buffer_fill_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
248     "{\n"
249     "    int  tid = get_global_id(0);\n"
250     "\n"
251     "    dst[tid] = src[tid];\n"
252     "}\n",
253 
254     "__kernel void test_buffer_fill_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
255     "{\n"
256     "    int  tid = get_global_id(0);\n"
257     "\n"
258     "    dst[tid] = src[tid];\n"
259     "}\n",
260 
261     "__kernel void test_buffer_fill_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
262     "{\n"
263     "    int  tid = get_global_id(0);\n"
264     "\n"
265     "    dst[tid] = src[tid];\n"
266     "}\n",
267 
268     "__kernel void test_buffer_fill_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
269     "{\n"
270     "    int  tid = get_global_id(0);\n"
271     "\n"
272     "    dst[tid] = src[tid];\n"
273     "}\n" };
274 
275 static const char *uchar_kernel_name[] = { "test_buffer_fill_uchar", "test_buffer_fill_uchar2", "test_buffer_fill_uchar4", "test_buffer_fill_uchar8", "test_buffer_fill_uchar16" };
276 
277 
278 const char *buffer_fill_long_kernel_code[] = {
279     "__kernel void test_buffer_fill_long(__global long *src, __global long *dst)\n"
280     "{\n"
281     "    int  tid = get_global_id(0);\n"
282     "\n"
283     "    dst[tid] = src[tid];\n"
284     "}\n",
285 
286     "__kernel void test_buffer_fill_long2(__global long2 *src, __global long2 *dst)\n"
287     "{\n"
288     "    int  tid = get_global_id(0);\n"
289     "\n"
290     "    dst[tid] = src[tid];\n"
291     "}\n",
292 
293     "__kernel void test_buffer_fill_long4(__global long4 *src, __global long4 *dst)\n"
294     "{\n"
295     "    int  tid = get_global_id(0);\n"
296     "\n"
297     "    dst[tid] = src[tid];\n"
298     "}\n",
299 
300     "__kernel void test_buffer_fill_long8(__global long8 *src, __global long8 *dst)\n"
301     "{\n"
302     "    int  tid = get_global_id(0);\n"
303     "\n"
304     "    dst[tid] = src[tid];\n"
305     "}\n",
306 
307     "__kernel void test_buffer_fill_long16(__global long16 *src, __global long16 *dst)\n"
308     "{\n"
309     "    int  tid = get_global_id(0);\n"
310     "\n"
311     "    dst[tid] = src[tid];\n"
312     "}\n" };
313 
314 static const char *long_kernel_name[] = { "test_buffer_fill_long", "test_buffer_fill_long2", "test_buffer_fill_long4", "test_buffer_fill_long8", "test_buffer_fill_long16" };
315 
316 
317 const char *buffer_fill_ulong_kernel_code[] = {
318     "__kernel void test_buffer_fill_ulong(__global ulong *src, __global ulong *dst)\n"
319     "{\n"
320     "    int  tid = get_global_id(0);\n"
321     "\n"
322     "    dst[tid] = src[tid];\n"
323     "}\n",
324 
325     "__kernel void test_buffer_fill_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
326     "{\n"
327     "    int  tid = get_global_id(0);\n"
328     "\n"
329     "    dst[tid] = src[tid];\n"
330     "}\n",
331 
332     "__kernel void test_buffer_fill_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
333     "{\n"
334     "    int  tid = get_global_id(0);\n"
335     "\n"
336     "    dst[tid] = src[tid];\n"
337     "}\n",
338 
339     "__kernel void test_buffer_fill_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
340     "{\n"
341     "    int  tid = get_global_id(0);\n"
342     "\n"
343     "    dst[tid] = src[tid];\n"
344     "}\n",
345 
346     "__kernel void test_buffer_fill_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
347     "{\n"
348     "    int  tid = get_global_id(0);\n"
349     "\n"
350     "    dst[tid] = src[tid];\n"
351     "}\n" };
352 
353 static const char *ulong_kernel_name[] = { "test_buffer_fill_ulong", "test_buffer_fill_ulong2", "test_buffer_fill_ulong4", "test_buffer_fill_ulong8", "test_buffer_fill_ulong16" };
354 
355 
356 const char *buffer_fill_float_kernel_code[] = {
357     "__kernel void test_buffer_fill_float(__global float *src, __global float *dst)\n"
358     "{\n"
359     "    int  tid = get_global_id(0);\n"
360     "\n"
361     "    dst[tid] = src[tid];\n"
362     "}\n",
363 
364     "__kernel void test_buffer_fill_float2(__global float2 *src, __global float2 *dst)\n"
365     "{\n"
366     "    int  tid = get_global_id(0);\n"
367     "\n"
368     "    dst[tid] = src[tid];\n"
369     "}\n",
370 
371     "__kernel void test_buffer_fill_float4(__global float4 *src, __global float4 *dst)\n"
372     "{\n"
373     "    int  tid = get_global_id(0);\n"
374     "\n"
375     "    dst[tid] = src[tid];\n"
376     "}\n",
377 
378     "__kernel void test_buffer_fill_float8(__global float8 *src, __global float8 *dst)\n"
379     "{\n"
380     "    int  tid = get_global_id(0);\n"
381     "\n"
382     "    dst[tid] = src[tid];\n"
383     "}\n",
384 
385     "__kernel void test_buffer_fill_float16(__global float16 *src, __global float16 *dst)\n"
386     "{\n"
387     "    int  tid = get_global_id(0);\n"
388     "\n"
389     "    dst[tid] = src[tid];\n"
390     "}\n" };
391 
392 static const char *float_kernel_name[] = { "test_buffer_fill_float", "test_buffer_fill_float2", "test_buffer_fill_float4", "test_buffer_fill_float8", "test_buffer_fill_float16" };
393 
394 
395 static const char *struct_kernel_code =
396 "typedef struct{\n"
397 "int    a;\n"
398 "float    b;\n"
399 "} TestStruct;\n"
400 "__kernel void read_fill_struct(__global TestStruct *src, __global TestStruct *dst)\n"
401 "{\n"
402 "    int  tid = get_global_id(0);\n"
403 "\n"
404 "    dst[tid].a = src[tid].a;\n"
405 "     dst[tid].b = src[tid].b;\n"
406 "}\n";
407 
408 
409 
verify_fill_int(void * ptr1,void * ptr2,int n)410 static int verify_fill_int( void *ptr1, void *ptr2, int n )
411 {
412     int     i;
413     cl_int  *inptr = (cl_int *)ptr1;
414     cl_int  *outptr = (cl_int *)ptr2;
415 
416     for (i=0; i<n; i++){
417         if ( outptr[i] != inptr[i] )
418             return -1;
419     }
420 
421     return 0;
422 }
423 
424 
verify_fill_uint(void * ptr1,void * ptr2,int n)425 static int verify_fill_uint( void *ptr1, void *ptr2, int n )
426 {
427     int     i;
428     cl_uint *inptr = (cl_uint *)ptr1;
429     cl_uint *outptr = (cl_uint *)ptr2;
430 
431     for (i=0; i<n; i++){
432         if ( outptr[i] != inptr[i] )
433             return -1;
434     }
435 
436     return 0;
437 }
438 
439 
verify_fill_short(void * ptr1,void * ptr2,int n)440 static int verify_fill_short( void *ptr1, void *ptr2, int n )
441 {
442     int      i;
443     cl_short *inptr = (cl_short *)ptr1;
444     cl_short *outptr = (cl_short *)ptr2;
445 
446     for (i=0; i<n; i++){
447         if ( outptr[i] != inptr[i] )
448             return -1;
449     }
450 
451     return 0;
452 }
453 
454 
verify_fill_ushort(void * ptr1,void * ptr2,int n)455 static int verify_fill_ushort( void *ptr1, void *ptr2, int n )
456 {
457     int       i;
458     cl_ushort *inptr = (cl_ushort *)ptr1;
459     cl_ushort *outptr = (cl_ushort *)ptr2;
460 
461     for (i=0; i<n; i++){
462         if ( outptr[i] != inptr[i] )
463             return -1;
464     }
465 
466     return 0;
467 }
468 
469 
verify_fill_char(void * ptr1,void * ptr2,int n)470 static int verify_fill_char( void *ptr1, void *ptr2, int n )
471 {
472     int     i;
473     cl_char *inptr = (cl_char *)ptr1;
474     cl_char *outptr = (cl_char *)ptr2;
475 
476     for (i=0; i<n; i++){
477         if ( outptr[i] != inptr[i] )
478             return -1;
479     }
480 
481     return 0;
482 }
483 
484 
verify_fill_uchar(void * ptr1,void * ptr2,int n)485 static int verify_fill_uchar( void *ptr1, void *ptr2, int n )
486 {
487     int      i;
488     cl_uchar *inptr = (cl_uchar *)ptr1;
489     cl_uchar *outptr = (cl_uchar *)ptr2;
490 
491     for (i=0; i<n; i++){
492         if ( outptr[i] != inptr[i] )
493             return -1;
494     }
495 
496     return 0;
497 }
498 
499 
verify_fill_long(void * ptr1,void * ptr2,int n)500 static int verify_fill_long( void *ptr1, void *ptr2, int n )
501 {
502     int     i;
503     cl_long *inptr = (cl_long *)ptr1;
504     cl_long *outptr = (cl_long *)ptr2;
505 
506     for (i=0; i<n; i++){
507         if ( outptr[i] != inptr[i] )
508             return -1;
509     }
510 
511     return 0;
512 }
513 
514 
verify_fill_ulong(void * ptr1,void * ptr2,int n)515 static int verify_fill_ulong( void *ptr1, void *ptr2, int n )
516 {
517     int      i;
518     cl_ulong *inptr = (cl_ulong *)ptr1;
519     cl_ulong *outptr = (cl_ulong *)ptr2;
520 
521     for (i=0; i<n; i++){
522         if ( outptr[i] != inptr[i] )
523             return -1;
524     }
525 
526     return 0;
527 }
528 
529 
verify_fill_float(void * ptr1,void * ptr2,int n)530 static int verify_fill_float( void *ptr1, void *ptr2, int n )
531 {
532     int      i;
533     cl_float *inptr = (cl_float *)ptr1;
534     cl_float *outptr = (cl_float *)ptr2;
535 
536     for (i=0; i<n; i++){
537         if ( outptr[i] != inptr[i] )
538             return -1;
539     }
540 
541     return 0;
542 }
543 
544 
verify_fill_struct(void * ptr1,void * ptr2,int n)545 static int verify_fill_struct( void *ptr1, void *ptr2, int n )
546 {
547     int         i;
548     TestStruct  *inptr = (TestStruct *)ptr1;
549     TestStruct  *outptr = (TestStruct *)ptr2;
550 
551     for (i=0; i<n; i++){
552         if ( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
553             return -1;
554     }
555 
556     return 0;
557 }
558 
559 
560 
test_buffer_fill(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],void * hostptr[5],void * pattern[5],size_t offset_elements,size_t fill_elements,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int))561 int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type,
562                      int loops, void *inptr[5], void *hostptr[5], void *pattern[5], size_t offset_elements, size_t fill_elements,
563                      const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
564 {
565     void        *outptr[5];
566     clProgramWrapper program[5];
567     clKernelWrapper kernel[5];
568     size_t      ptrSizes[5];
569     size_t      global_work_size[3];
570     int         err;
571     int i;
572     int         src_flag_id;
573     int         total_errors = 0;
574 
575     size_t      min_alignment = get_min_alignment(context);
576 
577     global_work_size[0] = (size_t)num_elements;
578 
579     ptrSizes[0] = size;
580     ptrSizes[1] = ptrSizes[0] << 1;
581     ptrSizes[2] = ptrSizes[1] << 1;
582     ptrSizes[3] = ptrSizes[2] << 1;
583     ptrSizes[4] = ptrSizes[3] << 1;
584 
585     loops = (loops < 5 ? loops : 5);
586     for (i = 0; i < loops; i++)
587     {
588         err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
589                                           &kernelCode[i], kernelName[i]);
590         if (err)
591         {
592             log_error(" Error creating program for %s\n", type);
593             return -1;
594         }
595 
596         for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
597         {
598             clEventWrapper event[2];
599             clMemWrapper buffers[2];
600             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
601                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],
602                                             ptrSizes[i] * num_elements,
603                                             hostptr[i], &err);
604             else
605                 buffers[0] =
606                     clCreateBuffer(context, flag_set[src_flag_id],
607                                    ptrSizes[i] * num_elements, NULL, &err);
608             if (!buffers[0] || err)
609             {
610                 print_error(err, "clCreateBuffer failed\n" );
611                 return -1;
612             }
613             // Initialize source buffer with 0, since the validation code expects 0(s) outside of the fill region.
614             if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
615                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0,
616                                            ptrSizes[i] * num_elements,
617                                            hostptr[i], 0, NULL, NULL);
618                 if ( err != CL_SUCCESS ){
619                     print_error(err, "clEnqueueWriteBuffer failed\n" );
620                     return -1;
621                 }
622             }
623 
624             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
625             memset(outptr[i], 0, ptrSizes[i] * num_elements);
626             buffers[1] =
627                 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
628                                ptrSizes[i] * num_elements, outptr[i], &err);
629             if (!buffers[1] || err)
630             {
631                 print_error(err, "clCreateBuffer failed\n" );
632                 align_free( outptr[i] );
633                 return -1;
634             }
635 
636             err = clEnqueueFillBuffer(
637                 queue, buffers[0], pattern[i], ptrSizes[i],
638                 ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements, 0,
639                 NULL, &(event[0]));
640 
641             if ( err != CL_SUCCESS ){
642                 print_error( err, " clEnqueueFillBuffer failed" );
643                 align_free( outptr[i] );
644                 return -1;
645             }
646 
647             err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem),
648                                  (void *)&buffers[0]);
649             err |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem),
650                                   (void *)&buffers[1]);
651             if ( err != CL_SUCCESS ){
652                 print_error( err, "clSetKernelArg failed" );
653                 align_free( outptr[i] );
654                 return -1;
655             }
656 
657             err = clWaitForEvents(  1, &(event[0]) );
658             if ( err != CL_SUCCESS ){
659                 print_error( err, "clWaitForEvents() failed" );
660                 align_free( outptr[i] );
661                 return -1;
662             }
663 
664             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
665             if (err != CL_SUCCESS){
666                 print_error( err, "clEnqueueNDRangeKernel failed" );
667                 return -1;
668             }
669 
670             err = clEnqueueReadBuffer(queue, buffers[1], false, 0,
671                                       ptrSizes[i] * num_elements, outptr[i], 0,
672                                       NULL, &(event[1]));
673             if (err != CL_SUCCESS){
674                 print_error( err, "clEnqueueReadBuffer failed" );
675                 return -1;
676             }
677 
678             err = clWaitForEvents( 1, &(event[1]) );
679             if ( err != CL_SUCCESS ){
680                 print_error( err, "clWaitForEvents() failed" );
681             }
682 
683             if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
684                 log_error(" %s%d test failed. (cl_mem_flags: %s)\n", type,
685                           1 << i, flag_set_names[src_flag_id]);
686                 total_errors++;
687             }
688             else{
689                 log_info(" %s%d test passed (cl_mem_flags: %s)\n", type, 1 << i,
690                          flag_set_names[src_flag_id]);
691             }
692 
693             // cleanup
694             align_free( outptr[i] );
695         }
696     } // src cl_mem_flag
697 
698     return total_errors;
699 
700 }   // end test_buffer_fill()
701 
702 
test_buffer_fill_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)703 int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
704 {
705     TestStruct pattern;
706     clProgramWrapper program;
707     clKernelWrapper kernel;
708     size_t      ptrSize = sizeof( TestStruct );
709     size_t      global_work_size[3];
710     int         n, err;
711     size_t      j, offset_elements, fill_elements;
712     int         src_flag_id;
713     int         total_errors = 0;
714     MTdata      d = init_genrand( gRandomSeed );
715 
716     size_t      min_alignment = get_min_alignment(context);
717 
718     global_work_size[0] = (size_t)num_elements;
719 
720 
721     for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
722     {
723         log_info("Testing with cl_mem_flags: %s\n",
724                  flag_set_names[src_flag_id]);
725 
726         err = create_single_kernel_helper(context, &program, &kernel, 1,
727                                           &struct_kernel_code,
728                                           "read_fill_struct");
729         if (err)
730         {
731             log_error(" Error creating program for struct\n");
732             free_mtdata(d);
733             return -1;
734         }
735 
736         // Test with random offsets and fill sizes
737         for (n = 0; n < 8; n++)
738         {
739             clEventWrapper event[2];
740             clMemWrapper buffers[2];
741             void *outptr;
742             TestStruct *inptr;
743             TestStruct *hostptr;
744 
745             offset_elements =
746                 (size_t)get_random_float(0.f, (float)(num_elements - 8), d);
747             fill_elements = (size_t)get_random_float(
748                 8.f, (float)(num_elements - offset_elements), d);
749             log_info("Testing random fill from offset %d for %d elements: \n",
750                      (int)offset_elements, (int)fill_elements);
751 
752             pattern.a = (cl_int)genrand_int32(d);
753             pattern.b = (cl_float)get_random_float(-FLT_MAX, FLT_MAX, d);
754 
755             inptr = (TestStruct *)align_malloc(ptrSize * num_elements,
756                                                min_alignment);
757             for (j = 0; j < offset_elements; j++)
758             {
759                 inptr[j].a = 0;
760                 inptr[j].b = 0;
761             }
762             for (j = offset_elements; j < offset_elements + fill_elements; j++)
763             {
764                 inptr[j].a = pattern.a;
765                 inptr[j].b = pattern.b;
766             }
767             for (j = offset_elements + fill_elements; j < (size_t)num_elements;
768                  j++)
769             {
770                 inptr[j].a = 0;
771                 inptr[j].b = 0;
772             }
773 
774             hostptr = (TestStruct *)align_malloc(ptrSize * num_elements,
775                                                  min_alignment);
776             memset(hostptr, 0, ptrSize * num_elements);
777 
778             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
779                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, hostptr, &err);
780             else
781                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, NULL, &err);
782             if ( err ){
783                 print_error(err, " clCreateBuffer failed\n" );
784                 align_free( (void *)inptr );
785                 align_free( (void *)hostptr );
786                 free_mtdata(d);
787                 return -1;
788             }
789             if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
790                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL);
791                 if ( err != CL_SUCCESS ){
792                     print_error(err, " clEnqueueWriteBuffer failed\n" );
793                     align_free( (void *)inptr );
794                     align_free( (void *)hostptr );
795                     free_mtdata(d);
796                     return -1;
797                 }
798             }
799             outptr = align_malloc( ptrSize * num_elements, min_alignment);
800             memset(outptr, 0, ptrSize * num_elements);
801             buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  ptrSize * num_elements, outptr, &err);
802             if ( ! buffers[1] || err){
803                 print_error(err, " clCreateBuffer failed\n" );
804                 align_free( outptr );
805                 align_free( (void *)inptr );
806                 align_free( (void *)hostptr );
807                 free_mtdata(d);
808                 return -1;
809             }
810 
811             err = clEnqueueFillBuffer(
812                 queue, buffers[0], &pattern, ptrSize, ptrSize * offset_elements,
813                 ptrSize * fill_elements, 0, NULL, &(event[0]));
814             /* uncomment for test debugging
815              err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0]));
816              */
817             if ( err != CL_SUCCESS ){
818                 print_error( err, " clEnqueueFillBuffer failed" );
819                 align_free( outptr );
820                 align_free( (void *)inptr );
821                 align_free( (void *)hostptr );
822                 free_mtdata(d);
823                 return -1;
824             }
825 
826             err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffers[0] );
827             err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] );
828             if ( err != CL_SUCCESS ){
829                 print_error( err, " clSetKernelArg failed" );
830                 align_free( outptr );
831                 align_free( (void *)inptr );
832                 align_free( (void *)hostptr );
833                 free_mtdata(d);
834                 return -1;
835             }
836 
837             err = clWaitForEvents(  1, &(event[0]) );
838             if ( err != CL_SUCCESS ){
839                 print_error( err, "clWaitForEvents() failed" );
840                 align_free( outptr );
841                 align_free( (void *)inptr );
842                 align_free( (void *)hostptr );
843                 free_mtdata(d);
844                 return -1;
845             }
846 
847             err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
848             if ( err != CL_SUCCESS ){
849                 print_error( err, " clEnqueueNDRangeKernel failed" );
850                 align_free( outptr );
851                 align_free( (void *)inptr );
852                 align_free( (void *)hostptr );
853                 free_mtdata(d);
854                 return -1;
855             }
856 
857             err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) );
858             if ( err != CL_SUCCESS ){
859                 print_error( err, " clEnqueueReadBuffer failed" );
860                 align_free( outptr );
861                 align_free( (void *)inptr );
862                 align_free( (void *)hostptr );
863                 free_mtdata(d);
864                 return -1;
865             }
866 
867             err = clWaitForEvents( 1, &(event[1]) );
868             if ( err != CL_SUCCESS ){
869                 print_error( err, "clWaitForEvents() failed" );
870             }
871 
872             if ( verify_fill_struct( inptr, outptr, num_elements) ) {
873                 log_error( " buffer_FILL async struct test failed\n" );
874                 total_errors++;
875             }
876             else{
877                 log_info( " buffer_FILL async struct test passed\n" );
878             }
879             // cleanup
880             align_free( outptr );
881             align_free((void *)inptr);
882             align_free((void *)hostptr);
883         } // src cl_mem_flag
884     }
885 
886     free_mtdata(d);
887 
888     return total_errors;
889 
890 }   // end test_buffer_fill_struct()
891 
892 
test_buffer_fill_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)893 int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
894 {
895     cl_int  *inptr[5];
896     cl_int  *hostptr[5];
897     cl_int  *pattern[5];
898     size_t  ptrSizes[5];
899     int     n, i, err=0;
900     size_t  j, offset_elements, fill_elements;
901     int     (*foo)(void *,void *,int);
902     MTdata  d = init_genrand( gRandomSeed );
903 
904     size_t  min_alignment = get_min_alignment(context);
905 
906     foo = verify_fill_int;
907 
908     ptrSizes[0] = sizeof(cl_int);
909     ptrSizes[1] = ptrSizes[0] << 1;
910     ptrSizes[2] = ptrSizes[1] << 1;
911     ptrSizes[3] = ptrSizes[2] << 1;
912     ptrSizes[4] = ptrSizes[3] << 1;
913 
914     // Test with random offsets and fill sizes
915     for ( n = 0; n < 8; n++ ){
916         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
917         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
918         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
919 
920         for ( i = 0; i < 5; i++ ){
921             pattern[i] = (cl_int *)malloc(ptrSizes[i]);
922             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
923                 pattern[i][j] = TEST_PRIME_INT;
924 
925             inptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
926             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
927                 inptr[i][j] = 0;
928             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
929                 inptr[i][j] = TEST_PRIME_INT;
930             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
931                 inptr[i][j] = 0;
932 
933             hostptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
934             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
935         }
936 
937         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int",
938                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
939                              offset_elements, fill_elements,
940                              buffer_fill_int_kernel_code, int_kernel_name, foo ))
941             err++;
942 
943         for ( i = 0; i < 5; i++ ){
944             free( (void *)pattern[i] );
945             align_free( (void *)inptr[i] );
946             align_free( (void *)hostptr[i] );
947         }
948 
949     }
950 
951     free_mtdata(d);
952 
953     return err;
954 
955 }   // end test_buffer_int_fill()
956 
957 
test_buffer_fill_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)958 int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
959 {
960     cl_uint *inptr[5];
961     cl_uint *hostptr[5];
962     cl_uint *pattern[5];
963     size_t  ptrSizes[5];
964     int     n, i, err=0;
965     size_t  j, offset_elements, fill_elements;
966     MTdata  d = init_genrand( gRandomSeed );
967     int     (*foo)(void *,void *,int);
968 
969     size_t  min_alignment = get_min_alignment(context);
970 
971     foo = verify_fill_uint;
972 
973     ptrSizes[0] = sizeof(cl_uint);
974     ptrSizes[1] = ptrSizes[0] << 1;
975     ptrSizes[2] = ptrSizes[1] << 1;
976     ptrSizes[3] = ptrSizes[2] << 1;
977     ptrSizes[4] = ptrSizes[3] << 1;
978 
979     // Test with random offsets and fill sizes
980     for ( n = 0; n < 8; n++ ){
981         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
982         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
983         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
984 
985         for ( i = 0; i < 5; i++ ){
986             pattern[i] = (cl_uint *)malloc(ptrSizes[i]);
987             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
988                 pattern[i][j] = TEST_PRIME_UINT;
989 
990             inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
991             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
992                 inptr[i][j] = 0;
993             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
994                 inptr[i][j] = TEST_PRIME_UINT;
995             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
996                 inptr[i][j] = 0;
997 
998             hostptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
999             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1000         }
1001 
1002         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint",
1003                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1004                              offset_elements, fill_elements,
1005                              buffer_fill_uint_kernel_code, uint_kernel_name, foo ))
1006             err++;
1007 
1008         for ( i = 0; i < 5; i++ ){
1009             free( (void *)pattern[i] );
1010             align_free( (void *)inptr[i] );
1011             align_free( (void *)hostptr[i] );
1012         }
1013 
1014     }
1015 
1016     free_mtdata(d);
1017 
1018     return err;
1019 
1020 }   // end test_buffer_uint_fill()
1021 
1022 
test_buffer_fill_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1023 int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1024 {
1025     cl_short *inptr[5];
1026     cl_short *hostptr[5];
1027     cl_short *pattern[5];
1028     size_t   ptrSizes[5];
1029     int      n, i, err=0;
1030     size_t   j, offset_elements, fill_elements;
1031     MTdata   d = init_genrand( gRandomSeed );
1032     int      (*foo)(void *,void *,int);
1033 
1034     size_t  min_alignment = get_min_alignment(context);
1035 
1036     foo = verify_fill_short;
1037 
1038     ptrSizes[0] = sizeof(cl_short);
1039     ptrSizes[1] = ptrSizes[0] << 1;
1040     ptrSizes[2] = ptrSizes[1] << 1;
1041     ptrSizes[3] = ptrSizes[2] << 1;
1042     ptrSizes[4] = ptrSizes[3] << 1;
1043 
1044     // Test with random offsets and fill sizes
1045     for ( n = 0; n < 8; n++ ){
1046         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1047         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1048         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1049 
1050         for ( i = 0; i < 5; i++ ){
1051             pattern[i] = (cl_short *)malloc(ptrSizes[i]);
1052             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1053                 pattern[i][j] = TEST_PRIME_SHORT;
1054 
1055             inptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1056             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1057                 inptr[i][j] = 0;
1058             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1059                 inptr[i][j] = TEST_PRIME_SHORT;
1060             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1061                 inptr[i][j] = 0;
1062 
1063             hostptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1064             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1065         }
1066 
1067         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short",
1068                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1069                              offset_elements, fill_elements,
1070                              buffer_fill_short_kernel_code, short_kernel_name, foo ))
1071             err++;
1072 
1073         for ( i = 0; i < 5; i++ ){
1074             free( (void *)pattern[i] );
1075             align_free( (void *)inptr[i] );
1076             align_free( (void *)hostptr[i] );
1077         }
1078 
1079     }
1080 
1081     free_mtdata(d);
1082 
1083     return err;
1084 
1085 }   // end test_buffer_short_fill()
1086 
1087 
test_buffer_fill_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1088 int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1089 {
1090     cl_ushort *inptr[5];
1091     cl_ushort *hostptr[5];
1092     cl_ushort *pattern[5];
1093     size_t    ptrSizes[5];
1094     int       n, i, err=0;
1095     size_t    j, offset_elements, fill_elements;
1096     MTdata    d = init_genrand( gRandomSeed );
1097     int       (*foo)(void *,void *,int);
1098 
1099     size_t    min_alignment = get_min_alignment(context);
1100 
1101     foo = verify_fill_ushort;
1102 
1103     ptrSizes[0] = sizeof(cl_ushort);
1104     ptrSizes[1] = ptrSizes[0] << 1;
1105     ptrSizes[2] = ptrSizes[1] << 1;
1106     ptrSizes[3] = ptrSizes[2] << 1;
1107     ptrSizes[4] = ptrSizes[3] << 1;
1108 
1109     // Test with random offsets and fill sizes
1110     for ( n = 0; n < 8; n++ ){
1111         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1112         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1113         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1114 
1115         for ( i = 0; i < 5; i++ ){
1116             pattern[i] = (cl_ushort *)malloc(ptrSizes[i]);
1117             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1118                 pattern[i][j] = TEST_PRIME_USHORT;
1119 
1120             inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1121             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1122                 inptr[i][j] = 0;
1123             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1124                 inptr[i][j] = TEST_PRIME_USHORT;
1125             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1126                 inptr[i][j] = 0;
1127 
1128             hostptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1129             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1130         }
1131 
1132         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort",
1133                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1134                              offset_elements, fill_elements,
1135                              buffer_fill_ushort_kernel_code, ushort_kernel_name, foo ))
1136             err++;
1137 
1138         for ( i = 0; i < 5; i++ ){
1139             free( (void *)pattern[i] );
1140             align_free( (void *)inptr[i] );
1141             align_free( (void *)hostptr[i] );
1142         }
1143 
1144     }
1145 
1146     free_mtdata(d);
1147 
1148     return err;
1149 
1150 }   // end test_buffer_ushort_fill()
1151 
1152 
test_buffer_fill_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1153 int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1154 {
1155     cl_char *inptr[5];
1156     cl_char *hostptr[5];
1157     cl_char *pattern[5];
1158     size_t  ptrSizes[5];
1159     int     n, i, err=0;
1160     size_t  j, offset_elements, fill_elements;
1161     MTdata  d = init_genrand( gRandomSeed );
1162     int     (*foo)(void *,void *,int);
1163 
1164     size_t  min_alignment = get_min_alignment(context);
1165 
1166     foo = verify_fill_char;
1167 
1168     ptrSizes[0] = sizeof(cl_char);
1169     ptrSizes[1] = ptrSizes[0] << 1;
1170     ptrSizes[2] = ptrSizes[1] << 1;
1171     ptrSizes[3] = ptrSizes[2] << 1;
1172     ptrSizes[4] = ptrSizes[3] << 1;
1173 
1174     // Test with random offsets and fill sizes
1175     for ( n = 0; n < 8; n++ ){
1176         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1177         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1178         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1179 
1180         for ( i = 0; i < 5; i++ ){
1181             pattern[i] = (cl_char *)malloc(ptrSizes[i]);
1182             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1183                 pattern[i][j] = TEST_PRIME_CHAR;
1184 
1185             inptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1186             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1187                 inptr[i][j] = 0;
1188             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1189                 inptr[i][j] = TEST_PRIME_CHAR;
1190             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1191                 inptr[i][j] = 0;
1192 
1193             hostptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1194             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1195         }
1196 
1197         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char",
1198                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1199                              offset_elements, fill_elements,
1200                              buffer_fill_char_kernel_code, char_kernel_name, foo ))
1201             err++;
1202 
1203         for ( i = 0; i < 5; i++ ){
1204             free( (void *)pattern[i] );
1205             align_free( (void *)inptr[i] );
1206             align_free( (void *)hostptr[i] );
1207         }
1208 
1209     }
1210 
1211     free_mtdata(d);
1212 
1213     return err;
1214 
1215 }   // end test_buffer_char_fill()
1216 
1217 
test_buffer_fill_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1218 int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1219 {
1220     cl_uchar *inptr[5];
1221     cl_uchar *hostptr[5];
1222     cl_uchar *pattern[5];
1223     size_t   ptrSizes[5];
1224     int      n, i, err=0;
1225     size_t   j, offset_elements, fill_elements;
1226     MTdata   d = init_genrand( gRandomSeed );
1227     int      (*foo)(void *,void *,int);
1228 
1229     size_t  min_alignment = get_min_alignment(context);
1230 
1231     foo = verify_fill_uchar;
1232 
1233     ptrSizes[0] = sizeof(cl_uchar);
1234     ptrSizes[1] = ptrSizes[0] << 1;
1235     ptrSizes[2] = ptrSizes[1] << 1;
1236     ptrSizes[3] = ptrSizes[2] << 1;
1237     ptrSizes[4] = ptrSizes[3] << 1;
1238 
1239     // Test with random offsets and fill sizes
1240     for ( n = 0; n < 8; n++ ){
1241         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1242         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1243         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1244 
1245         for ( i = 0; i < 5; i++ ){
1246             pattern[i] = (cl_uchar *)malloc(ptrSizes[i]);
1247             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1248                 pattern[i][j] = TEST_PRIME_CHAR;
1249 
1250             inptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1251             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1252                 inptr[i][j] = 0;
1253             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1254                 inptr[i][j] = TEST_PRIME_CHAR;
1255             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1256                 inptr[i][j] = 0;
1257 
1258             hostptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1259             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1260         }
1261 
1262         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar",
1263                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1264                              offset_elements, fill_elements,
1265                              buffer_fill_uchar_kernel_code, uchar_kernel_name, foo ))
1266             err++;
1267 
1268         for ( i = 0; i < 5; i++ ){
1269             free( (void *)pattern[i] );
1270             align_free( (void *)inptr[i] );
1271             align_free( (void *)hostptr[i] );
1272         }
1273 
1274     }
1275 
1276     free_mtdata(d);
1277 
1278     return err;
1279 
1280 }   // end test_buffer_uchar_fill()
1281 
1282 
test_buffer_fill_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1283 int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1284 {
1285     cl_long *inptr[5];
1286     cl_long *hostptr[5];
1287     cl_long *pattern[5];
1288     size_t  ptrSizes[5];
1289     int     n, i, err=0;
1290     size_t  j, offset_elements, fill_elements;
1291     MTdata  d = init_genrand( gRandomSeed );
1292     int     (*foo)(void *,void *,int);
1293 
1294     size_t  min_alignment = get_min_alignment(context);
1295 
1296     foo = verify_fill_long;
1297 
1298     ptrSizes[0] = sizeof(cl_long);
1299     ptrSizes[1] = ptrSizes[0] << 1;
1300     ptrSizes[2] = ptrSizes[1] << 1;
1301     ptrSizes[3] = ptrSizes[2] << 1;
1302     ptrSizes[4] = ptrSizes[3] << 1;
1303 
1304     //skip devices that don't support long
1305     if (! gHasLong )
1306     {
1307         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1308         return CL_SUCCESS;
1309     }
1310 
1311     // Test with random offsets and fill sizes
1312     for ( n = 0; n < 8; n++ ){
1313         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1314         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1315         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1316 
1317         for ( i = 0; i < 5; i++ ){
1318             pattern[i] = (cl_long *)malloc(ptrSizes[i]);
1319             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1320                 pattern[i][j] = TEST_PRIME_LONG;
1321 
1322             inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1323             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1324                 inptr[i][j] = 0;
1325             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1326                 inptr[i][j] = TEST_PRIME_LONG;
1327             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1328                 inptr[i][j] = 0;
1329 
1330             hostptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1331             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1332         }
1333 
1334         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"long",
1335                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1336                              offset_elements, fill_elements,
1337                              buffer_fill_long_kernel_code, long_kernel_name, foo ))
1338             err++;
1339 
1340         for ( i = 0; i < 5; i++ ){
1341             free( (void *)pattern[i] );
1342             align_free( (void *)inptr[i] );
1343             align_free( (void *)hostptr[i] );
1344         }
1345 
1346     }
1347 
1348     free_mtdata(d);
1349 
1350     return err;
1351 
1352 }   // end test_buffer_long_fill()
1353 
1354 
test_buffer_fill_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1355 int test_buffer_fill_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1356 {
1357     cl_ulong *inptr[5];
1358     cl_ulong *hostptr[5];
1359     cl_ulong *pattern[5];
1360     size_t   ptrSizes[5];
1361     int      n, i, err=0;
1362     size_t   j, offset_elements, fill_elements;
1363     MTdata   d = init_genrand( gRandomSeed );
1364     int      (*foo)(void *,void *,int);
1365 
1366     size_t   min_alignment = get_min_alignment(context);
1367 
1368     foo = verify_fill_ulong;
1369 
1370     ptrSizes[0] = sizeof(cl_ulong);
1371     ptrSizes[1] = ptrSizes[0] << 1;
1372     ptrSizes[2] = ptrSizes[1] << 1;
1373     ptrSizes[3] = ptrSizes[2] << 1;
1374     ptrSizes[4] = ptrSizes[3] << 1;
1375 
1376     if (! gHasLong )
1377     {
1378         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1379         return CL_SUCCESS;
1380     }
1381 
1382     // Test with random offsets and fill sizes
1383     for ( n = 0; n < 8; n++ ){
1384         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1385         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1386         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1387 
1388         for ( i = 0; i < 5; i++ ){
1389             pattern[i] = (cl_ulong *)malloc(ptrSizes[i]);
1390             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1391                 pattern[i][j] = TEST_PRIME_ULONG;
1392 
1393             inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1394             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1395                 inptr[i][j] = 0;
1396             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1397                 inptr[i][j] = TEST_PRIME_ULONG;
1398             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1399                 inptr[i][j] = 0;
1400 
1401             hostptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1402             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1403         }
1404 
1405         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong",
1406                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1407                              offset_elements, fill_elements,
1408                              buffer_fill_ulong_kernel_code, ulong_kernel_name, foo ))
1409             err++;
1410 
1411         for ( i = 0; i < 5; i++ ){
1412             free( (void *)pattern[i] );
1413             align_free( (void *)inptr[i] );
1414             align_free( (void *)hostptr[i] );
1415         }
1416 
1417     }
1418 
1419     free_mtdata(d);
1420 
1421     return err;
1422 
1423 }   // end test_buffer_ulong_fill()
1424 
1425 
test_buffer_fill_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1426 int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1427 {
1428     cl_float *inptr[5];
1429     cl_float *hostptr[5];
1430     cl_float *pattern[5];
1431     size_t   ptrSizes[5];
1432     int      n, i, err=0;
1433     size_t   j, offset_elements, fill_elements;
1434     MTdata   d = init_genrand( gRandomSeed );
1435     int      (*foo)(void *,void *,int);
1436 
1437     size_t  min_alignment = get_min_alignment(context);
1438 
1439     foo = verify_fill_float;
1440 
1441     ptrSizes[0] = sizeof(cl_float);
1442     ptrSizes[1] = ptrSizes[0] << 1;
1443     ptrSizes[2] = ptrSizes[1] << 1;
1444     ptrSizes[3] = ptrSizes[2] << 1;
1445     ptrSizes[4] = ptrSizes[3] << 1;
1446 
1447     // Test with random offsets and fill sizes
1448     for ( n = 0; n < 8; n++ ){
1449         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1450         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1451         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1452 
1453         for ( i = 0; i < 5; i++ ){
1454             pattern[i] = (cl_float *)malloc(ptrSizes[i]);
1455             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1456                 pattern[i][j] = TEST_PRIME_FLOAT;
1457 
1458             inptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1459             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1460                 inptr[i][j] = 0;
1461             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1462                 inptr[i][j] = TEST_PRIME_FLOAT;
1463             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1464                 inptr[i][j] = 0;
1465 
1466             hostptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1467             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1468         }
1469 
1470         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float",
1471                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1472                              offset_elements, fill_elements,
1473                              buffer_fill_float_kernel_code, float_kernel_name, foo ))
1474             err++;
1475 
1476         for ( i = 0; i < 5; i++ ){
1477             free( (void *)pattern[i] );
1478             align_free( (void *)inptr[i] );
1479             align_free( (void *)hostptr[i] );
1480         }
1481 
1482     }
1483 
1484     free_mtdata(d);
1485 
1486     return err;
1487 
1488 }   // end test_buffer_float_fill()
1489