• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include "harness/errorHelpers.h"
26 
27 #define USE_LOCAL_WORK_GROUP    1
28 
29 #define TEST_PRIME_CHAR        0x77
30 #define TEST_PRIME_INT        ((1<<16)+1)
31 #define TEST_PRIME_UINT        ((1U<<16)+1U)
32 #define TEST_PRIME_LONG        ((1LL<<32)+1LL)
33 #define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
34 #define TEST_PRIME_SHORT    (cl_short)((1<<8)+1)
35 #define TEST_PRIME_USHORT   (cl_ushort)((1<<8)+1)
36 #define TEST_PRIME_FLOAT    (cl_float)3.40282346638528860e+38
37 #define TEST_PRIME_HALF        119.f
38 
39 #ifndef TestStruct
40 typedef struct{
41     cl_int     a;
42     cl_float   b;
43 } TestStruct;
44 #endif
45 
46 const char *buffer_fill_int_kernel_code[] = {
47     "__kernel void test_buffer_fill_int(__global int *src, __global int *dst)\n"
48     "{\n"
49     "    int  tid = get_global_id(0);\n"
50     "\n"
51     "    dst[tid] = src[tid];\n"
52     "}\n",
53 
54     "__kernel void test_buffer_fill_int2(__global int2 *src, __global int2 *dst)\n"
55     "{\n"
56     "    int  tid = get_global_id(0);\n"
57     "\n"
58     "    dst[tid] = src[tid];\n"
59     "}\n",
60 
61     "__kernel void test_buffer_fill_int4(__global int4 *src, __global int4 *dst)\n"
62     "{\n"
63     "    int  tid = get_global_id(0);\n"
64     "\n"
65     "    dst[tid] = src[tid];\n"
66     "}\n",
67 
68     "__kernel void test_buffer_fill_int8(__global int8 *src, __global int8 *dst)\n"
69     "{\n"
70     "    int  tid = get_global_id(0);\n"
71     "\n"
72     "    dst[tid] = src[tid];\n"
73     "}\n",
74 
75     "__kernel void test_buffer_fill_int16(__global int16 *src, __global int16 *dst)\n"
76     "{\n"
77     "    int  tid = get_global_id(0);\n"
78     "\n"
79     "    dst[tid] = src[tid];\n"
80     "}\n" };
81 
82 static const char *int_kernel_name[] = { "test_buffer_fill_int", "test_buffer_fill_int2", "test_buffer_fill_int4", "test_buffer_fill_int8", "test_buffer_fill_int16" };
83 
84 
85 const char *buffer_fill_uint_kernel_code[] = {
86     "__kernel void test_buffer_fill_uint(__global uint *src, __global uint *dst)\n"
87     "{\n"
88     "    int  tid = get_global_id(0);\n"
89     "\n"
90     "    dst[tid] = src[tid];\n"
91     "}\n",
92 
93     "__kernel void test_buffer_fill_uint2(__global uint2 *src, __global uint2 *dst)\n"
94     "{\n"
95     "    int  tid = get_global_id(0);\n"
96     "\n"
97     "    dst[tid] = src[tid];\n"
98     "}\n",
99 
100     "__kernel void test_buffer_fill_uint4(__global uint4 *src, __global uint4 *dst)\n"
101     "{\n"
102     "    int  tid = get_global_id(0);\n"
103     "\n"
104     "    dst[tid] = src[tid];\n"
105     "}\n",
106 
107     "__kernel void test_buffer_fill_uint8(__global uint8 *src, __global uint8 *dst)\n"
108     "{\n"
109     "    int  tid = get_global_id(0);\n"
110     "\n"
111     "    dst[tid] = src[tid];\n"
112     "}\n",
113 
114     "__kernel void test_buffer_fill_uint16(__global uint16 *src, __global uint16 *dst)\n"
115     "{\n"
116     "    int  tid = get_global_id(0);\n"
117     "\n"
118     "    dst[tid] = src[tid];\n"
119     "}\n" };
120 
121 static const char *uint_kernel_name[] = { "test_buffer_fill_uint", "test_buffer_fill_uint2", "test_buffer_fill_uint4", "test_buffer_fill_uint8", "test_buffer_fill_uint16" };
122 
123 
124 const char *buffer_fill_short_kernel_code[] = {
125     "__kernel void test_buffer_fill_short(__global short *src, __global short *dst)\n"
126     "{\n"
127     "    int  tid = get_global_id(0);\n"
128     "\n"
129     "    dst[tid] = src[tid];\n"
130     "}\n",
131 
132     "__kernel void test_buffer_fill_short2(__global short2 *src, __global short2 *dst)\n"
133     "{\n"
134     "    int  tid = get_global_id(0);\n"
135     "\n"
136     "    dst[tid] = src[tid];\n"
137     "}\n",
138 
139     "__kernel void test_buffer_fill_short4(__global short4 *src, __global short4 *dst)\n"
140     "{\n"
141     "    int  tid = get_global_id(0);\n"
142     "\n"
143     "    dst[tid] = src[tid];\n"
144     "}\n",
145 
146     "__kernel void test_buffer_fill_short8(__global short8 *src, __global short8 *dst)\n"
147     "{\n"
148     "    int  tid = get_global_id(0);\n"
149     "\n"
150     "    dst[tid] = src[tid];\n"
151     "}\n",
152 
153     "__kernel void test_buffer_fill_short16(__global short16 *src, __global short16 *dst)\n"
154     "{\n"
155     "    int  tid = get_global_id(0);\n"
156     "\n"
157     "    dst[tid] = src[tid];\n"
158     "}\n" };
159 
160 static const char *short_kernel_name[] = { "test_buffer_fill_short", "test_buffer_fill_short2", "test_buffer_fill_short4", "test_buffer_fill_short8", "test_buffer_fill_short16" };
161 
162 
163 const char *buffer_fill_ushort_kernel_code[] = {
164     "__kernel void test_buffer_fill_ushort(__global ushort *src, __global ushort *dst)\n"
165     "{\n"
166     "    int  tid = get_global_id(0);\n"
167     "\n"
168     "    dst[tid] = src[tid];\n"
169     "}\n",
170 
171     "__kernel void test_buffer_fill_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
172     "{\n"
173     "    int  tid = get_global_id(0);\n"
174     "\n"
175     "    dst[tid] = src[tid];\n"
176     "}\n",
177 
178     "__kernel void test_buffer_fill_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
179     "{\n"
180     "    int  tid = get_global_id(0);\n"
181     "\n"
182     "    dst[tid] = src[tid];\n"
183     "}\n",
184 
185     "__kernel void test_buffer_fill_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
186     "{\n"
187     "    int  tid = get_global_id(0);\n"
188     "\n"
189     "    dst[tid] = src[tid];\n"
190     "}\n",
191 
192     "__kernel void test_buffer_fill_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
193     "{\n"
194     "    int  tid = get_global_id(0);\n"
195     "\n"
196     "    dst[tid] = src[tid];\n"
197     "}\n" };
198 
199 static const char *ushort_kernel_name[] = { "test_buffer_fill_ushort", "test_buffer_fill_ushort2", "test_buffer_fill_ushort4", "test_buffer_fill_ushort8", "test_buffer_fill_ushort16" };
200 
201 
202 const char *buffer_fill_char_kernel_code[] = {
203     "__kernel void test_buffer_fill_char(__global char *src, __global char *dst)\n"
204     "{\n"
205     "    int  tid = get_global_id(0);\n"
206     "\n"
207     "    dst[tid] = src[tid];\n"
208     "}\n",
209 
210     "__kernel void test_buffer_fill_char2(__global char2 *src, __global char2 *dst)\n"
211     "{\n"
212     "    int  tid = get_global_id(0);\n"
213     "\n"
214     "    dst[tid] = src[tid];\n"
215     "}\n",
216 
217     "__kernel void test_buffer_fill_char4(__global char4 *src, __global char4 *dst)\n"
218     "{\n"
219     "    int  tid = get_global_id(0);\n"
220     "\n"
221     "    dst[tid] = src[tid];\n"
222     "}\n",
223 
224     "__kernel void test_buffer_fill_char8(__global char8 *src, __global char8 *dst)\n"
225     "{\n"
226     "    int  tid = get_global_id(0);\n"
227     "\n"
228     "    dst[tid] = src[tid];\n"
229     "}\n",
230 
231     "__kernel void test_buffer_fill_char16(__global char16 *src, __global char16 *dst)\n"
232     "{\n"
233     "    int  tid = get_global_id(0);\n"
234     "\n"
235     "    dst[tid] = src[tid];\n"
236     "}\n" };
237 
238 static const char *char_kernel_name[] = { "test_buffer_fill_char", "test_buffer_fill_char2", "test_buffer_fill_char4", "test_buffer_fill_char8", "test_buffer_fill_char16" };
239 
240 
241 const char *buffer_fill_uchar_kernel_code[] = {
242     "__kernel void test_buffer_fill_uchar(__global uchar *src, __global uchar *dst)\n"
243     "{\n"
244     "    int  tid = get_global_id(0);\n"
245     "\n"
246     "    dst[tid] = src[tid];\n"
247     "}\n",
248 
249     "__kernel void test_buffer_fill_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
250     "{\n"
251     "    int  tid = get_global_id(0);\n"
252     "\n"
253     "    dst[tid] = src[tid];\n"
254     "}\n",
255 
256     "__kernel void test_buffer_fill_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
257     "{\n"
258     "    int  tid = get_global_id(0);\n"
259     "\n"
260     "    dst[tid] = src[tid];\n"
261     "}\n",
262 
263     "__kernel void test_buffer_fill_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
264     "{\n"
265     "    int  tid = get_global_id(0);\n"
266     "\n"
267     "    dst[tid] = src[tid];\n"
268     "}\n",
269 
270     "__kernel void test_buffer_fill_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
271     "{\n"
272     "    int  tid = get_global_id(0);\n"
273     "\n"
274     "    dst[tid] = src[tid];\n"
275     "}\n" };
276 
277 static const char *uchar_kernel_name[] = { "test_buffer_fill_uchar", "test_buffer_fill_uchar2", "test_buffer_fill_uchar4", "test_buffer_fill_uchar8", "test_buffer_fill_uchar16" };
278 
279 
280 const char *buffer_fill_long_kernel_code[] = {
281     "__kernel void test_buffer_fill_long(__global long *src, __global long *dst)\n"
282     "{\n"
283     "    int  tid = get_global_id(0);\n"
284     "\n"
285     "    dst[tid] = src[tid];\n"
286     "}\n",
287 
288     "__kernel void test_buffer_fill_long2(__global long2 *src, __global long2 *dst)\n"
289     "{\n"
290     "    int  tid = get_global_id(0);\n"
291     "\n"
292     "    dst[tid] = src[tid];\n"
293     "}\n",
294 
295     "__kernel void test_buffer_fill_long4(__global long4 *src, __global long4 *dst)\n"
296     "{\n"
297     "    int  tid = get_global_id(0);\n"
298     "\n"
299     "    dst[tid] = src[tid];\n"
300     "}\n",
301 
302     "__kernel void test_buffer_fill_long8(__global long8 *src, __global long8 *dst)\n"
303     "{\n"
304     "    int  tid = get_global_id(0);\n"
305     "\n"
306     "    dst[tid] = src[tid];\n"
307     "}\n",
308 
309     "__kernel void test_buffer_fill_long16(__global long16 *src, __global long16 *dst)\n"
310     "{\n"
311     "    int  tid = get_global_id(0);\n"
312     "\n"
313     "    dst[tid] = src[tid];\n"
314     "}\n" };
315 
316 static const char *long_kernel_name[] = { "test_buffer_fill_long", "test_buffer_fill_long2", "test_buffer_fill_long4", "test_buffer_fill_long8", "test_buffer_fill_long16" };
317 
318 
319 const char *buffer_fill_ulong_kernel_code[] = {
320     "__kernel void test_buffer_fill_ulong(__global ulong *src, __global ulong *dst)\n"
321     "{\n"
322     "    int  tid = get_global_id(0);\n"
323     "\n"
324     "    dst[tid] = src[tid];\n"
325     "}\n",
326 
327     "__kernel void test_buffer_fill_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
328     "{\n"
329     "    int  tid = get_global_id(0);\n"
330     "\n"
331     "    dst[tid] = src[tid];\n"
332     "}\n",
333 
334     "__kernel void test_buffer_fill_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
335     "{\n"
336     "    int  tid = get_global_id(0);\n"
337     "\n"
338     "    dst[tid] = src[tid];\n"
339     "}\n",
340 
341     "__kernel void test_buffer_fill_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
342     "{\n"
343     "    int  tid = get_global_id(0);\n"
344     "\n"
345     "    dst[tid] = src[tid];\n"
346     "}\n",
347 
348     "__kernel void test_buffer_fill_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
349     "{\n"
350     "    int  tid = get_global_id(0);\n"
351     "\n"
352     "    dst[tid] = src[tid];\n"
353     "}\n" };
354 
355 static const char *ulong_kernel_name[] = { "test_buffer_fill_ulong", "test_buffer_fill_ulong2", "test_buffer_fill_ulong4", "test_buffer_fill_ulong8", "test_buffer_fill_ulong16" };
356 
357 
358 const char *buffer_fill_float_kernel_code[] = {
359     "__kernel void test_buffer_fill_float(__global float *src, __global float *dst)\n"
360     "{\n"
361     "    int  tid = get_global_id(0);\n"
362     "\n"
363     "    dst[tid] = src[tid];\n"
364     "}\n",
365 
366     "__kernel void test_buffer_fill_float2(__global float2 *src, __global float2 *dst)\n"
367     "{\n"
368     "    int  tid = get_global_id(0);\n"
369     "\n"
370     "    dst[tid] = src[tid];\n"
371     "}\n",
372 
373     "__kernel void test_buffer_fill_float4(__global float4 *src, __global float4 *dst)\n"
374     "{\n"
375     "    int  tid = get_global_id(0);\n"
376     "\n"
377     "    dst[tid] = src[tid];\n"
378     "}\n",
379 
380     "__kernel void test_buffer_fill_float8(__global float8 *src, __global float8 *dst)\n"
381     "{\n"
382     "    int  tid = get_global_id(0);\n"
383     "\n"
384     "    dst[tid] = src[tid];\n"
385     "}\n",
386 
387     "__kernel void test_buffer_fill_float16(__global float16 *src, __global float16 *dst)\n"
388     "{\n"
389     "    int  tid = get_global_id(0);\n"
390     "\n"
391     "    dst[tid] = src[tid];\n"
392     "}\n" };
393 
394 static const char *float_kernel_name[] = { "test_buffer_fill_float", "test_buffer_fill_float2", "test_buffer_fill_float4", "test_buffer_fill_float8", "test_buffer_fill_float16" };
395 
396 
397 static const char *struct_kernel_code =
398 "typedef struct{\n"
399 "int    a;\n"
400 "float    b;\n"
401 "} TestStruct;\n"
402 "__kernel void read_fill_struct(__global TestStruct *src, __global TestStruct *dst)\n"
403 "{\n"
404 "    int  tid = get_global_id(0);\n"
405 "\n"
406 "    dst[tid].a = src[tid].a;\n"
407 "     dst[tid].b = src[tid].b;\n"
408 "}\n";
409 
410 
411 
verify_fill_int(void * ptr1,void * ptr2,int n)412 static int verify_fill_int( void *ptr1, void *ptr2, int n )
413 {
414     int     i;
415     cl_int  *inptr = (cl_int *)ptr1;
416     cl_int  *outptr = (cl_int *)ptr2;
417 
418     for (i=0; i<n; i++){
419         if ( outptr[i] != inptr[i] )
420             return -1;
421     }
422 
423     return 0;
424 }
425 
426 
verify_fill_uint(void * ptr1,void * ptr2,int n)427 static int verify_fill_uint( void *ptr1, void *ptr2, int n )
428 {
429     int     i;
430     cl_uint *inptr = (cl_uint *)ptr1;
431     cl_uint *outptr = (cl_uint *)ptr2;
432 
433     for (i=0; i<n; i++){
434         if ( outptr[i] != inptr[i] )
435             return -1;
436     }
437 
438     return 0;
439 }
440 
441 
verify_fill_short(void * ptr1,void * ptr2,int n)442 static int verify_fill_short( void *ptr1, void *ptr2, int n )
443 {
444     int      i;
445     cl_short *inptr = (cl_short *)ptr1;
446     cl_short *outptr = (cl_short *)ptr2;
447 
448     for (i=0; i<n; i++){
449         if ( outptr[i] != inptr[i] )
450             return -1;
451     }
452 
453     return 0;
454 }
455 
456 
verify_fill_ushort(void * ptr1,void * ptr2,int n)457 static int verify_fill_ushort( void *ptr1, void *ptr2, int n )
458 {
459     int       i;
460     cl_ushort *inptr = (cl_ushort *)ptr1;
461     cl_ushort *outptr = (cl_ushort *)ptr2;
462 
463     for (i=0; i<n; i++){
464         if ( outptr[i] != inptr[i] )
465             return -1;
466     }
467 
468     return 0;
469 }
470 
471 
verify_fill_char(void * ptr1,void * ptr2,int n)472 static int verify_fill_char( void *ptr1, void *ptr2, int n )
473 {
474     int     i;
475     cl_char *inptr = (cl_char *)ptr1;
476     cl_char *outptr = (cl_char *)ptr2;
477 
478     for (i=0; i<n; i++){
479         if ( outptr[i] != inptr[i] )
480             return -1;
481     }
482 
483     return 0;
484 }
485 
486 
verify_fill_uchar(void * ptr1,void * ptr2,int n)487 static int verify_fill_uchar( void *ptr1, void *ptr2, int n )
488 {
489     int      i;
490     cl_uchar *inptr = (cl_uchar *)ptr1;
491     cl_uchar *outptr = (cl_uchar *)ptr2;
492 
493     for (i=0; i<n; i++){
494         if ( outptr[i] != inptr[i] )
495             return -1;
496     }
497 
498     return 0;
499 }
500 
501 
verify_fill_long(void * ptr1,void * ptr2,int n)502 static int verify_fill_long( void *ptr1, void *ptr2, int n )
503 {
504     int     i;
505     cl_long *inptr = (cl_long *)ptr1;
506     cl_long *outptr = (cl_long *)ptr2;
507 
508     for (i=0; i<n; i++){
509         if ( outptr[i] != inptr[i] )
510             return -1;
511     }
512 
513     return 0;
514 }
515 
516 
verify_fill_ulong(void * ptr1,void * ptr2,int n)517 static int verify_fill_ulong( void *ptr1, void *ptr2, int n )
518 {
519     int      i;
520     cl_ulong *inptr = (cl_ulong *)ptr1;
521     cl_ulong *outptr = (cl_ulong *)ptr2;
522 
523     for (i=0; i<n; i++){
524         if ( outptr[i] != inptr[i] )
525             return -1;
526     }
527 
528     return 0;
529 }
530 
531 
verify_fill_float(void * ptr1,void * ptr2,int n)532 static int verify_fill_float( void *ptr1, void *ptr2, int n )
533 {
534     int      i;
535     cl_float *inptr = (cl_float *)ptr1;
536     cl_float *outptr = (cl_float *)ptr2;
537 
538     for (i=0; i<n; i++){
539         if ( outptr[i] != inptr[i] )
540             return -1;
541     }
542 
543     return 0;
544 }
545 
546 
verify_fill_struct(void * ptr1,void * ptr2,int n)547 static int verify_fill_struct( void *ptr1, void *ptr2, int n )
548 {
549     int         i;
550     TestStruct  *inptr = (TestStruct *)ptr1;
551     TestStruct  *outptr = (TestStruct *)ptr2;
552 
553     for (i=0; i<n; i++){
554         if ( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
555             return -1;
556     }
557 
558     return 0;
559 }
560 
561 
562 
test_buffer_fill(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],void * hostptr[5],void * pattern[5],size_t offset_elements,size_t fill_elements,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int))563 int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type,
564                      int loops, void *inptr[5], void *hostptr[5], void *pattern[5], size_t offset_elements, size_t fill_elements,
565                      const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
566 {
567     cl_mem      buffers[10];
568     void        *outptr[5];
569     cl_program  program[5];
570     cl_kernel   kernel[5];
571     cl_event    event[2];
572     size_t      ptrSizes[5];
573     size_t      global_work_size[3];
574 #ifdef USE_LOCAL_WORK_GROUP
575     size_t      local_work_size[3];
576 #endif
577     int         err;
578     int         i, ii;
579     int         src_flag_id;
580     int         total_errors = 0;
581 
582     size_t      min_alignment = get_min_alignment(context);
583 
584     global_work_size[0] = (size_t)num_elements;
585 
586     ptrSizes[0] = size;
587     ptrSizes[1] = ptrSizes[0] << 1;
588     ptrSizes[2] = ptrSizes[1] << 1;
589     ptrSizes[3] = ptrSizes[2] << 1;
590     ptrSizes[4] = ptrSizes[3] << 1;
591 
592     for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
593         log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
594 
595         loops = ( loops < 5 ? loops : 5 );
596         for ( i = 0; i < loops; i++ ){
597             ii = i << 1;
598             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
599                 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, hostptr[i], &err);
600             else
601                 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSizes[i] * num_elements, NULL, &err);
602             if ( !buffers[ii] || err){
603                 print_error(err, "clCreateBuffer failed\n" );
604                 return -1;
605             }
606             // Initialize source buffer with 0, since the validation code expects 0(s) outside of the fill region.
607             if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
608                 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, hostptr[i], 0, NULL, NULL);
609                 if ( err != CL_SUCCESS ){
610                     print_error(err, "clEnqueueWriteBuffer failed\n" );
611                     return -1;
612                 }
613             }
614 
615             outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
616             memset(outptr[i], 0, ptrSizes[i] * num_elements);
617             buffers[ii+1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  ptrSizes[i] * num_elements, outptr[i], &err);
618             if ( !buffers[ii+1] || err){
619                 print_error(err, "clCreateBuffer failed\n" );
620                 clReleaseMemObject( buffers[ii] );
621                 align_free( outptr[i] );
622                 return -1;
623             }
624 
625             err = clEnqueueFillBuffer(queue, buffers[ii], pattern[i], ptrSizes[i],
626                                       ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements,
627                                       0, NULL, &(event[0]));
628             /* uncomment for test debugging
629              err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0]));
630              */
631             if ( err != CL_SUCCESS ){
632                 print_error( err, " clEnqueueFillBuffer failed" );
633                 clReleaseMemObject( buffers[ii] );
634                 clReleaseMemObject( buffers[ii+1] );
635                 align_free( outptr[i] );
636                 return -1;
637             }
638 
639             err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
640             if ( err ){
641                 log_error( " Error creating program for %s\n", type );
642                 clReleaseMemObject( buffers[ii] );
643                 clReleaseMemObject( buffers[ii+1] );
644                 align_free( outptr[i] );
645                 return -1;
646             }
647 
648 #ifdef USE_LOCAL_WORK_GROUP
649             err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
650             test_error( err, "Unable to get work group size to use" );
651 #endif
652 
653             err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
654             err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
655             if ( err != CL_SUCCESS ){
656                 print_error( err, "clSetKernelArg failed" );
657                 clReleaseKernel( kernel[i] );
658                 clReleaseProgram( program[i] );
659                 clReleaseMemObject( buffers[ii] );
660                 clReleaseMemObject( buffers[ii+1] );
661                 align_free( outptr[i] );
662                 return -1;
663             }
664 
665             err = clWaitForEvents(  1, &(event[0]) );
666             if ( err != CL_SUCCESS ){
667                 print_error( err, "clWaitForEvents() failed" );
668                 clReleaseKernel( kernel[i] );
669                 clReleaseProgram( program[i] );
670                 clReleaseMemObject( buffers[ii] );
671                 clReleaseMemObject( buffers[ii+1] );
672                 align_free( outptr[i] );
673                 return -1;
674             }
675             clReleaseEvent(event[0]);
676 
677 #ifdef USE_LOCAL_WORK_GROUP
678             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
679 #else
680             err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
681 #endif
682             if (err != CL_SUCCESS){
683                 print_error( err, "clEnqueueNDRangeKernel failed" );
684                 return -1;
685             }
686 
687             err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) );
688             if (err != CL_SUCCESS){
689                 print_error( err, "clEnqueueReadBuffer failed" );
690                 return -1;
691             }
692 
693             err = clWaitForEvents( 1, &(event[1]) );
694             if ( err != CL_SUCCESS ){
695                 print_error( err, "clWaitForEvents() failed" );
696             }
697             clReleaseEvent(event[1]);
698 
699             if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
700                 log_error( " %s%d test failed\n", type, 1<<i );
701                 total_errors++;
702             }
703             else{
704                 log_info( " %s%d test passed\n", type, 1<<i );
705             }
706 
707             // cleanup
708             clReleaseMemObject( buffers[ii] );
709             clReleaseMemObject( buffers[ii+1] );
710             clReleaseKernel( kernel[i] );
711             clReleaseProgram( program[i] );
712             align_free( outptr[i] );
713         }
714     } // src cl_mem_flag
715 
716     return total_errors;
717 
718 }   // end test_buffer_fill()
719 
720 
test_buffer_fill_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)721 int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
722 {
723     cl_mem      buffers[2];
724     void        *outptr;
725     TestStruct  *inptr;
726     TestStruct  *hostptr;
727     TestStruct  *pattern;
728     cl_program  program;
729     cl_kernel   kernel;
730     cl_event    event[2];
731     size_t      ptrSize = sizeof( TestStruct );
732     size_t      global_work_size[3];
733 #ifdef USE_LOCAL_WORK_GROUP
734     size_t      local_work_size[3];
735 #endif
736     int         n, err;
737     size_t      j, offset_elements, fill_elements;
738     int         src_flag_id;
739     int         total_errors = 0;
740     MTdata      d = init_genrand( gRandomSeed );
741 
742     size_t      min_alignment = get_min_alignment(context);
743 
744     global_work_size[0] = (size_t)num_elements;
745 
746     // Test with random offsets and fill sizes
747     for ( n = 0; n < 8; n++ ){
748         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
749         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
750         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
751 
752         pattern = (TestStruct *)malloc(ptrSize);
753         pattern->a = (cl_int)genrand_int32(d);
754         pattern->b = (cl_float)get_random_float( -FLT_MAX, FLT_MAX, d );
755 
756         inptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
757         for ( j = 0; j < offset_elements; j++ ) {
758             inptr[j].a = 0;
759             inptr[j].b =0;
760         }
761         for ( j = offset_elements; j < offset_elements + fill_elements; j++ ) {
762             inptr[j].a = pattern->a;
763             inptr[j].b = pattern->b;
764         }
765         for ( j = offset_elements + fill_elements; j < (size_t)num_elements; j++ ) {
766             inptr[j].a = 0;
767             inptr[j].b = 0;
768         }
769 
770         hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
771         memset(hostptr, 0, ptrSize * num_elements);
772 
773         for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
774             log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
775 
776             if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
777                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, hostptr, &err);
778             else
779                 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id],  ptrSize * num_elements, NULL, &err);
780             if ( err ){
781                 print_error(err, " clCreateBuffer failed\n" );
782                 clReleaseEvent( event[0] );
783                 clReleaseEvent( event[1] );
784                 free( (void *)pattern );
785                 align_free( (void *)inptr );
786                 align_free( (void *)hostptr );
787                 free_mtdata(d);
788                 return -1;
789             }
790             if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
791                 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL);
792                 if ( err != CL_SUCCESS ){
793                     print_error(err, " clEnqueueWriteBuffer failed\n" );
794                     clReleaseEvent( event[0] );
795                     clReleaseEvent( event[1] );
796                     free( (void *)pattern );
797                     align_free( (void *)inptr );
798                     align_free( (void *)hostptr );
799                     free_mtdata(d);
800                     return -1;
801                 }
802             }
803             outptr = align_malloc( ptrSize * num_elements, min_alignment);
804             memset(outptr, 0, ptrSize * num_elements);
805             buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,  ptrSize * num_elements, outptr, &err);
806             if ( ! buffers[1] || err){
807                 print_error(err, " clCreateBuffer failed\n" );
808                 align_free( outptr );
809                 clReleaseMemObject(buffers[0]);
810                 clReleaseEvent( event[0] );
811                 clReleaseEvent( event[1] );
812                 free( (void *)pattern );
813                 align_free( (void *)inptr );
814                 align_free( (void *)hostptr );
815                 free_mtdata(d);
816                 return -1;
817             }
818 
819             err = clEnqueueFillBuffer(queue, buffers[0], pattern, ptrSize,
820                                       ptrSize * offset_elements, ptrSize * fill_elements,
821                                       0, NULL, &(event[0]));
822             /* uncomment for test debugging
823              err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0]));
824              */
825             if ( err != CL_SUCCESS ){
826                 print_error( err, " clEnqueueFillBuffer failed" );
827                 align_free( outptr );
828                 clReleaseMemObject(buffers[0]);
829                 clReleaseMemObject(buffers[1]);
830                 clReleaseEvent( event[0] );
831                 clReleaseEvent( event[1] );
832                 free( (void *)pattern );
833                 align_free( (void *)inptr );
834                 align_free( (void *)hostptr );
835                 free_mtdata(d);
836                 return -1;
837             }
838 
839             err = create_single_kernel_helper( context, &program, &kernel, 1, &struct_kernel_code, "read_fill_struct" );
840             if ( err ){
841                 log_error( " Error creating program for struct\n" );
842                 align_free( outptr );
843                 clReleaseMemObject(buffers[0]);
844                 clReleaseMemObject(buffers[1]);
845                 clReleaseEvent( event[0] );
846                 clReleaseEvent( event[1] );
847                 free( (void *)pattern );
848                 align_free( (void *)inptr );
849                 align_free( (void *)hostptr );
850                 free_mtdata(d);
851                 return -1;
852             }
853 
854 #ifdef USE_LOCAL_WORK_GROUP
855             err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
856             test_error( err, "Unable to get work group size to use" );
857 #endif
858 
859             err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffers[0] );
860             err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] );
861             if ( err != CL_SUCCESS ){
862                 print_error( err, " clSetKernelArg failed" );
863                 clReleaseKernel( kernel );
864                 clReleaseProgram( program );
865                 align_free( outptr );
866                 clReleaseMemObject(buffers[0]);
867                 clReleaseMemObject(buffers[1]);
868                 clReleaseEvent( event[0] );
869                 clReleaseEvent( event[1] );
870                 free( (void *)pattern );
871                 align_free( (void *)inptr );
872                 align_free( (void *)hostptr );
873                 free_mtdata(d);
874                 return -1;
875             }
876 
877             err = clWaitForEvents(  1, &(event[0]) );
878             if ( err != CL_SUCCESS ){
879                 print_error( err, "clWaitForEvents() failed" );
880                 clReleaseKernel( kernel );
881                 clReleaseProgram( program );
882                 align_free( outptr );
883                 clReleaseMemObject(buffers[0]);
884                 clReleaseMemObject(buffers[1]);
885                 clReleaseEvent( event[0] );
886                 clReleaseEvent( event[1] );
887                 free( (void *)pattern );
888                 align_free( (void *)inptr );
889                 align_free( (void *)hostptr );
890                 free_mtdata(d);
891                 return -1;
892             }
893             clReleaseEvent( event[0] );
894 
895 #ifdef USE_LOCAL_WORK_GROUP
896             err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
897 #else
898             err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
899 #endif
900             if ( err != CL_SUCCESS ){
901                 print_error( err, " clEnqueueNDRangeKernel failed" );
902                 clReleaseKernel( kernel );
903                 clReleaseProgram( program );
904                 align_free( outptr );
905                 clReleaseMemObject(buffers[0]);
906                 clReleaseMemObject(buffers[1]);
907                 clReleaseEvent( event[0] );
908                 clReleaseEvent( event[1] );
909                 free( (void *)pattern );
910                 align_free( (void *)inptr );
911                 align_free( (void *)hostptr );
912                 free_mtdata(d);
913                 return -1;
914             }
915 
916             err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) );
917             if ( err != CL_SUCCESS ){
918                 print_error( err, " clEnqueueReadBuffer failed" );
919                 clReleaseKernel( kernel );
920                 clReleaseProgram( program );
921                 align_free( outptr );
922                 clReleaseMemObject(buffers[0]);
923                 clReleaseMemObject(buffers[1]);
924                 clReleaseEvent( event[0] );
925                 clReleaseEvent( event[1] );
926                 free( (void *)pattern );
927                 align_free( (void *)inptr );
928                 align_free( (void *)hostptr );
929                 free_mtdata(d);
930                 return -1;
931             }
932 
933             err = clWaitForEvents( 1, &(event[1]) );
934             if ( err != CL_SUCCESS ){
935                 print_error( err, "clWaitForEvents() failed" );
936             }
937             clReleaseEvent( event[1] );
938 
939             if ( verify_fill_struct( inptr, outptr, num_elements) ) {
940                 log_error( " buffer_FILL async struct test failed\n" );
941                 total_errors++;
942             }
943             else{
944                 log_info( " buffer_FILL async struct test passed\n" );
945             }
946             // cleanup
947             clReleaseKernel( kernel );
948             clReleaseProgram( program );
949             align_free( outptr );
950             clReleaseMemObject( buffers[0] );
951             clReleaseMemObject( buffers[1] );
952         } // src cl_mem_flag
953         free( (void *)pattern );
954         align_free( (void *)inptr );
955         align_free( (void *)hostptr );
956     }
957 
958     free_mtdata(d);
959 
960     return total_errors;
961 
962 }   // end test_buffer_fill_struct()
963 
964 
test_buffer_fill_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)965 int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
966 {
967     cl_int  *inptr[5];
968     cl_int  *hostptr[5];
969     cl_int  *pattern[5];
970     size_t  ptrSizes[5];
971     int     n, i, err=0;
972     size_t  j, offset_elements, fill_elements;
973     int     (*foo)(void *,void *,int);
974     MTdata  d = init_genrand( gRandomSeed );
975 
976     size_t  min_alignment = get_min_alignment(context);
977 
978     foo = verify_fill_int;
979 
980     ptrSizes[0] = sizeof(cl_int);
981     ptrSizes[1] = ptrSizes[0] << 1;
982     ptrSizes[2] = ptrSizes[1] << 1;
983     ptrSizes[3] = ptrSizes[2] << 1;
984     ptrSizes[4] = ptrSizes[3] << 1;
985 
986     // Test with random offsets and fill sizes
987     for ( n = 0; n < 8; n++ ){
988         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
989         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
990         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
991 
992         for ( i = 0; i < 5; i++ ){
993             pattern[i] = (cl_int *)malloc(ptrSizes[i]);
994             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
995                 pattern[i][j] = TEST_PRIME_INT;
996 
997             inptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
998             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
999                 inptr[i][j] = 0;
1000             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1001                 inptr[i][j] = TEST_PRIME_INT;
1002             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1003                 inptr[i][j] = 0;
1004 
1005             hostptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1006             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1007         }
1008 
1009         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int",
1010                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1011                              offset_elements, fill_elements,
1012                              buffer_fill_int_kernel_code, int_kernel_name, foo ))
1013             err++;
1014 
1015         for ( i = 0; i < 5; i++ ){
1016             free( (void *)pattern[i] );
1017             align_free( (void *)inptr[i] );
1018             align_free( (void *)hostptr[i] );
1019         }
1020 
1021     }
1022 
1023     free_mtdata(d);
1024 
1025     return err;
1026 
1027 }   // end test_buffer_int_fill()
1028 
1029 
test_buffer_fill_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1030 int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1031 {
1032     cl_uint *inptr[5];
1033     cl_uint *hostptr[5];
1034     cl_uint *pattern[5];
1035     size_t  ptrSizes[5];
1036     int     n, i, err=0;
1037     size_t  j, offset_elements, fill_elements;
1038     MTdata  d = init_genrand( gRandomSeed );
1039     int     (*foo)(void *,void *,int);
1040 
1041     size_t  min_alignment = get_min_alignment(context);
1042 
1043     foo = verify_fill_uint;
1044 
1045     ptrSizes[0] = sizeof(cl_uint);
1046     ptrSizes[1] = ptrSizes[0] << 1;
1047     ptrSizes[2] = ptrSizes[1] << 1;
1048     ptrSizes[3] = ptrSizes[2] << 1;
1049     ptrSizes[4] = ptrSizes[3] << 1;
1050 
1051     // Test with random offsets and fill sizes
1052     for ( n = 0; n < 8; n++ ){
1053         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1054         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1055         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1056 
1057         for ( i = 0; i < 5; i++ ){
1058             pattern[i] = (cl_uint *)malloc(ptrSizes[i]);
1059             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1060                 pattern[i][j] = TEST_PRIME_UINT;
1061 
1062             inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1063             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1064                 inptr[i][j] = 0;
1065             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1066                 inptr[i][j] = TEST_PRIME_UINT;
1067             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1068                 inptr[i][j] = 0;
1069 
1070             hostptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1071             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1072         }
1073 
1074         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint",
1075                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1076                              offset_elements, fill_elements,
1077                              buffer_fill_uint_kernel_code, uint_kernel_name, foo ))
1078             err++;
1079 
1080         for ( i = 0; i < 5; i++ ){
1081             free( (void *)pattern[i] );
1082             align_free( (void *)inptr[i] );
1083             align_free( (void *)hostptr[i] );
1084         }
1085 
1086     }
1087 
1088     free_mtdata(d);
1089 
1090     return err;
1091 
1092 }   // end test_buffer_uint_fill()
1093 
1094 
test_buffer_fill_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1095 int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1096 {
1097     cl_short *inptr[5];
1098     cl_short *hostptr[5];
1099     cl_short *pattern[5];
1100     size_t   ptrSizes[5];
1101     int      n, i, err=0;
1102     size_t   j, offset_elements, fill_elements;
1103     MTdata   d = init_genrand( gRandomSeed );
1104     int      (*foo)(void *,void *,int);
1105 
1106     size_t  min_alignment = get_min_alignment(context);
1107 
1108     foo = verify_fill_short;
1109 
1110     ptrSizes[0] = sizeof(cl_short);
1111     ptrSizes[1] = ptrSizes[0] << 1;
1112     ptrSizes[2] = ptrSizes[1] << 1;
1113     ptrSizes[3] = ptrSizes[2] << 1;
1114     ptrSizes[4] = ptrSizes[3] << 1;
1115 
1116     // Test with random offsets and fill sizes
1117     for ( n = 0; n < 8; n++ ){
1118         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1119         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1120         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1121 
1122         for ( i = 0; i < 5; i++ ){
1123             pattern[i] = (cl_short *)malloc(ptrSizes[i]);
1124             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1125                 pattern[i][j] = TEST_PRIME_SHORT;
1126 
1127             inptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1128             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1129                 inptr[i][j] = 0;
1130             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1131                 inptr[i][j] = TEST_PRIME_SHORT;
1132             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1133                 inptr[i][j] = 0;
1134 
1135             hostptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1136             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1137         }
1138 
1139         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short",
1140                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1141                              offset_elements, fill_elements,
1142                              buffer_fill_short_kernel_code, short_kernel_name, foo ))
1143             err++;
1144 
1145         for ( i = 0; i < 5; i++ ){
1146             free( (void *)pattern[i] );
1147             align_free( (void *)inptr[i] );
1148             align_free( (void *)hostptr[i] );
1149         }
1150 
1151     }
1152 
1153     free_mtdata(d);
1154 
1155     return err;
1156 
1157 }   // end test_buffer_short_fill()
1158 
1159 
test_buffer_fill_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1160 int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1161 {
1162     cl_ushort *inptr[5];
1163     cl_ushort *hostptr[5];
1164     cl_ushort *pattern[5];
1165     size_t    ptrSizes[5];
1166     int       n, i, err=0;
1167     size_t    j, offset_elements, fill_elements;
1168     MTdata    d = init_genrand( gRandomSeed );
1169     int       (*foo)(void *,void *,int);
1170 
1171     size_t    min_alignment = get_min_alignment(context);
1172 
1173     foo = verify_fill_ushort;
1174 
1175     ptrSizes[0] = sizeof(cl_ushort);
1176     ptrSizes[1] = ptrSizes[0] << 1;
1177     ptrSizes[2] = ptrSizes[1] << 1;
1178     ptrSizes[3] = ptrSizes[2] << 1;
1179     ptrSizes[4] = ptrSizes[3] << 1;
1180 
1181     // Test with random offsets and fill sizes
1182     for ( n = 0; n < 8; n++ ){
1183         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1184         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1185         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1186 
1187         for ( i = 0; i < 5; i++ ){
1188             pattern[i] = (cl_ushort *)malloc(ptrSizes[i]);
1189             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1190                 pattern[i][j] = TEST_PRIME_USHORT;
1191 
1192             inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1193             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1194                 inptr[i][j] = 0;
1195             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1196                 inptr[i][j] = TEST_PRIME_USHORT;
1197             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1198                 inptr[i][j] = 0;
1199 
1200             hostptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1201             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1202         }
1203 
1204         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort",
1205                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1206                              offset_elements, fill_elements,
1207                              buffer_fill_ushort_kernel_code, ushort_kernel_name, foo ))
1208             err++;
1209 
1210         for ( i = 0; i < 5; i++ ){
1211             free( (void *)pattern[i] );
1212             align_free( (void *)inptr[i] );
1213             align_free( (void *)hostptr[i] );
1214         }
1215 
1216     }
1217 
1218     free_mtdata(d);
1219 
1220     return err;
1221 
1222 }   // end test_buffer_ushort_fill()
1223 
1224 
test_buffer_fill_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1225 int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1226 {
1227     cl_char *inptr[5];
1228     cl_char *hostptr[5];
1229     cl_char *pattern[5];
1230     size_t  ptrSizes[5];
1231     int     n, i, err=0;
1232     size_t  j, offset_elements, fill_elements;
1233     MTdata  d = init_genrand( gRandomSeed );
1234     int     (*foo)(void *,void *,int);
1235 
1236     size_t  min_alignment = get_min_alignment(context);
1237 
1238     foo = verify_fill_char;
1239 
1240     ptrSizes[0] = sizeof(cl_char);
1241     ptrSizes[1] = ptrSizes[0] << 1;
1242     ptrSizes[2] = ptrSizes[1] << 1;
1243     ptrSizes[3] = ptrSizes[2] << 1;
1244     ptrSizes[4] = ptrSizes[3] << 1;
1245 
1246     // Test with random offsets and fill sizes
1247     for ( n = 0; n < 8; n++ ){
1248         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1249         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1250         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1251 
1252         for ( i = 0; i < 5; i++ ){
1253             pattern[i] = (cl_char *)malloc(ptrSizes[i]);
1254             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1255                 pattern[i][j] = TEST_PRIME_CHAR;
1256 
1257             inptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1258             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1259                 inptr[i][j] = 0;
1260             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1261                 inptr[i][j] = TEST_PRIME_CHAR;
1262             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1263                 inptr[i][j] = 0;
1264 
1265             hostptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1266             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1267         }
1268 
1269         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char",
1270                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1271                              offset_elements, fill_elements,
1272                              buffer_fill_char_kernel_code, char_kernel_name, foo ))
1273             err++;
1274 
1275         for ( i = 0; i < 5; i++ ){
1276             free( (void *)pattern[i] );
1277             align_free( (void *)inptr[i] );
1278             align_free( (void *)hostptr[i] );
1279         }
1280 
1281     }
1282 
1283     free_mtdata(d);
1284 
1285     return err;
1286 
1287 }   // end test_buffer_char_fill()
1288 
1289 
test_buffer_fill_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1290 int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1291 {
1292     cl_uchar *inptr[5];
1293     cl_uchar *hostptr[5];
1294     cl_uchar *pattern[5];
1295     size_t   ptrSizes[5];
1296     int      n, i, err=0;
1297     size_t   j, offset_elements, fill_elements;
1298     MTdata   d = init_genrand( gRandomSeed );
1299     int      (*foo)(void *,void *,int);
1300 
1301     size_t  min_alignment = get_min_alignment(context);
1302 
1303     foo = verify_fill_uchar;
1304 
1305     ptrSizes[0] = sizeof(cl_uchar);
1306     ptrSizes[1] = ptrSizes[0] << 1;
1307     ptrSizes[2] = ptrSizes[1] << 1;
1308     ptrSizes[3] = ptrSizes[2] << 1;
1309     ptrSizes[4] = ptrSizes[3] << 1;
1310 
1311     // Test with random offsets and fill sizes
1312     for ( n = 0; n < 8; n++ ){
1313         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1314         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1315         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1316 
1317         for ( i = 0; i < 5; i++ ){
1318             pattern[i] = (cl_uchar *)malloc(ptrSizes[i]);
1319             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1320                 pattern[i][j] = TEST_PRIME_CHAR;
1321 
1322             inptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1323             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1324                 inptr[i][j] = 0;
1325             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1326                 inptr[i][j] = TEST_PRIME_CHAR;
1327             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1328                 inptr[i][j] = 0;
1329 
1330             hostptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1331             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1332         }
1333 
1334         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar",
1335                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1336                              offset_elements, fill_elements,
1337                              buffer_fill_uchar_kernel_code, uchar_kernel_name, foo ))
1338             err++;
1339 
1340         for ( i = 0; i < 5; i++ ){
1341             free( (void *)pattern[i] );
1342             align_free( (void *)inptr[i] );
1343             align_free( (void *)hostptr[i] );
1344         }
1345 
1346     }
1347 
1348     free_mtdata(d);
1349 
1350     return err;
1351 
1352 }   // end test_buffer_uchar_fill()
1353 
1354 
test_buffer_fill_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1355 int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1356 {
1357     cl_long *inptr[5];
1358     cl_long *hostptr[5];
1359     cl_long *pattern[5];
1360     size_t  ptrSizes[5];
1361     int     n, i, err=0;
1362     size_t  j, offset_elements, fill_elements;
1363     MTdata  d = init_genrand( gRandomSeed );
1364     int     (*foo)(void *,void *,int);
1365 
1366     size_t  min_alignment = get_min_alignment(context);
1367 
1368     foo = verify_fill_long;
1369 
1370     ptrSizes[0] = sizeof(cl_long);
1371     ptrSizes[1] = ptrSizes[0] << 1;
1372     ptrSizes[2] = ptrSizes[1] << 1;
1373     ptrSizes[3] = ptrSizes[2] << 1;
1374     ptrSizes[4] = ptrSizes[3] << 1;
1375 
1376     //skip devices that don't support long
1377     if (! gHasLong )
1378     {
1379         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1380         return CL_SUCCESS;
1381     }
1382 
1383     // Test with random offsets and fill sizes
1384     for ( n = 0; n < 8; n++ ){
1385         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1386         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1387         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1388 
1389         for ( i = 0; i < 5; i++ ){
1390             pattern[i] = (cl_long *)malloc(ptrSizes[i]);
1391             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1392                 pattern[i][j] = TEST_PRIME_LONG;
1393 
1394             inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1395             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1396                 inptr[i][j] = 0;
1397             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1398                 inptr[i][j] = TEST_PRIME_LONG;
1399             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1400                 inptr[i][j] = 0;
1401 
1402             hostptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1403             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1404         }
1405 
1406         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"long",
1407                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1408                              offset_elements, fill_elements,
1409                              buffer_fill_long_kernel_code, long_kernel_name, foo ))
1410             err++;
1411 
1412         for ( i = 0; i < 5; i++ ){
1413             free( (void *)pattern[i] );
1414             align_free( (void *)inptr[i] );
1415             align_free( (void *)hostptr[i] );
1416         }
1417 
1418     }
1419 
1420     free_mtdata(d);
1421 
1422     return err;
1423 
1424 }   // end test_buffer_long_fill()
1425 
1426 
test_buffer_fill_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1427 int test_buffer_fill_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1428 {
1429     cl_ulong *inptr[5];
1430     cl_ulong *hostptr[5];
1431     cl_ulong *pattern[5];
1432     size_t   ptrSizes[5];
1433     int      n, i, err=0;
1434     size_t   j, offset_elements, fill_elements;
1435     MTdata   d = init_genrand( gRandomSeed );
1436     int      (*foo)(void *,void *,int);
1437 
1438     size_t   min_alignment = get_min_alignment(context);
1439 
1440     foo = verify_fill_ulong;
1441 
1442     ptrSizes[0] = sizeof(cl_ulong);
1443     ptrSizes[1] = ptrSizes[0] << 1;
1444     ptrSizes[2] = ptrSizes[1] << 1;
1445     ptrSizes[3] = ptrSizes[2] << 1;
1446     ptrSizes[4] = ptrSizes[3] << 1;
1447 
1448     if (! gHasLong )
1449     {
1450         log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1451         return CL_SUCCESS;
1452     }
1453 
1454     // Test with random offsets and fill sizes
1455     for ( n = 0; n < 8; n++ ){
1456         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1457         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1458         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1459 
1460         for ( i = 0; i < 5; i++ ){
1461             pattern[i] = (cl_ulong *)malloc(ptrSizes[i]);
1462             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1463                 pattern[i][j] = TEST_PRIME_ULONG;
1464 
1465             inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1466             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1467                 inptr[i][j] = 0;
1468             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1469                 inptr[i][j] = TEST_PRIME_ULONG;
1470             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1471                 inptr[i][j] = 0;
1472 
1473             hostptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1474             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1475         }
1476 
1477         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong",
1478                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1479                              offset_elements, fill_elements,
1480                              buffer_fill_ulong_kernel_code, ulong_kernel_name, foo ))
1481             err++;
1482 
1483         for ( i = 0; i < 5; i++ ){
1484             free( (void *)pattern[i] );
1485             align_free( (void *)inptr[i] );
1486             align_free( (void *)hostptr[i] );
1487         }
1488 
1489     }
1490 
1491     free_mtdata(d);
1492 
1493     return err;
1494 
1495 }   // end test_buffer_ulong_fill()
1496 
1497 
test_buffer_fill_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1498 int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1499 {
1500     cl_float *inptr[5];
1501     cl_float *hostptr[5];
1502     cl_float *pattern[5];
1503     size_t   ptrSizes[5];
1504     int      n, i, err=0;
1505     size_t   j, offset_elements, fill_elements;
1506     MTdata   d = init_genrand( gRandomSeed );
1507     int      (*foo)(void *,void *,int);
1508 
1509     size_t  min_alignment = get_min_alignment(context);
1510 
1511     foo = verify_fill_float;
1512 
1513     ptrSizes[0] = sizeof(cl_float);
1514     ptrSizes[1] = ptrSizes[0] << 1;
1515     ptrSizes[2] = ptrSizes[1] << 1;
1516     ptrSizes[3] = ptrSizes[2] << 1;
1517     ptrSizes[4] = ptrSizes[3] << 1;
1518 
1519     // Test with random offsets and fill sizes
1520     for ( n = 0; n < 8; n++ ){
1521         offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1522         fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1523         log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1524 
1525         for ( i = 0; i < 5; i++ ){
1526             pattern[i] = (cl_float *)malloc(ptrSizes[i]);
1527             for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1528                 pattern[i][j] = TEST_PRIME_FLOAT;
1529 
1530             inptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1531             for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1532                 inptr[i][j] = 0;
1533             for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1534                 inptr[i][j] = TEST_PRIME_FLOAT;
1535             for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1536                 inptr[i][j] = 0;
1537 
1538             hostptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1539             memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1540         }
1541 
1542         if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float",
1543                              5, (void**)inptr, (void**)hostptr, (void**)pattern,
1544                              offset_elements, fill_elements,
1545                              buffer_fill_float_kernel_code, float_kernel_name, foo ))
1546             err++;
1547 
1548         for ( i = 0; i < 5; i++ ){
1549             free( (void *)pattern[i] );
1550             align_free( (void *)inptr[i] );
1551             align_free( (void *)hostptr[i] );
1552         }
1553 
1554     }
1555 
1556     free_mtdata(d);
1557 
1558     return err;
1559 
1560 }   // end test_buffer_float_fill()
1561