1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include "procs.h"
25 #include "harness/errorHelpers.h"
26
27 #define USE_LOCAL_WORK_GROUP 1
28
29 #define TEST_PRIME_CHAR 0x77
30 #define TEST_PRIME_INT ((1<<16)+1)
31 #define TEST_PRIME_UINT ((1U<<16)+1U)
32 #define TEST_PRIME_LONG ((1LL<<32)+1LL)
33 #define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
34 #define TEST_PRIME_SHORT (cl_short)((1<<8)+1)
35 #define TEST_PRIME_USHORT (cl_ushort)((1<<8)+1)
36 #define TEST_PRIME_FLOAT (cl_float)3.40282346638528860e+38
37 #define TEST_PRIME_HALF 119.f
38
39 #ifndef TestStruct
40 typedef struct{
41 cl_int a;
42 cl_float b;
43 } TestStruct;
44 #endif
45
46 const char *buffer_fill_int_kernel_code[] = {
47 "__kernel void test_buffer_fill_int(__global int *src, __global int *dst)\n"
48 "{\n"
49 " int tid = get_global_id(0);\n"
50 "\n"
51 " dst[tid] = src[tid];\n"
52 "}\n",
53
54 "__kernel void test_buffer_fill_int2(__global int2 *src, __global int2 *dst)\n"
55 "{\n"
56 " int tid = get_global_id(0);\n"
57 "\n"
58 " dst[tid] = src[tid];\n"
59 "}\n",
60
61 "__kernel void test_buffer_fill_int4(__global int4 *src, __global int4 *dst)\n"
62 "{\n"
63 " int tid = get_global_id(0);\n"
64 "\n"
65 " dst[tid] = src[tid];\n"
66 "}\n",
67
68 "__kernel void test_buffer_fill_int8(__global int8 *src, __global int8 *dst)\n"
69 "{\n"
70 " int tid = get_global_id(0);\n"
71 "\n"
72 " dst[tid] = src[tid];\n"
73 "}\n",
74
75 "__kernel void test_buffer_fill_int16(__global int16 *src, __global int16 *dst)\n"
76 "{\n"
77 " int tid = get_global_id(0);\n"
78 "\n"
79 " dst[tid] = src[tid];\n"
80 "}\n" };
81
82 static const char *int_kernel_name[] = { "test_buffer_fill_int", "test_buffer_fill_int2", "test_buffer_fill_int4", "test_buffer_fill_int8", "test_buffer_fill_int16" };
83
84
85 const char *buffer_fill_uint_kernel_code[] = {
86 "__kernel void test_buffer_fill_uint(__global uint *src, __global uint *dst)\n"
87 "{\n"
88 " int tid = get_global_id(0);\n"
89 "\n"
90 " dst[tid] = src[tid];\n"
91 "}\n",
92
93 "__kernel void test_buffer_fill_uint2(__global uint2 *src, __global uint2 *dst)\n"
94 "{\n"
95 " int tid = get_global_id(0);\n"
96 "\n"
97 " dst[tid] = src[tid];\n"
98 "}\n",
99
100 "__kernel void test_buffer_fill_uint4(__global uint4 *src, __global uint4 *dst)\n"
101 "{\n"
102 " int tid = get_global_id(0);\n"
103 "\n"
104 " dst[tid] = src[tid];\n"
105 "}\n",
106
107 "__kernel void test_buffer_fill_uint8(__global uint8 *src, __global uint8 *dst)\n"
108 "{\n"
109 " int tid = get_global_id(0);\n"
110 "\n"
111 " dst[tid] = src[tid];\n"
112 "}\n",
113
114 "__kernel void test_buffer_fill_uint16(__global uint16 *src, __global uint16 *dst)\n"
115 "{\n"
116 " int tid = get_global_id(0);\n"
117 "\n"
118 " dst[tid] = src[tid];\n"
119 "}\n" };
120
121 static const char *uint_kernel_name[] = { "test_buffer_fill_uint", "test_buffer_fill_uint2", "test_buffer_fill_uint4", "test_buffer_fill_uint8", "test_buffer_fill_uint16" };
122
123
124 const char *buffer_fill_short_kernel_code[] = {
125 "__kernel void test_buffer_fill_short(__global short *src, __global short *dst)\n"
126 "{\n"
127 " int tid = get_global_id(0);\n"
128 "\n"
129 " dst[tid] = src[tid];\n"
130 "}\n",
131
132 "__kernel void test_buffer_fill_short2(__global short2 *src, __global short2 *dst)\n"
133 "{\n"
134 " int tid = get_global_id(0);\n"
135 "\n"
136 " dst[tid] = src[tid];\n"
137 "}\n",
138
139 "__kernel void test_buffer_fill_short4(__global short4 *src, __global short4 *dst)\n"
140 "{\n"
141 " int tid = get_global_id(0);\n"
142 "\n"
143 " dst[tid] = src[tid];\n"
144 "}\n",
145
146 "__kernel void test_buffer_fill_short8(__global short8 *src, __global short8 *dst)\n"
147 "{\n"
148 " int tid = get_global_id(0);\n"
149 "\n"
150 " dst[tid] = src[tid];\n"
151 "}\n",
152
153 "__kernel void test_buffer_fill_short16(__global short16 *src, __global short16 *dst)\n"
154 "{\n"
155 " int tid = get_global_id(0);\n"
156 "\n"
157 " dst[tid] = src[tid];\n"
158 "}\n" };
159
160 static const char *short_kernel_name[] = { "test_buffer_fill_short", "test_buffer_fill_short2", "test_buffer_fill_short4", "test_buffer_fill_short8", "test_buffer_fill_short16" };
161
162
163 const char *buffer_fill_ushort_kernel_code[] = {
164 "__kernel void test_buffer_fill_ushort(__global ushort *src, __global ushort *dst)\n"
165 "{\n"
166 " int tid = get_global_id(0);\n"
167 "\n"
168 " dst[tid] = src[tid];\n"
169 "}\n",
170
171 "__kernel void test_buffer_fill_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
172 "{\n"
173 " int tid = get_global_id(0);\n"
174 "\n"
175 " dst[tid] = src[tid];\n"
176 "}\n",
177
178 "__kernel void test_buffer_fill_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
179 "{\n"
180 " int tid = get_global_id(0);\n"
181 "\n"
182 " dst[tid] = src[tid];\n"
183 "}\n",
184
185 "__kernel void test_buffer_fill_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
186 "{\n"
187 " int tid = get_global_id(0);\n"
188 "\n"
189 " dst[tid] = src[tid];\n"
190 "}\n",
191
192 "__kernel void test_buffer_fill_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
193 "{\n"
194 " int tid = get_global_id(0);\n"
195 "\n"
196 " dst[tid] = src[tid];\n"
197 "}\n" };
198
199 static const char *ushort_kernel_name[] = { "test_buffer_fill_ushort", "test_buffer_fill_ushort2", "test_buffer_fill_ushort4", "test_buffer_fill_ushort8", "test_buffer_fill_ushort16" };
200
201
202 const char *buffer_fill_char_kernel_code[] = {
203 "__kernel void test_buffer_fill_char(__global char *src, __global char *dst)\n"
204 "{\n"
205 " int tid = get_global_id(0);\n"
206 "\n"
207 " dst[tid] = src[tid];\n"
208 "}\n",
209
210 "__kernel void test_buffer_fill_char2(__global char2 *src, __global char2 *dst)\n"
211 "{\n"
212 " int tid = get_global_id(0);\n"
213 "\n"
214 " dst[tid] = src[tid];\n"
215 "}\n",
216
217 "__kernel void test_buffer_fill_char4(__global char4 *src, __global char4 *dst)\n"
218 "{\n"
219 " int tid = get_global_id(0);\n"
220 "\n"
221 " dst[tid] = src[tid];\n"
222 "}\n",
223
224 "__kernel void test_buffer_fill_char8(__global char8 *src, __global char8 *dst)\n"
225 "{\n"
226 " int tid = get_global_id(0);\n"
227 "\n"
228 " dst[tid] = src[tid];\n"
229 "}\n",
230
231 "__kernel void test_buffer_fill_char16(__global char16 *src, __global char16 *dst)\n"
232 "{\n"
233 " int tid = get_global_id(0);\n"
234 "\n"
235 " dst[tid] = src[tid];\n"
236 "}\n" };
237
238 static const char *char_kernel_name[] = { "test_buffer_fill_char", "test_buffer_fill_char2", "test_buffer_fill_char4", "test_buffer_fill_char8", "test_buffer_fill_char16" };
239
240
241 const char *buffer_fill_uchar_kernel_code[] = {
242 "__kernel void test_buffer_fill_uchar(__global uchar *src, __global uchar *dst)\n"
243 "{\n"
244 " int tid = get_global_id(0);\n"
245 "\n"
246 " dst[tid] = src[tid];\n"
247 "}\n",
248
249 "__kernel void test_buffer_fill_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
250 "{\n"
251 " int tid = get_global_id(0);\n"
252 "\n"
253 " dst[tid] = src[tid];\n"
254 "}\n",
255
256 "__kernel void test_buffer_fill_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
257 "{\n"
258 " int tid = get_global_id(0);\n"
259 "\n"
260 " dst[tid] = src[tid];\n"
261 "}\n",
262
263 "__kernel void test_buffer_fill_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
264 "{\n"
265 " int tid = get_global_id(0);\n"
266 "\n"
267 " dst[tid] = src[tid];\n"
268 "}\n",
269
270 "__kernel void test_buffer_fill_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
271 "{\n"
272 " int tid = get_global_id(0);\n"
273 "\n"
274 " dst[tid] = src[tid];\n"
275 "}\n" };
276
277 static const char *uchar_kernel_name[] = { "test_buffer_fill_uchar", "test_buffer_fill_uchar2", "test_buffer_fill_uchar4", "test_buffer_fill_uchar8", "test_buffer_fill_uchar16" };
278
279
280 const char *buffer_fill_long_kernel_code[] = {
281 "__kernel void test_buffer_fill_long(__global long *src, __global long *dst)\n"
282 "{\n"
283 " int tid = get_global_id(0);\n"
284 "\n"
285 " dst[tid] = src[tid];\n"
286 "}\n",
287
288 "__kernel void test_buffer_fill_long2(__global long2 *src, __global long2 *dst)\n"
289 "{\n"
290 " int tid = get_global_id(0);\n"
291 "\n"
292 " dst[tid] = src[tid];\n"
293 "}\n",
294
295 "__kernel void test_buffer_fill_long4(__global long4 *src, __global long4 *dst)\n"
296 "{\n"
297 " int tid = get_global_id(0);\n"
298 "\n"
299 " dst[tid] = src[tid];\n"
300 "}\n",
301
302 "__kernel void test_buffer_fill_long8(__global long8 *src, __global long8 *dst)\n"
303 "{\n"
304 " int tid = get_global_id(0);\n"
305 "\n"
306 " dst[tid] = src[tid];\n"
307 "}\n",
308
309 "__kernel void test_buffer_fill_long16(__global long16 *src, __global long16 *dst)\n"
310 "{\n"
311 " int tid = get_global_id(0);\n"
312 "\n"
313 " dst[tid] = src[tid];\n"
314 "}\n" };
315
316 static const char *long_kernel_name[] = { "test_buffer_fill_long", "test_buffer_fill_long2", "test_buffer_fill_long4", "test_buffer_fill_long8", "test_buffer_fill_long16" };
317
318
319 const char *buffer_fill_ulong_kernel_code[] = {
320 "__kernel void test_buffer_fill_ulong(__global ulong *src, __global ulong *dst)\n"
321 "{\n"
322 " int tid = get_global_id(0);\n"
323 "\n"
324 " dst[tid] = src[tid];\n"
325 "}\n",
326
327 "__kernel void test_buffer_fill_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
328 "{\n"
329 " int tid = get_global_id(0);\n"
330 "\n"
331 " dst[tid] = src[tid];\n"
332 "}\n",
333
334 "__kernel void test_buffer_fill_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
335 "{\n"
336 " int tid = get_global_id(0);\n"
337 "\n"
338 " dst[tid] = src[tid];\n"
339 "}\n",
340
341 "__kernel void test_buffer_fill_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
342 "{\n"
343 " int tid = get_global_id(0);\n"
344 "\n"
345 " dst[tid] = src[tid];\n"
346 "}\n",
347
348 "__kernel void test_buffer_fill_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
349 "{\n"
350 " int tid = get_global_id(0);\n"
351 "\n"
352 " dst[tid] = src[tid];\n"
353 "}\n" };
354
355 static const char *ulong_kernel_name[] = { "test_buffer_fill_ulong", "test_buffer_fill_ulong2", "test_buffer_fill_ulong4", "test_buffer_fill_ulong8", "test_buffer_fill_ulong16" };
356
357
358 const char *buffer_fill_float_kernel_code[] = {
359 "__kernel void test_buffer_fill_float(__global float *src, __global float *dst)\n"
360 "{\n"
361 " int tid = get_global_id(0);\n"
362 "\n"
363 " dst[tid] = src[tid];\n"
364 "}\n",
365
366 "__kernel void test_buffer_fill_float2(__global float2 *src, __global float2 *dst)\n"
367 "{\n"
368 " int tid = get_global_id(0);\n"
369 "\n"
370 " dst[tid] = src[tid];\n"
371 "}\n",
372
373 "__kernel void test_buffer_fill_float4(__global float4 *src, __global float4 *dst)\n"
374 "{\n"
375 " int tid = get_global_id(0);\n"
376 "\n"
377 " dst[tid] = src[tid];\n"
378 "}\n",
379
380 "__kernel void test_buffer_fill_float8(__global float8 *src, __global float8 *dst)\n"
381 "{\n"
382 " int tid = get_global_id(0);\n"
383 "\n"
384 " dst[tid] = src[tid];\n"
385 "}\n",
386
387 "__kernel void test_buffer_fill_float16(__global float16 *src, __global float16 *dst)\n"
388 "{\n"
389 " int tid = get_global_id(0);\n"
390 "\n"
391 " dst[tid] = src[tid];\n"
392 "}\n" };
393
394 static const char *float_kernel_name[] = { "test_buffer_fill_float", "test_buffer_fill_float2", "test_buffer_fill_float4", "test_buffer_fill_float8", "test_buffer_fill_float16" };
395
396
397 static const char *struct_kernel_code =
398 "typedef struct{\n"
399 "int a;\n"
400 "float b;\n"
401 "} TestStruct;\n"
402 "__kernel void read_fill_struct(__global TestStruct *src, __global TestStruct *dst)\n"
403 "{\n"
404 " int tid = get_global_id(0);\n"
405 "\n"
406 " dst[tid].a = src[tid].a;\n"
407 " dst[tid].b = src[tid].b;\n"
408 "}\n";
409
410
411
verify_fill_int(void * ptr1,void * ptr2,int n)412 static int verify_fill_int( void *ptr1, void *ptr2, int n )
413 {
414 int i;
415 cl_int *inptr = (cl_int *)ptr1;
416 cl_int *outptr = (cl_int *)ptr2;
417
418 for (i=0; i<n; i++){
419 if ( outptr[i] != inptr[i] )
420 return -1;
421 }
422
423 return 0;
424 }
425
426
verify_fill_uint(void * ptr1,void * ptr2,int n)427 static int verify_fill_uint( void *ptr1, void *ptr2, int n )
428 {
429 int i;
430 cl_uint *inptr = (cl_uint *)ptr1;
431 cl_uint *outptr = (cl_uint *)ptr2;
432
433 for (i=0; i<n; i++){
434 if ( outptr[i] != inptr[i] )
435 return -1;
436 }
437
438 return 0;
439 }
440
441
verify_fill_short(void * ptr1,void * ptr2,int n)442 static int verify_fill_short( void *ptr1, void *ptr2, int n )
443 {
444 int i;
445 cl_short *inptr = (cl_short *)ptr1;
446 cl_short *outptr = (cl_short *)ptr2;
447
448 for (i=0; i<n; i++){
449 if ( outptr[i] != inptr[i] )
450 return -1;
451 }
452
453 return 0;
454 }
455
456
verify_fill_ushort(void * ptr1,void * ptr2,int n)457 static int verify_fill_ushort( void *ptr1, void *ptr2, int n )
458 {
459 int i;
460 cl_ushort *inptr = (cl_ushort *)ptr1;
461 cl_ushort *outptr = (cl_ushort *)ptr2;
462
463 for (i=0; i<n; i++){
464 if ( outptr[i] != inptr[i] )
465 return -1;
466 }
467
468 return 0;
469 }
470
471
verify_fill_char(void * ptr1,void * ptr2,int n)472 static int verify_fill_char( void *ptr1, void *ptr2, int n )
473 {
474 int i;
475 cl_char *inptr = (cl_char *)ptr1;
476 cl_char *outptr = (cl_char *)ptr2;
477
478 for (i=0; i<n; i++){
479 if ( outptr[i] != inptr[i] )
480 return -1;
481 }
482
483 return 0;
484 }
485
486
verify_fill_uchar(void * ptr1,void * ptr2,int n)487 static int verify_fill_uchar( void *ptr1, void *ptr2, int n )
488 {
489 int i;
490 cl_uchar *inptr = (cl_uchar *)ptr1;
491 cl_uchar *outptr = (cl_uchar *)ptr2;
492
493 for (i=0; i<n; i++){
494 if ( outptr[i] != inptr[i] )
495 return -1;
496 }
497
498 return 0;
499 }
500
501
verify_fill_long(void * ptr1,void * ptr2,int n)502 static int verify_fill_long( void *ptr1, void *ptr2, int n )
503 {
504 int i;
505 cl_long *inptr = (cl_long *)ptr1;
506 cl_long *outptr = (cl_long *)ptr2;
507
508 for (i=0; i<n; i++){
509 if ( outptr[i] != inptr[i] )
510 return -1;
511 }
512
513 return 0;
514 }
515
516
verify_fill_ulong(void * ptr1,void * ptr2,int n)517 static int verify_fill_ulong( void *ptr1, void *ptr2, int n )
518 {
519 int i;
520 cl_ulong *inptr = (cl_ulong *)ptr1;
521 cl_ulong *outptr = (cl_ulong *)ptr2;
522
523 for (i=0; i<n; i++){
524 if ( outptr[i] != inptr[i] )
525 return -1;
526 }
527
528 return 0;
529 }
530
531
verify_fill_float(void * ptr1,void * ptr2,int n)532 static int verify_fill_float( void *ptr1, void *ptr2, int n )
533 {
534 int i;
535 cl_float *inptr = (cl_float *)ptr1;
536 cl_float *outptr = (cl_float *)ptr2;
537
538 for (i=0; i<n; i++){
539 if ( outptr[i] != inptr[i] )
540 return -1;
541 }
542
543 return 0;
544 }
545
546
verify_fill_struct(void * ptr1,void * ptr2,int n)547 static int verify_fill_struct( void *ptr1, void *ptr2, int n )
548 {
549 int i;
550 TestStruct *inptr = (TestStruct *)ptr1;
551 TestStruct *outptr = (TestStruct *)ptr2;
552
553 for (i=0; i<n; i++){
554 if ( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
555 return -1;
556 }
557
558 return 0;
559 }
560
561
562
test_buffer_fill(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],void * hostptr[5],void * pattern[5],size_t offset_elements,size_t fill_elements,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int))563 int test_buffer_fill( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type,
564 int loops, void *inptr[5], void *hostptr[5], void *pattern[5], size_t offset_elements, size_t fill_elements,
565 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
566 {
567 cl_mem buffers[10];
568 void *outptr[5];
569 cl_program program[5];
570 cl_kernel kernel[5];
571 cl_event event[2];
572 size_t ptrSizes[5];
573 size_t global_work_size[3];
574 #ifdef USE_LOCAL_WORK_GROUP
575 size_t local_work_size[3];
576 #endif
577 int err;
578 int i, ii;
579 int src_flag_id;
580 int total_errors = 0;
581
582 size_t min_alignment = get_min_alignment(context);
583
584 global_work_size[0] = (size_t)num_elements;
585
586 ptrSizes[0] = size;
587 ptrSizes[1] = ptrSizes[0] << 1;
588 ptrSizes[2] = ptrSizes[1] << 1;
589 ptrSizes[3] = ptrSizes[2] << 1;
590 ptrSizes[4] = ptrSizes[3] << 1;
591
592 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
593 log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
594
595 loops = ( loops < 5 ? loops : 5 );
596 for ( i = 0; i < loops; i++ ){
597 ii = i << 1;
598 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
599 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, hostptr[i], &err);
600 else
601 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
602 if ( !buffers[ii] || err){
603 print_error(err, "clCreateBuffer failed\n" );
604 return -1;
605 }
606 // Initialize source buffer with 0, since the validation code expects 0(s) outside of the fill region.
607 if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
608 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, hostptr[i], 0, NULL, NULL);
609 if ( err != CL_SUCCESS ){
610 print_error(err, "clEnqueueWriteBuffer failed\n" );
611 return -1;
612 }
613 }
614
615 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
616 memset(outptr[i], 0, ptrSizes[i] * num_elements);
617 buffers[ii+1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, ptrSizes[i] * num_elements, outptr[i], &err);
618 if ( !buffers[ii+1] || err){
619 print_error(err, "clCreateBuffer failed\n" );
620 clReleaseMemObject( buffers[ii] );
621 align_free( outptr[i] );
622 return -1;
623 }
624
625 err = clEnqueueFillBuffer(queue, buffers[ii], pattern[i], ptrSizes[i],
626 ptrSizes[i] * offset_elements, ptrSizes[i] * fill_elements,
627 0, NULL, &(event[0]));
628 /* uncomment for test debugging
629 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0]));
630 */
631 if ( err != CL_SUCCESS ){
632 print_error( err, " clEnqueueFillBuffer failed" );
633 clReleaseMemObject( buffers[ii] );
634 clReleaseMemObject( buffers[ii+1] );
635 align_free( outptr[i] );
636 return -1;
637 }
638
639 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
640 if ( err ){
641 log_error( " Error creating program for %s\n", type );
642 clReleaseMemObject( buffers[ii] );
643 clReleaseMemObject( buffers[ii+1] );
644 align_free( outptr[i] );
645 return -1;
646 }
647
648 #ifdef USE_LOCAL_WORK_GROUP
649 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
650 test_error( err, "Unable to get work group size to use" );
651 #endif
652
653 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
654 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
655 if ( err != CL_SUCCESS ){
656 print_error( err, "clSetKernelArg failed" );
657 clReleaseKernel( kernel[i] );
658 clReleaseProgram( program[i] );
659 clReleaseMemObject( buffers[ii] );
660 clReleaseMemObject( buffers[ii+1] );
661 align_free( outptr[i] );
662 return -1;
663 }
664
665 err = clWaitForEvents( 1, &(event[0]) );
666 if ( err != CL_SUCCESS ){
667 print_error( err, "clWaitForEvents() failed" );
668 clReleaseKernel( kernel[i] );
669 clReleaseProgram( program[i] );
670 clReleaseMemObject( buffers[ii] );
671 clReleaseMemObject( buffers[ii+1] );
672 align_free( outptr[i] );
673 return -1;
674 }
675 clReleaseEvent(event[0]);
676
677 #ifdef USE_LOCAL_WORK_GROUP
678 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
679 #else
680 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
681 #endif
682 if (err != CL_SUCCESS){
683 print_error( err, "clEnqueueNDRangeKernel failed" );
684 return -1;
685 }
686
687 err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) );
688 if (err != CL_SUCCESS){
689 print_error( err, "clEnqueueReadBuffer failed" );
690 return -1;
691 }
692
693 err = clWaitForEvents( 1, &(event[1]) );
694 if ( err != CL_SUCCESS ){
695 print_error( err, "clWaitForEvents() failed" );
696 }
697 clReleaseEvent(event[1]);
698
699 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
700 log_error( " %s%d test failed\n", type, 1<<i );
701 total_errors++;
702 }
703 else{
704 log_info( " %s%d test passed\n", type, 1<<i );
705 }
706
707 // cleanup
708 clReleaseMemObject( buffers[ii] );
709 clReleaseMemObject( buffers[ii+1] );
710 clReleaseKernel( kernel[i] );
711 clReleaseProgram( program[i] );
712 align_free( outptr[i] );
713 }
714 } // src cl_mem_flag
715
716 return total_errors;
717
718 } // end test_buffer_fill()
719
720
test_buffer_fill_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)721 int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
722 {
723 cl_mem buffers[2];
724 void *outptr;
725 TestStruct *inptr;
726 TestStruct *hostptr;
727 TestStruct *pattern;
728 cl_program program;
729 cl_kernel kernel;
730 cl_event event[2];
731 size_t ptrSize = sizeof( TestStruct );
732 size_t global_work_size[3];
733 #ifdef USE_LOCAL_WORK_GROUP
734 size_t local_work_size[3];
735 #endif
736 int n, err;
737 size_t j, offset_elements, fill_elements;
738 int src_flag_id;
739 int total_errors = 0;
740 MTdata d = init_genrand( gRandomSeed );
741
742 size_t min_alignment = get_min_alignment(context);
743
744 global_work_size[0] = (size_t)num_elements;
745
746 // Test with random offsets and fill sizes
747 for ( n = 0; n < 8; n++ ){
748 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
749 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
750 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
751
752 pattern = (TestStruct *)malloc(ptrSize);
753 pattern->a = (cl_int)genrand_int32(d);
754 pattern->b = (cl_float)get_random_float( -FLT_MAX, FLT_MAX, d );
755
756 inptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
757 for ( j = 0; j < offset_elements; j++ ) {
758 inptr[j].a = 0;
759 inptr[j].b =0;
760 }
761 for ( j = offset_elements; j < offset_elements + fill_elements; j++ ) {
762 inptr[j].a = pattern->a;
763 inptr[j].b = pattern->b;
764 }
765 for ( j = offset_elements + fill_elements; j < (size_t)num_elements; j++ ) {
766 inptr[j].a = 0;
767 inptr[j].b = 0;
768 }
769
770 hostptr = (TestStruct *)align_malloc(ptrSize * num_elements, min_alignment);
771 memset(hostptr, 0, ptrSize * num_elements);
772
773 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
774 log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]);
775
776 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
777 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], ptrSize * num_elements, hostptr, &err);
778 else
779 buffers[0] = clCreateBuffer(context, flag_set[src_flag_id], ptrSize * num_elements, NULL, &err);
780 if ( err ){
781 print_error(err, " clCreateBuffer failed\n" );
782 clReleaseEvent( event[0] );
783 clReleaseEvent( event[1] );
784 free( (void *)pattern );
785 align_free( (void *)inptr );
786 align_free( (void *)hostptr );
787 free_mtdata(d);
788 return -1;
789 }
790 if (!((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))) {
791 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, hostptr, 0, NULL, NULL);
792 if ( err != CL_SUCCESS ){
793 print_error(err, " clEnqueueWriteBuffer failed\n" );
794 clReleaseEvent( event[0] );
795 clReleaseEvent( event[1] );
796 free( (void *)pattern );
797 align_free( (void *)inptr );
798 align_free( (void *)hostptr );
799 free_mtdata(d);
800 return -1;
801 }
802 }
803 outptr = align_malloc( ptrSize * num_elements, min_alignment);
804 memset(outptr, 0, ptrSize * num_elements);
805 buffers[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, ptrSize * num_elements, outptr, &err);
806 if ( ! buffers[1] || err){
807 print_error(err, " clCreateBuffer failed\n" );
808 align_free( outptr );
809 clReleaseMemObject(buffers[0]);
810 clReleaseEvent( event[0] );
811 clReleaseEvent( event[1] );
812 free( (void *)pattern );
813 align_free( (void *)inptr );
814 align_free( (void *)hostptr );
815 free_mtdata(d);
816 return -1;
817 }
818
819 err = clEnqueueFillBuffer(queue, buffers[0], pattern, ptrSize,
820 ptrSize * offset_elements, ptrSize * fill_elements,
821 0, NULL, &(event[0]));
822 /* uncomment for test debugging
823 err = clEnqueueWriteBuffer(queue, buffers[0], CL_FALSE, 0, ptrSize * num_elements, inptr, 0, NULL, &(event[0]));
824 */
825 if ( err != CL_SUCCESS ){
826 print_error( err, " clEnqueueFillBuffer failed" );
827 align_free( outptr );
828 clReleaseMemObject(buffers[0]);
829 clReleaseMemObject(buffers[1]);
830 clReleaseEvent( event[0] );
831 clReleaseEvent( event[1] );
832 free( (void *)pattern );
833 align_free( (void *)inptr );
834 align_free( (void *)hostptr );
835 free_mtdata(d);
836 return -1;
837 }
838
839 err = create_single_kernel_helper( context, &program, &kernel, 1, &struct_kernel_code, "read_fill_struct" );
840 if ( err ){
841 log_error( " Error creating program for struct\n" );
842 align_free( outptr );
843 clReleaseMemObject(buffers[0]);
844 clReleaseMemObject(buffers[1]);
845 clReleaseEvent( event[0] );
846 clReleaseEvent( event[1] );
847 free( (void *)pattern );
848 align_free( (void *)inptr );
849 align_free( (void *)hostptr );
850 free_mtdata(d);
851 return -1;
852 }
853
854 #ifdef USE_LOCAL_WORK_GROUP
855 err = get_max_common_work_group_size( context, kernel, global_work_size[0], &local_work_size[0] );
856 test_error( err, "Unable to get work group size to use" );
857 #endif
858
859 err = clSetKernelArg( kernel, 0, sizeof( cl_mem ), (void *)&buffers[0] );
860 err |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), (void *)&buffers[1] );
861 if ( err != CL_SUCCESS ){
862 print_error( err, " clSetKernelArg failed" );
863 clReleaseKernel( kernel );
864 clReleaseProgram( program );
865 align_free( outptr );
866 clReleaseMemObject(buffers[0]);
867 clReleaseMemObject(buffers[1]);
868 clReleaseEvent( event[0] );
869 clReleaseEvent( event[1] );
870 free( (void *)pattern );
871 align_free( (void *)inptr );
872 align_free( (void *)hostptr );
873 free_mtdata(d);
874 return -1;
875 }
876
877 err = clWaitForEvents( 1, &(event[0]) );
878 if ( err != CL_SUCCESS ){
879 print_error( err, "clWaitForEvents() failed" );
880 clReleaseKernel( kernel );
881 clReleaseProgram( program );
882 align_free( outptr );
883 clReleaseMemObject(buffers[0]);
884 clReleaseMemObject(buffers[1]);
885 clReleaseEvent( event[0] );
886 clReleaseEvent( event[1] );
887 free( (void *)pattern );
888 align_free( (void *)inptr );
889 align_free( (void *)hostptr );
890 free_mtdata(d);
891 return -1;
892 }
893 clReleaseEvent( event[0] );
894
895 #ifdef USE_LOCAL_WORK_GROUP
896 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
897 #else
898 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_work_size, NULL, 0, NULL, NULL );
899 #endif
900 if ( err != CL_SUCCESS ){
901 print_error( err, " clEnqueueNDRangeKernel failed" );
902 clReleaseKernel( kernel );
903 clReleaseProgram( program );
904 align_free( outptr );
905 clReleaseMemObject(buffers[0]);
906 clReleaseMemObject(buffers[1]);
907 clReleaseEvent( event[0] );
908 clReleaseEvent( event[1] );
909 free( (void *)pattern );
910 align_free( (void *)inptr );
911 align_free( (void *)hostptr );
912 free_mtdata(d);
913 return -1;
914 }
915
916 err = clEnqueueReadBuffer( queue, buffers[1], CL_FALSE, 0, ptrSize * num_elements, outptr, 0, NULL, &(event[1]) );
917 if ( err != CL_SUCCESS ){
918 print_error( err, " clEnqueueReadBuffer failed" );
919 clReleaseKernel( kernel );
920 clReleaseProgram( program );
921 align_free( outptr );
922 clReleaseMemObject(buffers[0]);
923 clReleaseMemObject(buffers[1]);
924 clReleaseEvent( event[0] );
925 clReleaseEvent( event[1] );
926 free( (void *)pattern );
927 align_free( (void *)inptr );
928 align_free( (void *)hostptr );
929 free_mtdata(d);
930 return -1;
931 }
932
933 err = clWaitForEvents( 1, &(event[1]) );
934 if ( err != CL_SUCCESS ){
935 print_error( err, "clWaitForEvents() failed" );
936 }
937 clReleaseEvent( event[1] );
938
939 if ( verify_fill_struct( inptr, outptr, num_elements) ) {
940 log_error( " buffer_FILL async struct test failed\n" );
941 total_errors++;
942 }
943 else{
944 log_info( " buffer_FILL async struct test passed\n" );
945 }
946 // cleanup
947 clReleaseKernel( kernel );
948 clReleaseProgram( program );
949 align_free( outptr );
950 clReleaseMemObject( buffers[0] );
951 clReleaseMemObject( buffers[1] );
952 } // src cl_mem_flag
953 free( (void *)pattern );
954 align_free( (void *)inptr );
955 align_free( (void *)hostptr );
956 }
957
958 free_mtdata(d);
959
960 return total_errors;
961
962 } // end test_buffer_fill_struct()
963
964
test_buffer_fill_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)965 int test_buffer_fill_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
966 {
967 cl_int *inptr[5];
968 cl_int *hostptr[5];
969 cl_int *pattern[5];
970 size_t ptrSizes[5];
971 int n, i, err=0;
972 size_t j, offset_elements, fill_elements;
973 int (*foo)(void *,void *,int);
974 MTdata d = init_genrand( gRandomSeed );
975
976 size_t min_alignment = get_min_alignment(context);
977
978 foo = verify_fill_int;
979
980 ptrSizes[0] = sizeof(cl_int);
981 ptrSizes[1] = ptrSizes[0] << 1;
982 ptrSizes[2] = ptrSizes[1] << 1;
983 ptrSizes[3] = ptrSizes[2] << 1;
984 ptrSizes[4] = ptrSizes[3] << 1;
985
986 // Test with random offsets and fill sizes
987 for ( n = 0; n < 8; n++ ){
988 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
989 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
990 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
991
992 for ( i = 0; i < 5; i++ ){
993 pattern[i] = (cl_int *)malloc(ptrSizes[i]);
994 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
995 pattern[i][j] = TEST_PRIME_INT;
996
997 inptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
998 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
999 inptr[i][j] = 0;
1000 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1001 inptr[i][j] = TEST_PRIME_INT;
1002 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1003 inptr[i][j] = 0;
1004
1005 hostptr[i] = (cl_int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1006 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1007 }
1008
1009 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int",
1010 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1011 offset_elements, fill_elements,
1012 buffer_fill_int_kernel_code, int_kernel_name, foo ))
1013 err++;
1014
1015 for ( i = 0; i < 5; i++ ){
1016 free( (void *)pattern[i] );
1017 align_free( (void *)inptr[i] );
1018 align_free( (void *)hostptr[i] );
1019 }
1020
1021 }
1022
1023 free_mtdata(d);
1024
1025 return err;
1026
1027 } // end test_buffer_int_fill()
1028
1029
test_buffer_fill_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1030 int test_buffer_fill_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1031 {
1032 cl_uint *inptr[5];
1033 cl_uint *hostptr[5];
1034 cl_uint *pattern[5];
1035 size_t ptrSizes[5];
1036 int n, i, err=0;
1037 size_t j, offset_elements, fill_elements;
1038 MTdata d = init_genrand( gRandomSeed );
1039 int (*foo)(void *,void *,int);
1040
1041 size_t min_alignment = get_min_alignment(context);
1042
1043 foo = verify_fill_uint;
1044
1045 ptrSizes[0] = sizeof(cl_uint);
1046 ptrSizes[1] = ptrSizes[0] << 1;
1047 ptrSizes[2] = ptrSizes[1] << 1;
1048 ptrSizes[3] = ptrSizes[2] << 1;
1049 ptrSizes[4] = ptrSizes[3] << 1;
1050
1051 // Test with random offsets and fill sizes
1052 for ( n = 0; n < 8; n++ ){
1053 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1054 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1055 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1056
1057 for ( i = 0; i < 5; i++ ){
1058 pattern[i] = (cl_uint *)malloc(ptrSizes[i]);
1059 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1060 pattern[i][j] = TEST_PRIME_UINT;
1061
1062 inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1063 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1064 inptr[i][j] = 0;
1065 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1066 inptr[i][j] = TEST_PRIME_UINT;
1067 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1068 inptr[i][j] = 0;
1069
1070 hostptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1071 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1072 }
1073
1074 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint",
1075 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1076 offset_elements, fill_elements,
1077 buffer_fill_uint_kernel_code, uint_kernel_name, foo ))
1078 err++;
1079
1080 for ( i = 0; i < 5; i++ ){
1081 free( (void *)pattern[i] );
1082 align_free( (void *)inptr[i] );
1083 align_free( (void *)hostptr[i] );
1084 }
1085
1086 }
1087
1088 free_mtdata(d);
1089
1090 return err;
1091
1092 } // end test_buffer_uint_fill()
1093
1094
test_buffer_fill_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1095 int test_buffer_fill_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1096 {
1097 cl_short *inptr[5];
1098 cl_short *hostptr[5];
1099 cl_short *pattern[5];
1100 size_t ptrSizes[5];
1101 int n, i, err=0;
1102 size_t j, offset_elements, fill_elements;
1103 MTdata d = init_genrand( gRandomSeed );
1104 int (*foo)(void *,void *,int);
1105
1106 size_t min_alignment = get_min_alignment(context);
1107
1108 foo = verify_fill_short;
1109
1110 ptrSizes[0] = sizeof(cl_short);
1111 ptrSizes[1] = ptrSizes[0] << 1;
1112 ptrSizes[2] = ptrSizes[1] << 1;
1113 ptrSizes[3] = ptrSizes[2] << 1;
1114 ptrSizes[4] = ptrSizes[3] << 1;
1115
1116 // Test with random offsets and fill sizes
1117 for ( n = 0; n < 8; n++ ){
1118 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1119 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1120 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1121
1122 for ( i = 0; i < 5; i++ ){
1123 pattern[i] = (cl_short *)malloc(ptrSizes[i]);
1124 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1125 pattern[i][j] = TEST_PRIME_SHORT;
1126
1127 inptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1128 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1129 inptr[i][j] = 0;
1130 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1131 inptr[i][j] = TEST_PRIME_SHORT;
1132 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1133 inptr[i][j] = 0;
1134
1135 hostptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1136 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1137 }
1138
1139 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short",
1140 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1141 offset_elements, fill_elements,
1142 buffer_fill_short_kernel_code, short_kernel_name, foo ))
1143 err++;
1144
1145 for ( i = 0; i < 5; i++ ){
1146 free( (void *)pattern[i] );
1147 align_free( (void *)inptr[i] );
1148 align_free( (void *)hostptr[i] );
1149 }
1150
1151 }
1152
1153 free_mtdata(d);
1154
1155 return err;
1156
1157 } // end test_buffer_short_fill()
1158
1159
test_buffer_fill_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1160 int test_buffer_fill_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1161 {
1162 cl_ushort *inptr[5];
1163 cl_ushort *hostptr[5];
1164 cl_ushort *pattern[5];
1165 size_t ptrSizes[5];
1166 int n, i, err=0;
1167 size_t j, offset_elements, fill_elements;
1168 MTdata d = init_genrand( gRandomSeed );
1169 int (*foo)(void *,void *,int);
1170
1171 size_t min_alignment = get_min_alignment(context);
1172
1173 foo = verify_fill_ushort;
1174
1175 ptrSizes[0] = sizeof(cl_ushort);
1176 ptrSizes[1] = ptrSizes[0] << 1;
1177 ptrSizes[2] = ptrSizes[1] << 1;
1178 ptrSizes[3] = ptrSizes[2] << 1;
1179 ptrSizes[4] = ptrSizes[3] << 1;
1180
1181 // Test with random offsets and fill sizes
1182 for ( n = 0; n < 8; n++ ){
1183 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1184 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1185 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1186
1187 for ( i = 0; i < 5; i++ ){
1188 pattern[i] = (cl_ushort *)malloc(ptrSizes[i]);
1189 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1190 pattern[i][j] = TEST_PRIME_USHORT;
1191
1192 inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1193 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1194 inptr[i][j] = 0;
1195 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1196 inptr[i][j] = TEST_PRIME_USHORT;
1197 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1198 inptr[i][j] = 0;
1199
1200 hostptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1201 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1202 }
1203
1204 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort",
1205 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1206 offset_elements, fill_elements,
1207 buffer_fill_ushort_kernel_code, ushort_kernel_name, foo ))
1208 err++;
1209
1210 for ( i = 0; i < 5; i++ ){
1211 free( (void *)pattern[i] );
1212 align_free( (void *)inptr[i] );
1213 align_free( (void *)hostptr[i] );
1214 }
1215
1216 }
1217
1218 free_mtdata(d);
1219
1220 return err;
1221
1222 } // end test_buffer_ushort_fill()
1223
1224
test_buffer_fill_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1225 int test_buffer_fill_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1226 {
1227 cl_char *inptr[5];
1228 cl_char *hostptr[5];
1229 cl_char *pattern[5];
1230 size_t ptrSizes[5];
1231 int n, i, err=0;
1232 size_t j, offset_elements, fill_elements;
1233 MTdata d = init_genrand( gRandomSeed );
1234 int (*foo)(void *,void *,int);
1235
1236 size_t min_alignment = get_min_alignment(context);
1237
1238 foo = verify_fill_char;
1239
1240 ptrSizes[0] = sizeof(cl_char);
1241 ptrSizes[1] = ptrSizes[0] << 1;
1242 ptrSizes[2] = ptrSizes[1] << 1;
1243 ptrSizes[3] = ptrSizes[2] << 1;
1244 ptrSizes[4] = ptrSizes[3] << 1;
1245
1246 // Test with random offsets and fill sizes
1247 for ( n = 0; n < 8; n++ ){
1248 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1249 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1250 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1251
1252 for ( i = 0; i < 5; i++ ){
1253 pattern[i] = (cl_char *)malloc(ptrSizes[i]);
1254 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1255 pattern[i][j] = TEST_PRIME_CHAR;
1256
1257 inptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1258 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1259 inptr[i][j] = 0;
1260 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1261 inptr[i][j] = TEST_PRIME_CHAR;
1262 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1263 inptr[i][j] = 0;
1264
1265 hostptr[i] = (cl_char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1266 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1267 }
1268
1269 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char",
1270 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1271 offset_elements, fill_elements,
1272 buffer_fill_char_kernel_code, char_kernel_name, foo ))
1273 err++;
1274
1275 for ( i = 0; i < 5; i++ ){
1276 free( (void *)pattern[i] );
1277 align_free( (void *)inptr[i] );
1278 align_free( (void *)hostptr[i] );
1279 }
1280
1281 }
1282
1283 free_mtdata(d);
1284
1285 return err;
1286
1287 } // end test_buffer_char_fill()
1288
1289
test_buffer_fill_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1290 int test_buffer_fill_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1291 {
1292 cl_uchar *inptr[5];
1293 cl_uchar *hostptr[5];
1294 cl_uchar *pattern[5];
1295 size_t ptrSizes[5];
1296 int n, i, err=0;
1297 size_t j, offset_elements, fill_elements;
1298 MTdata d = init_genrand( gRandomSeed );
1299 int (*foo)(void *,void *,int);
1300
1301 size_t min_alignment = get_min_alignment(context);
1302
1303 foo = verify_fill_uchar;
1304
1305 ptrSizes[0] = sizeof(cl_uchar);
1306 ptrSizes[1] = ptrSizes[0] << 1;
1307 ptrSizes[2] = ptrSizes[1] << 1;
1308 ptrSizes[3] = ptrSizes[2] << 1;
1309 ptrSizes[4] = ptrSizes[3] << 1;
1310
1311 // Test with random offsets and fill sizes
1312 for ( n = 0; n < 8; n++ ){
1313 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1314 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1315 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1316
1317 for ( i = 0; i < 5; i++ ){
1318 pattern[i] = (cl_uchar *)malloc(ptrSizes[i]);
1319 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1320 pattern[i][j] = TEST_PRIME_CHAR;
1321
1322 inptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1323 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1324 inptr[i][j] = 0;
1325 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1326 inptr[i][j] = TEST_PRIME_CHAR;
1327 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1328 inptr[i][j] = 0;
1329
1330 hostptr[i] = (cl_uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1331 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1332 }
1333
1334 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar",
1335 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1336 offset_elements, fill_elements,
1337 buffer_fill_uchar_kernel_code, uchar_kernel_name, foo ))
1338 err++;
1339
1340 for ( i = 0; i < 5; i++ ){
1341 free( (void *)pattern[i] );
1342 align_free( (void *)inptr[i] );
1343 align_free( (void *)hostptr[i] );
1344 }
1345
1346 }
1347
1348 free_mtdata(d);
1349
1350 return err;
1351
1352 } // end test_buffer_uchar_fill()
1353
1354
test_buffer_fill_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1355 int test_buffer_fill_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1356 {
1357 cl_long *inptr[5];
1358 cl_long *hostptr[5];
1359 cl_long *pattern[5];
1360 size_t ptrSizes[5];
1361 int n, i, err=0;
1362 size_t j, offset_elements, fill_elements;
1363 MTdata d = init_genrand( gRandomSeed );
1364 int (*foo)(void *,void *,int);
1365
1366 size_t min_alignment = get_min_alignment(context);
1367
1368 foo = verify_fill_long;
1369
1370 ptrSizes[0] = sizeof(cl_long);
1371 ptrSizes[1] = ptrSizes[0] << 1;
1372 ptrSizes[2] = ptrSizes[1] << 1;
1373 ptrSizes[3] = ptrSizes[2] << 1;
1374 ptrSizes[4] = ptrSizes[3] << 1;
1375
1376 //skip devices that don't support long
1377 if (! gHasLong )
1378 {
1379 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1380 return CL_SUCCESS;
1381 }
1382
1383 // Test with random offsets and fill sizes
1384 for ( n = 0; n < 8; n++ ){
1385 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1386 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1387 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1388
1389 for ( i = 0; i < 5; i++ ){
1390 pattern[i] = (cl_long *)malloc(ptrSizes[i]);
1391 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1392 pattern[i][j] = TEST_PRIME_LONG;
1393
1394 inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1395 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1396 inptr[i][j] = 0;
1397 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1398 inptr[i][j] = TEST_PRIME_LONG;
1399 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1400 inptr[i][j] = 0;
1401
1402 hostptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1403 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1404 }
1405
1406 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"long",
1407 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1408 offset_elements, fill_elements,
1409 buffer_fill_long_kernel_code, long_kernel_name, foo ))
1410 err++;
1411
1412 for ( i = 0; i < 5; i++ ){
1413 free( (void *)pattern[i] );
1414 align_free( (void *)inptr[i] );
1415 align_free( (void *)hostptr[i] );
1416 }
1417
1418 }
1419
1420 free_mtdata(d);
1421
1422 return err;
1423
1424 } // end test_buffer_long_fill()
1425
1426
test_buffer_fill_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1427 int test_buffer_fill_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1428 {
1429 cl_ulong *inptr[5];
1430 cl_ulong *hostptr[5];
1431 cl_ulong *pattern[5];
1432 size_t ptrSizes[5];
1433 int n, i, err=0;
1434 size_t j, offset_elements, fill_elements;
1435 MTdata d = init_genrand( gRandomSeed );
1436 int (*foo)(void *,void *,int);
1437
1438 size_t min_alignment = get_min_alignment(context);
1439
1440 foo = verify_fill_ulong;
1441
1442 ptrSizes[0] = sizeof(cl_ulong);
1443 ptrSizes[1] = ptrSizes[0] << 1;
1444 ptrSizes[2] = ptrSizes[1] << 1;
1445 ptrSizes[3] = ptrSizes[2] << 1;
1446 ptrSizes[4] = ptrSizes[3] << 1;
1447
1448 if (! gHasLong )
1449 {
1450 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1451 return CL_SUCCESS;
1452 }
1453
1454 // Test with random offsets and fill sizes
1455 for ( n = 0; n < 8; n++ ){
1456 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1457 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1458 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1459
1460 for ( i = 0; i < 5; i++ ){
1461 pattern[i] = (cl_ulong *)malloc(ptrSizes[i]);
1462 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1463 pattern[i][j] = TEST_PRIME_ULONG;
1464
1465 inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1466 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1467 inptr[i][j] = 0;
1468 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1469 inptr[i][j] = TEST_PRIME_ULONG;
1470 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1471 inptr[i][j] = 0;
1472
1473 hostptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1474 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1475 }
1476
1477 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong",
1478 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1479 offset_elements, fill_elements,
1480 buffer_fill_ulong_kernel_code, ulong_kernel_name, foo ))
1481 err++;
1482
1483 for ( i = 0; i < 5; i++ ){
1484 free( (void *)pattern[i] );
1485 align_free( (void *)inptr[i] );
1486 align_free( (void *)hostptr[i] );
1487 }
1488
1489 }
1490
1491 free_mtdata(d);
1492
1493 return err;
1494
1495 } // end test_buffer_ulong_fill()
1496
1497
test_buffer_fill_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1498 int test_buffer_fill_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1499 {
1500 cl_float *inptr[5];
1501 cl_float *hostptr[5];
1502 cl_float *pattern[5];
1503 size_t ptrSizes[5];
1504 int n, i, err=0;
1505 size_t j, offset_elements, fill_elements;
1506 MTdata d = init_genrand( gRandomSeed );
1507 int (*foo)(void *,void *,int);
1508
1509 size_t min_alignment = get_min_alignment(context);
1510
1511 foo = verify_fill_float;
1512
1513 ptrSizes[0] = sizeof(cl_float);
1514 ptrSizes[1] = ptrSizes[0] << 1;
1515 ptrSizes[2] = ptrSizes[1] << 1;
1516 ptrSizes[3] = ptrSizes[2] << 1;
1517 ptrSizes[4] = ptrSizes[3] << 1;
1518
1519 // Test with random offsets and fill sizes
1520 for ( n = 0; n < 8; n++ ){
1521 offset_elements = (size_t)get_random_float( 0.f, (float)(num_elements - 8), d );
1522 fill_elements = (size_t)get_random_float( 8.f, (float)(num_elements - offset_elements), d );
1523 log_info( "Testing random fill from offset %d for %d elements: \n", (int)offset_elements, (int)fill_elements );
1524
1525 for ( i = 0; i < 5; i++ ){
1526 pattern[i] = (cl_float *)malloc(ptrSizes[i]);
1527 for ( j = 0; j < ptrSizes[i] / ptrSizes[0]; j++ )
1528 pattern[i][j] = TEST_PRIME_FLOAT;
1529
1530 inptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1531 for ( j = 0; j < ptrSizes[i] * offset_elements / ptrSizes[0]; j++ )
1532 inptr[i][j] = 0;
1533 for ( j = ptrSizes[i] * offset_elements / ptrSizes[0]; j < ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j++ )
1534 inptr[i][j] = TEST_PRIME_FLOAT;
1535 for ( j = ptrSizes[i] * (offset_elements + fill_elements) / ptrSizes[0]; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1536 inptr[i][j] = 0;
1537
1538 hostptr[i] = (cl_float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1539 memset(hostptr[i], 0, ptrSizes[i] * num_elements);
1540 }
1541
1542 if (test_buffer_fill( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float",
1543 5, (void**)inptr, (void**)hostptr, (void**)pattern,
1544 offset_elements, fill_elements,
1545 buffer_fill_float_kernel_code, float_kernel_name, foo ))
1546 err++;
1547
1548 for ( i = 0; i < 5; i++ ){
1549 free( (void *)pattern[i] );
1550 align_free( (void *)inptr[i] );
1551 align_free( (void *)hostptr[i] );
1552 }
1553
1554 }
1555
1556 free_mtdata(d);
1557
1558 return err;
1559
1560 } // end test_buffer_float_fill()
1561