1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <time.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <CL/cl_half.h>
25
26 #include "procs.h"
27
28 //#define HK_DO_NOT_RUN_SHORT_ASYNC 1
29 //#define HK_DO_NOT_RUN_USHORT_ASYNC 1
30 //#define HK_DO_NOT_RUN_CHAR_ASYNC 1
31 //#define HK_DO_NOT_RUN_UCHAR_ASYNC 1
32
33 #define TEST_PRIME_INT ((1<<16)+1)
34 #define TEST_PRIME_UINT ((1U<<16)+1U)
35 #define TEST_PRIME_LONG ((1LL<<32)+1LL)
36 #define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
37 #define TEST_PRIME_SHORT ((1S<<8)+1S)
38 #define TEST_PRIME_FLOAT (float)3.40282346638528860e+38
39 #define TEST_PRIME_HALF 119.f
40 #define TEST_BOOL true
41 #define TEST_PRIME_CHAR 0x77
42
43 #ifndef ulong
44 typedef unsigned long ulong;
45 #endif
46
47 #ifndef uchar
48 typedef unsigned char uchar;
49 #endif
50
51 #ifndef TestStruct
52 typedef struct{
53 int a;
54 float b;
55 } TestStruct;
56 #endif
57
58 //--- the code for the kernel executables
59 static const char *buffer_read_int_kernel_code[] = {
60 "__kernel void test_buffer_read_int(__global int *dst)\n"
61 "{\n"
62 " int tid = get_global_id(0);\n"
63 "\n"
64 " dst[tid] = ((1<<16)+1);\n"
65 "}\n",
66
67 "__kernel void test_buffer_read_int2(__global int2 *dst)\n"
68 "{\n"
69 " int tid = get_global_id(0);\n"
70 "\n"
71 " dst[tid] = ((1<<16)+1);\n"
72 "}\n",
73
74 "__kernel void test_buffer_read_int4(__global int4 *dst)\n"
75 "{\n"
76 " int tid = get_global_id(0);\n"
77 "\n"
78 " dst[tid] = ((1<<16)+1);\n"
79 "}\n",
80
81 "__kernel void test_buffer_read_int8(__global int8 *dst)\n"
82 "{\n"
83 " int tid = get_global_id(0);\n"
84 "\n"
85 " dst[tid] = ((1<<16)+1);\n"
86 "}\n",
87
88 "__kernel void test_buffer_read_int16(__global int16 *dst)\n"
89 "{\n"
90 " int tid = get_global_id(0);\n"
91 "\n"
92 " dst[tid] = ((1<<16)+1);\n"
93 "}\n" };
94
95 static const char *int_kernel_name[] = { "test_buffer_read_int", "test_buffer_read_int2", "test_buffer_read_int4", "test_buffer_read_int8", "test_buffer_read_int16" };
96
97 static const char *buffer_read_uint_kernel_code[] = {
98 "__kernel void test_buffer_read_uint(__global uint *dst)\n"
99 "{\n"
100 " int tid = get_global_id(0);\n"
101 "\n"
102 " dst[tid] = ((1U<<16)+1U);\n"
103 "}\n",
104
105 "__kernel void test_buffer_read_uint2(__global uint2 *dst)\n"
106 "{\n"
107 " int tid = get_global_id(0);\n"
108 "\n"
109 " dst[tid] = ((1U<<16)+1U);\n"
110 "}\n",
111
112 "__kernel void test_buffer_read_uint4(__global uint4 *dst)\n"
113 "{\n"
114 " int tid = get_global_id(0);\n"
115 "\n"
116 " dst[tid] = ((1U<<16)+1U);\n"
117 "}\n",
118
119 "__kernel void test_buffer_read_uint8(__global uint8 *dst)\n"
120 "{\n"
121 " int tid = get_global_id(0);\n"
122 "\n"
123 " dst[tid] = ((1U<<16)+1U);\n"
124 "}\n",
125
126 "__kernel void test_buffer_read_uint16(__global uint16 *dst)\n"
127 "{\n"
128 " int tid = get_global_id(0);\n"
129 "\n"
130 " dst[tid] = ((1U<<16)+1U);\n"
131 "}\n" };
132
133 static const char *uint_kernel_name[] = { "test_buffer_read_uint", "test_buffer_read_uint2", "test_buffer_read_uint4", "test_buffer_read_uint8", "test_buffer_read_uint16" };
134
135 static const char *buffer_read_long_kernel_code[] = {
136 "__kernel void test_buffer_read_long(__global long *dst)\n"
137 "{\n"
138 " int tid = get_global_id(0);\n"
139 "\n"
140 " dst[tid] = ((1L<<32)+1L);\n"
141 "}\n",
142
143 "__kernel void test_buffer_read_long2(__global long2 *dst)\n"
144 "{\n"
145 " int tid = get_global_id(0);\n"
146 "\n"
147 " dst[tid] = ((1L<<32)+1L);\n"
148 "}\n",
149
150 "__kernel void test_buffer_read_long4(__global long4 *dst)\n"
151 "{\n"
152 " int tid = get_global_id(0);\n"
153 "\n"
154 " dst[tid] = ((1L<<32)+1L);\n"
155 "}\n",
156
157 "__kernel void test_buffer_read_long8(__global long8 *dst)\n"
158 "{\n"
159 " int tid = get_global_id(0);\n"
160 "\n"
161 " dst[tid] = ((1L<<32)+1L);\n"
162 "}\n",
163
164 "__kernel void test_buffer_read_long16(__global long16 *dst)\n"
165 "{\n"
166 " int tid = get_global_id(0);\n"
167 "\n"
168 " dst[tid] = ((1L<<32)+1L);\n"
169 "}\n" };
170
171 static const char *long_kernel_name[] = { "test_buffer_read_long", "test_buffer_read_long2", "test_buffer_read_long4", "test_buffer_read_long8", "test_buffer_read_long16" };
172
173 static const char *buffer_read_ulong_kernel_code[] = {
174 "__kernel void test_buffer_read_ulong(__global ulong *dst)\n"
175 "{\n"
176 " int tid = get_global_id(0);\n"
177 "\n"
178 " dst[tid] = ((1UL<<32)+1UL);\n"
179 "}\n",
180
181 "__kernel void test_buffer_read_ulong2(__global ulong2 *dst)\n"
182 "{\n"
183 " int tid = get_global_id(0);\n"
184 "\n"
185 " dst[tid] = ((1UL<<32)+1UL);\n"
186 "}\n",
187
188 "__kernel void test_buffer_read_ulong4(__global ulong4 *dst)\n"
189 "{\n"
190 " int tid = get_global_id(0);\n"
191 "\n"
192 " dst[tid] = ((1UL<<32)+1UL);\n"
193 "}\n",
194
195 "__kernel void test_buffer_read_ulong8(__global ulong8 *dst)\n"
196 "{\n"
197 " int tid = get_global_id(0);\n"
198 "\n"
199 " dst[tid] = ((1UL<<32)+1UL);\n"
200 "}\n",
201
202 "__kernel void test_buffer_read_ulong16(__global ulong16 *dst)\n"
203 "{\n"
204 " int tid = get_global_id(0);\n"
205 "\n"
206 " dst[tid] = ((1UL<<32)+1UL);\n"
207 "}\n" };
208
209 static const char *ulong_kernel_name[] = { "test_buffer_read_ulong", "test_buffer_read_ulong2", "test_buffer_read_ulong4", "test_buffer_read_ulong8", "test_buffer_read_ulong16" };
210
211 static const char *buffer_read_short_kernel_code[] = {
212 "__kernel void test_buffer_read_short(__global short *dst)\n"
213 "{\n"
214 " int tid = get_global_id(0);\n"
215 "\n"
216 " dst[tid] = (short)((1<<8)+1);\n"
217 "}\n",
218
219 "__kernel void test_buffer_read_short2(__global short2 *dst)\n"
220 "{\n"
221 " int tid = get_global_id(0);\n"
222 "\n"
223 " dst[tid] = (short)((1<<8)+1);\n"
224 "}\n",
225
226 "__kernel void test_buffer_read_short4(__global short4 *dst)\n"
227 "{\n"
228 " int tid = get_global_id(0);\n"
229 "\n"
230 " dst[tid] = (short)((1<<8)+1);\n"
231 "}\n",
232
233 "__kernel void test_buffer_read_short8(__global short8 *dst)\n"
234 "{\n"
235 " int tid = get_global_id(0);\n"
236 "\n"
237 " dst[tid] = (short)((1<<8)+1);\n"
238 "}\n",
239
240 "__kernel void test_buffer_read_short16(__global short16 *dst)\n"
241 "{\n"
242 " int tid = get_global_id(0);\n"
243 "\n"
244 " dst[tid] = (short)((1<<8)+1);\n"
245 "}\n" };
246
247 static const char *short_kernel_name[] = { "test_buffer_read_short", "test_buffer_read_short2", "test_buffer_read_short4", "test_buffer_read_short8", "test_buffer_read_short16" };
248
249
250 static const char *buffer_read_ushort_kernel_code[] = {
251 "__kernel void test_buffer_read_ushort(__global ushort *dst)\n"
252 "{\n"
253 " int tid = get_global_id(0);\n"
254 "\n"
255 " dst[tid] = (ushort)((1<<8)+1);\n"
256 "}\n",
257
258 "__kernel void test_buffer_read_ushort2(__global ushort2 *dst)\n"
259 "{\n"
260 " int tid = get_global_id(0);\n"
261 "\n"
262 " dst[tid] = (ushort)((1<<8)+1);\n"
263 "}\n",
264
265 "__kernel void test_buffer_read_ushort4(__global ushort4 *dst)\n"
266 "{\n"
267 " int tid = get_global_id(0);\n"
268 "\n"
269 " dst[tid] = (ushort)((1<<8)+1);\n"
270 "}\n",
271
272 "__kernel void test_buffer_read_ushort8(__global ushort8 *dst)\n"
273 "{\n"
274 " int tid = get_global_id(0);\n"
275 "\n"
276 " dst[tid] = (ushort)((1<<8)+1);\n"
277 "}\n",
278
279 "__kernel void test_buffer_read_ushort16(__global ushort16 *dst)\n"
280 "{\n"
281 " int tid = get_global_id(0);\n"
282 "\n"
283 " dst[tid] = (ushort)((1<<8)+1);\n"
284 "}\n" };
285
286 static const char *ushort_kernel_name[] = { "test_buffer_read_ushort", "test_buffer_read_ushort2", "test_buffer_read_ushort4", "test_buffer_read_ushort8", "test_buffer_read_ushort16" };
287
288
289 static const char *buffer_read_float_kernel_code[] = {
290 "__kernel void test_buffer_read_float(__global float *dst)\n"
291 "{\n"
292 " int tid = get_global_id(0);\n"
293 "\n"
294 " dst[tid] = (float)3.40282346638528860e+38;\n"
295 "}\n",
296
297 "__kernel void test_buffer_read_float2(__global float2 *dst)\n"
298 "{\n"
299 " int tid = get_global_id(0);\n"
300 "\n"
301 " dst[tid] = (float)3.40282346638528860e+38;\n"
302 "}\n",
303
304 "__kernel void test_buffer_read_float4(__global float4 *dst)\n"
305 "{\n"
306 " int tid = get_global_id(0);\n"
307 "\n"
308 " dst[tid] = (float)3.40282346638528860e+38;\n"
309 "}\n",
310
311 "__kernel void test_buffer_read_float8(__global float8 *dst)\n"
312 "{\n"
313 " int tid = get_global_id(0);\n"
314 "\n"
315 " dst[tid] = (float)3.40282346638528860e+38;\n"
316 "}\n",
317
318 "__kernel void test_buffer_read_float16(__global float16 *dst)\n"
319 "{\n"
320 " int tid = get_global_id(0);\n"
321 "\n"
322 " dst[tid] = (float)3.40282346638528860e+38;\n"
323 "}\n" };
324
325 static const char *float_kernel_name[] = { "test_buffer_read_float", "test_buffer_read_float2", "test_buffer_read_float4", "test_buffer_read_float8", "test_buffer_read_float16" };
326
327
328 static const char *buffer_read_half_kernel_code[] = {
329 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
330 "__kernel void test_buffer_read_half(__global half *dst)\n"
331 "{\n"
332 " int tid = get_global_id(0);\n"
333 "\n"
334 " dst[tid] = (half)119;\n"
335 "}\n",
336
337 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
338 "__kernel void test_buffer_read_half2(__global half2 *dst)\n"
339 "{\n"
340 " int tid = get_global_id(0);\n"
341 "\n"
342 " dst[tid] = (half)119;\n"
343 "}\n",
344
345 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
346 "__kernel void test_buffer_read_half4(__global half4 *dst)\n"
347 "{\n"
348 " int tid = get_global_id(0);\n"
349 "\n"
350 " dst[tid] = (half)119;\n"
351 "}\n",
352
353 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
354 "__kernel void test_buffer_read_half8(__global half8 *dst)\n"
355 "{\n"
356 " int tid = get_global_id(0);\n"
357 "\n"
358 " dst[tid] = (half)119;\n"
359 "}\n",
360
361 "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
362 "__kernel void test_buffer_read_half16(__global half16 *dst)\n"
363 "{\n"
364 " int tid = get_global_id(0);\n"
365 "\n"
366 " dst[tid] = (half)119;\n"
367 "}\n"
368 };
369
370 static const char *half_kernel_name[] = { "test_buffer_read_half", "test_buffer_read_half2", "test_buffer_read_half4", "test_buffer_read_half8", "test_buffer_read_half16" };
371
372
373 static const char *buffer_read_char_kernel_code[] = {
374 "__kernel void test_buffer_read_char(__global char *dst)\n"
375 "{\n"
376 " int tid = get_global_id(0);\n"
377 "\n"
378 " dst[tid] = (char)'w';\n"
379 "}\n",
380
381 "__kernel void test_buffer_read_char2(__global char2 *dst)\n"
382 "{\n"
383 " int tid = get_global_id(0);\n"
384 "\n"
385 " dst[tid] = (char)'w';\n"
386 "}\n",
387
388 "__kernel void test_buffer_read_char4(__global char4 *dst)\n"
389 "{\n"
390 " int tid = get_global_id(0);\n"
391 "\n"
392 " dst[tid] = (char)'w';\n"
393 "}\n",
394
395 "__kernel void test_buffer_read_char8(__global char8 *dst)\n"
396 "{\n"
397 " int tid = get_global_id(0);\n"
398 "\n"
399 " dst[tid] = (char)'w';\n"
400 "}\n",
401
402 "__kernel void test_buffer_read_char16(__global char16 *dst)\n"
403 "{\n"
404 " int tid = get_global_id(0);\n"
405 "\n"
406 " dst[tid] = (char)'w';\n"
407 "}\n" };
408
409 static const char *char_kernel_name[] = { "test_buffer_read_char", "test_buffer_read_char2", "test_buffer_read_char4", "test_buffer_read_char8", "test_buffer_read_char16" };
410
411
412 static const char *buffer_read_uchar_kernel_code[] = {
413 "__kernel void test_buffer_read_uchar(__global uchar *dst)\n"
414 "{\n"
415 " int tid = get_global_id(0);\n"
416 "\n"
417 " dst[tid] = 'w';\n"
418 "}\n",
419
420 "__kernel void test_buffer_read_uchar2(__global uchar2 *dst)\n"
421 "{\n"
422 " int tid = get_global_id(0);\n"
423 "\n"
424 " dst[tid] = (uchar)'w';\n"
425 "}\n",
426
427 "__kernel void test_buffer_read_uchar4(__global uchar4 *dst)\n"
428 "{\n"
429 " int tid = get_global_id(0);\n"
430 "\n"
431 " dst[tid] = (uchar)'w';\n"
432 "}\n",
433
434 "__kernel void test_buffer_read_uchar8(__global uchar8 *dst)\n"
435 "{\n"
436 " int tid = get_global_id(0);\n"
437 "\n"
438 " dst[tid] = (uchar)'w';\n"
439 "}\n",
440
441 "__kernel void test_buffer_read_uchar16(__global uchar16 *dst)\n"
442 "{\n"
443 " int tid = get_global_id(0);\n"
444 "\n"
445 " dst[tid] = (uchar)'w';\n"
446 "}\n" };
447
448 static const char *uchar_kernel_name[] = { "test_buffer_read_uchar", "test_buffer_read_uchar2", "test_buffer_read_uchar4", "test_buffer_read_uchar8", "test_buffer_read_uchar16" };
449
450
451 static const char *buffer_read_struct_kernel_code =
452 "typedef struct{\n"
453 "int a;\n"
454 "float b;\n"
455 "} TestStruct;\n"
456 "__kernel void test_buffer_read_struct(__global TestStruct *dst)\n"
457 "{\n"
458 " int tid = get_global_id(0);\n"
459 "\n"
460 " dst[tid].a = ((1<<16)+1);\n"
461 " dst[tid].b = (float)3.40282346638528860e+38;\n"
462 "}\n";
463
464
465 //--- the verify functions
verify_read_int(void * ptr,int n)466 static int verify_read_int(void *ptr, int n)
467 {
468 int i;
469 cl_int *outptr = (cl_int *)ptr;
470
471 for (i=0; i<n; i++){
472 if ( outptr[i] != TEST_PRIME_INT )
473 return -1;
474 }
475
476 return 0;
477 }
478
479
verify_read_uint(void * ptr,int n)480 static int verify_read_uint(void *ptr, int n)
481 {
482 int i;
483 cl_uint *outptr = (cl_uint *)ptr;
484
485 for (i=0; i<n; i++){
486 if ( outptr[i] != TEST_PRIME_UINT )
487 return -1;
488 }
489
490 return 0;
491 }
492
493
verify_read_long(void * ptr,int n)494 static int verify_read_long(void *ptr, int n)
495 {
496 int i;
497 cl_long *outptr = (cl_long *)ptr;
498
499 for (i=0; i<n; i++){
500 if ( outptr[i] != TEST_PRIME_LONG )
501 return -1;
502 }
503
504 return 0;
505 }
506
507
verify_read_ulong(void * ptr,int n)508 static int verify_read_ulong(void *ptr, int n)
509 {
510 int i;
511 cl_ulong *outptr = (cl_ulong *)ptr;
512
513 for (i=0; i<n; i++){
514 if ( outptr[i] != TEST_PRIME_ULONG )
515 return -1;
516 }
517
518 return 0;
519 }
520
521
verify_read_short(void * ptr,int n)522 static int verify_read_short(void *ptr, int n)
523 {
524 int i;
525 cl_short *outptr = (cl_short *)ptr;
526
527 for (i=0; i<n; i++){
528 if ( outptr[i] != (cl_short)((1<<8)+1) )
529 return -1;
530 }
531
532 return 0;
533 }
534
535
verify_read_ushort(void * ptr,int n)536 static int verify_read_ushort(void *ptr, int n)
537 {
538 int i;
539 cl_ushort *outptr = (cl_ushort *)ptr;
540
541 for (i=0; i<n; i++){
542 if ( outptr[i] != (cl_ushort)((1<<8)+1) )
543 return -1;
544 }
545
546 return 0;
547 }
548
549
verify_read_float(void * ptr,int n)550 static int verify_read_float( void *ptr, int n )
551 {
552 int i;
553 cl_float *outptr = (cl_float *)ptr;
554
555 for (i=0; i<n; i++){
556 if ( outptr[i] != TEST_PRIME_FLOAT )
557 return -1;
558 }
559
560 return 0;
561 }
562
563
verify_read_half(void * ptr,int n)564 static int verify_read_half( void *ptr, int n )
565 {
566 int i;
567 cl_half *outptr = (cl_half *)ptr;
568
569 for (i = 0; i < n; i++)
570 {
571 if (cl_half_to_float(outptr[i]) != TEST_PRIME_HALF) return -1;
572 }
573
574 return 0;
575 }
576
577
verify_read_char(void * ptr,int n)578 static int verify_read_char(void *ptr, int n)
579 {
580 int i;
581 cl_char *outptr = (cl_char *)ptr;
582
583 for (i=0; i<n; i++){
584 if ( outptr[i] != TEST_PRIME_CHAR )
585 return -1;
586 }
587
588 return 0;
589 }
590
591
verify_read_uchar(void * ptr,int n)592 static int verify_read_uchar(void *ptr, int n)
593 {
594 int i;
595 cl_uchar *outptr = (cl_uchar *)ptr;
596
597 for (i=0; i<n; i++){
598 if ( outptr[i] != TEST_PRIME_CHAR )
599 return -1;
600 }
601
602 return 0;
603 }
604
605
verify_read_struct(TestStruct * outptr,int n)606 static int verify_read_struct(TestStruct *outptr, int n)
607 {
608 int i;
609
610 for (i=0; i<n; i++)
611 {
612 if ( ( outptr[i].a != TEST_PRIME_INT ) ||
613 ( outptr[i].b != TEST_PRIME_FLOAT ) )
614 return -1;
615 }
616
617 return 0;
618 }
619
620 //----- the test functions
test_buffer_read(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))621 int test_buffer_read( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
622 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
623 {
624 void *outptr[5];
625 void *inptr[5];
626 clProgramWrapper program[5];
627 clKernelWrapper kernel[5];
628 size_t global_work_size[3];
629 cl_int err;
630 int i;
631 size_t ptrSizes[5];
632 int src_flag_id;
633 int total_errors = 0;
634
635 size_t min_alignment = get_min_alignment(context);
636
637 global_work_size[0] = (cl_uint)num_elements;
638
639 ptrSizes[0] = size;
640 ptrSizes[1] = ptrSizes[0] << 1;
641 ptrSizes[2] = ptrSizes[1] << 1;
642 ptrSizes[3] = ptrSizes[2] << 1;
643 ptrSizes[4] = ptrSizes[3] << 1;
644
645 //skip devices that don't support long
646 if (! gHasLong && strstr(type,"long") )
647 {
648 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
649 return CL_SUCCESS;
650 }
651
652 for (i = 0; i < loops; i++)
653 {
654
655 err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
656 &kernelCode[i], kernelName[i]);
657 if (err)
658 {
659 log_error("Creating program for %s\n", type);
660 print_error(err, " Error creating program ");
661 return -1;
662 }
663
664 for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
665 {
666 clMemWrapper buffer;
667 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
668 if ( ! outptr[i] ){
669 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
670 return -1;
671 }
672 inptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
673 if ( ! inptr[i] ){
674 log_error( " unable to allocate %d bytes for inptr\n", (int)( ptrSizes[i] * num_elements ) );
675 return -1;
676 }
677
678
679 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
680 buffer =
681 clCreateBuffer(context, flag_set[src_flag_id],
682 ptrSizes[i] * num_elements, inptr[i], &err);
683 else
684 buffer = clCreateBuffer(context, flag_set[src_flag_id],
685 ptrSizes[i] * num_elements, NULL, &err);
686 if (err != CL_SUCCESS)
687 {
688 print_error(err, " clCreateBuffer failed\n" );
689 align_free( outptr[i] );
690 align_free( inptr[i] );
691 return -1;
692 }
693
694 err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
695 if ( err != CL_SUCCESS ){
696 print_error( err, "clSetKernelArg failed" );
697 align_free( outptr[i] );
698 align_free( inptr[i] );
699 return -1;
700 }
701
702 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL,
703 global_work_size, NULL, 0, NULL, NULL);
704 if ( err != CL_SUCCESS ){
705 print_error( err, "clEnqueueNDRangeKernel failed" );
706 align_free( outptr[i] );
707 align_free( inptr[i] );
708 return -1;
709 }
710
711 err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
712 ptrSizes[i] * num_elements, outptr[i], 0,
713 NULL, NULL);
714 if ( err != CL_SUCCESS ){
715 print_error( err, "clEnqueueReadBuffer failed" );
716 align_free( outptr[i] );
717 align_free( inptr[i] );
718 return -1;
719 }
720
721 if (fn(outptr[i], num_elements*(1<<i))){
722 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
723 1 << i, flag_set_names[src_flag_id]);
724 total_errors++;
725 }
726 else{
727 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
728 1 << i, flag_set_names[src_flag_id]);
729 }
730
731 err = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0,
732 ptrSizes[i] * num_elements, inptr[i], 0,
733 NULL, NULL);
734 if (err != CL_SUCCESS)
735 {
736 print_error( err, "clEnqueueReadBuffer failed" );
737 align_free( outptr[i] );
738 align_free( inptr[i] );
739 return -1;
740 }
741
742 if (fn(inptr[i], num_elements*(1<<i))){
743 log_error( " %s%d test failed in-place readback\n", type, 1<<i );
744 total_errors++;
745 }
746 else{
747 log_info( " %s%d test passed in-place readback\n", type, 1<<i );
748 }
749
750
751 // cleanup
752 align_free( outptr[i] );
753 align_free( inptr[i] );
754 }
755 } // mem flag
756
757 return total_errors;
758
759 } // end test_buffer_read()
760
test_buffer_read_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))761 int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
762 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
763 {
764 clProgramWrapper program[5];
765 clKernelWrapper kernel[5];
766 clEventWrapper event;
767 void *outptr[5];
768 void *inptr[5];
769 size_t global_work_size[3];
770 cl_int err;
771 int i;
772 size_t lastIndex;
773 size_t ptrSizes[5];
774 int src_flag_id;
775 int total_errors = 0;
776
777 size_t min_alignment = get_min_alignment(context);
778
779 global_work_size[0] = (cl_uint)num_elements;
780
781 ptrSizes[0] = size;
782 ptrSizes[1] = ptrSizes[0] << 1;
783 ptrSizes[2] = ptrSizes[1] << 1;
784 ptrSizes[3] = ptrSizes[2] << 1;
785 ptrSizes[4] = ptrSizes[3] << 1;
786
787 //skip devices that don't support long
788 if (! gHasLong && strstr(type,"long") )
789 {
790 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
791 return CL_SUCCESS;
792 }
793
794 for (i = 0; i < loops; i++)
795 {
796
797 err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
798 &kernelCode[i], kernelName[i]);
799 if (err)
800 {
801 log_error(" Error creating program for %s\n", type);
802 return -1;
803 }
804
805 for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
806 {
807 clMemWrapper buffer;
808 outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
809 if ( ! outptr[i] ){
810 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
811 return -1;
812 }
813 memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
814 inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
815 if ( ! inptr[i] ){
816 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
817 return -1;
818 }
819 memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
820
821
822 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
823 buffer =
824 clCreateBuffer(context, flag_set[src_flag_id],
825 ptrSizes[i] * num_elements, inptr[i], &err);
826 else
827 buffer = clCreateBuffer(context, flag_set[src_flag_id],
828 ptrSizes[i] * num_elements, NULL, &err);
829 if ( err != CL_SUCCESS ){
830 print_error(err, " clCreateBuffer failed\n" );
831 align_free( outptr[i] );
832 align_free( inptr[i] );
833 return -1;
834 }
835
836 err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
837 if ( err != CL_SUCCESS ){
838 print_error( err, "clSetKernelArg failed" );
839 align_free( outptr[i] );
840 align_free( inptr[i] );
841 return -1;
842 }
843
844 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
845 if ( err != CL_SUCCESS ){
846 print_error( err, "clEnqueueNDRangeKernel failed" );
847 align_free( outptr[i] );
848 align_free( inptr[i] );
849 return -1;
850 }
851
852 lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
853 err = clEnqueueReadBuffer(queue, buffer, false, 0,
854 ptrSizes[i] * num_elements, outptr[i], 0,
855 NULL, &event);
856 #ifdef CHECK_FOR_NON_WAIT
857 if ( ((uchar *)outptr[i])[lastIndex] ){
858 log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
859 log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
860 }
861 #endif
862 if ( err != CL_SUCCESS ){
863 print_error( err, "clEnqueueReadBuffer failed" );
864 align_free( outptr[i] );
865 align_free( inptr[i] );
866 return -1;
867 }
868 err = clWaitForEvents(1, &event );
869 if ( err != CL_SUCCESS ){
870 print_error( err, "clWaitForEvents() failed" );
871 align_free( outptr[i] );
872 align_free( inptr[i] );
873 return -1;
874 }
875
876 if ( fn(outptr[i], num_elements*(1<<i)) ){
877 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
878 1 << i, flag_set_names[src_flag_id]);
879 total_errors++;
880 }
881 else{
882 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
883 1 << i, flag_set_names[src_flag_id]);
884 }
885
886 // cleanup
887 align_free( outptr[i] );
888 align_free( inptr[i] );
889 }
890 } // mem flags
891
892
893 return total_errors;
894
895 } // end test_buffer_read_array_async()
896
897
test_buffer_read_array_barrier(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))898 int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
899 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
900 {
901 clProgramWrapper program[5];
902 clKernelWrapper kernel[5];
903 clEventWrapper event;
904 void *outptr[5], *inptr[5];
905 size_t global_work_size[3];
906 cl_int err;
907 int i;
908 size_t lastIndex;
909 size_t ptrSizes[5];
910 int src_flag_id;
911 int total_errors = 0;
912
913 size_t min_alignment = get_min_alignment(context);
914
915 global_work_size[0] = (cl_uint)num_elements;
916
917 ptrSizes[0] = size;
918 ptrSizes[1] = ptrSizes[0] << 1;
919 ptrSizes[2] = ptrSizes[1] << 1;
920 ptrSizes[3] = ptrSizes[2] << 1;
921 ptrSizes[4] = ptrSizes[3] << 1;
922
923 //skip devices that don't support long
924 if (! gHasLong && strstr(type,"long") )
925 {
926 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
927 return CL_SUCCESS;
928 }
929
930 for (i = 0; i < loops; i++)
931 {
932
933 err = create_single_kernel_helper(context, &program[i], &kernel[i], 1,
934 &kernelCode[i], kernelName[i]);
935 if (err)
936 {
937 log_error(" Error creating program for %s\n", type);
938 return -1;
939 }
940
941 for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++)
942 {
943 clMemWrapper buffer;
944 outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
945 if ( ! outptr[i] ){
946 log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) );
947 return -1;
948 }
949 memset( outptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
950 inptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment);
951 if ( ! inptr[i] ){
952 log_error( " unable to allocate %d bytes for inptr\n", (int)(ptrSizes[i] * num_elements) );
953 return -1;
954 }
955 memset( inptr[i], 0, ptrSizes[i] * num_elements ); // initialize to zero to tell difference
956
957 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
958 buffer =
959 clCreateBuffer(context, flag_set[src_flag_id],
960 ptrSizes[i] * num_elements, inptr[i], &err);
961 else
962 buffer = clCreateBuffer(context, flag_set[src_flag_id],
963 ptrSizes[i] * num_elements, NULL, &err);
964 if ( err != CL_SUCCESS ){
965 print_error(err, " clCreateBuffer failed\n" );
966 align_free( outptr[i] );
967 align_free( inptr[i] );
968 return -1;
969 }
970
971 err = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), (void *)&buffer);
972 if ( err != CL_SUCCESS ){
973 print_error( err, "clSetKernelArgs failed" );
974 align_free( outptr[i] );
975 align_free( inptr[i] );
976 return -1;
977 }
978
979 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
980 if ( err != CL_SUCCESS ){
981 print_error( err, "clEnqueueNDRangeKernel failed" );
982 align_free( outptr[i] );
983 align_free( inptr[i] );
984 return -1;
985 }
986
987 lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
988 err = clEnqueueReadBuffer(queue, buffer, false, 0,
989 ptrSizes[i] * num_elements,
990 (void *)(outptr[i]), 0, NULL, &event);
991 #ifdef CHECK_FOR_NON_WAIT
992 if ( ((uchar *)outptr[i])[lastIndex] ){
993 log_error( " clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
994 log_error( " Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
995 }
996 #endif
997 if ( err != CL_SUCCESS ){
998 print_error( err, "clEnqueueReadBuffer failed" );
999 align_free( outptr[i] );
1000 align_free( inptr[i] );
1001 return -1;
1002 }
1003 err = clEnqueueBarrierWithWaitList(queue, 0, NULL, NULL);
1004 if ( err != CL_SUCCESS ){
1005 print_error( err, "clEnqueueBarrierWithWaitList() failed" );
1006 align_free( outptr[i] );
1007 return -1;
1008 }
1009
1010 err = clWaitForEvents(1, &event);
1011 if ( err != CL_SUCCESS ){
1012 print_error( err, "clWaitForEvents() failed" );
1013 align_free( outptr[i] );
1014 align_free( inptr[i] );
1015 return -1;
1016 }
1017
1018 if ( fn(outptr[i], num_elements*(1<<i)) ){
1019 log_error(" %s%d test failed. cl_mem_flags src: %s\n", type,
1020 1 << i, flag_set_names[src_flag_id]);
1021 total_errors++;
1022 }
1023 else{
1024 log_info(" %s%d test passed. cl_mem_flags src: %s\n", type,
1025 1 << i, flag_set_names[src_flag_id]);
1026 }
1027
1028 // cleanup
1029 align_free( outptr[i] );
1030 align_free( inptr[i] );
1031 }
1032 } // cl_mem flags
1033 return total_errors;
1034
1035 } // end test_buffer_read_array_barrier()
1036
1037
1038 #define DECLARE_READ_TEST(type, realType) \
1039 int test_buffer_read_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1040 { \
1041 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1042 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1043 }
1044
DECLARE_READ_TEST(int,cl_int)1045 DECLARE_READ_TEST(int, cl_int)
1046 DECLARE_READ_TEST(uint, cl_uint)
1047 DECLARE_READ_TEST(long, cl_long)
1048 DECLARE_READ_TEST(ulong, cl_ulong)
1049 DECLARE_READ_TEST(short, cl_short)
1050 DECLARE_READ_TEST(ushort, cl_ushort)
1051 DECLARE_READ_TEST(float, cl_float)
1052 DECLARE_READ_TEST(char, cl_char)
1053 DECLARE_READ_TEST(uchar, cl_uchar)
1054
1055 int test_buffer_read_half(cl_device_id deviceID, cl_context context,
1056 cl_command_queue queue, int num_elements)
1057 {
1058 PASSIVE_REQUIRE_FP16_SUPPORT(deviceID)
1059 return test_buffer_read( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5,
1060 buffer_read_half_kernel_code, half_kernel_name, verify_read_half );
1061 }
1062
1063
1064 #define DECLARE_ASYNC_TEST(type, realType) \
1065 int test_buffer_read_async_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1066 { \
1067 return test_buffer_read_async( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1068 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1069 }
1070
DECLARE_ASYNC_TEST(char,cl_char)1071 DECLARE_ASYNC_TEST(char, cl_char)
1072 DECLARE_ASYNC_TEST(uchar, cl_uchar)
1073 DECLARE_ASYNC_TEST(short, cl_short)
1074 DECLARE_ASYNC_TEST(ushort, cl_ushort)
1075 DECLARE_ASYNC_TEST(int, cl_int)
1076 DECLARE_ASYNC_TEST(uint, cl_uint)
1077 DECLARE_ASYNC_TEST(long, cl_long)
1078 DECLARE_ASYNC_TEST(ulong, cl_ulong)
1079 DECLARE_ASYNC_TEST(float, cl_float)
1080
1081
1082 #define DECLARE_BARRIER_TEST(type, realType) \
1083 int test_buffer_read_array_barrier_##type( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) \
1084 { \
1085 return test_buffer_read_array_barrier( deviceID, context, queue, num_elements, sizeof( realType ), (char*)#type, 5, \
1086 buffer_read_##type##_kernel_code, type##_kernel_name, verify_read_##type ); \
1087 }
1088
1089 DECLARE_BARRIER_TEST(int, cl_int)
1090 DECLARE_BARRIER_TEST(uint, cl_uint)
1091 DECLARE_BARRIER_TEST(long, cl_long)
1092 DECLARE_BARRIER_TEST(ulong, cl_ulong)
1093 DECLARE_BARRIER_TEST(short, cl_short)
1094 DECLARE_BARRIER_TEST(ushort, cl_ushort)
1095 DECLARE_BARRIER_TEST(char, cl_char)
1096 DECLARE_BARRIER_TEST(uchar, cl_uchar)
1097 DECLARE_BARRIER_TEST(float, cl_float)
1098
1099 int test_buffer_read_struct(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1100 {
1101 cl_mem buffers[1];
1102 TestStruct *output_ptr;
1103 cl_program program[1];
1104 cl_kernel kernel[1];
1105 size_t global_work_size[3];
1106 cl_int err;
1107 size_t objSize = sizeof(TestStruct);
1108
1109 size_t min_alignment = get_min_alignment(context);
1110
1111 global_work_size[0] = (cl_uint)num_elements;
1112
1113 output_ptr = (TestStruct*)align_malloc(objSize * num_elements, min_alignment);
1114 if ( ! output_ptr ){
1115 log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
1116 return -1;
1117 }
1118 buffers[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1119 objSize * num_elements, NULL, &err);
1120 if ( err != CL_SUCCESS ){
1121 print_error( err, " clCreateBuffer failed\n" );
1122 align_free( output_ptr );
1123 return -1;
1124 }
1125
1126 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_struct_kernel_code, "test_buffer_read_struct" );
1127 if ( err ){
1128 clReleaseProgram( program[0] );
1129 align_free( output_ptr );
1130 return -1;
1131 }
1132
1133 err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&buffers[0] );
1134 if ( err != CL_SUCCESS){
1135 print_error( err, "clSetKernelArg failed" );
1136 clReleaseMemObject( buffers[0] );
1137 clReleaseKernel( kernel[0] );
1138 clReleaseProgram( program[0] );
1139 align_free( output_ptr );
1140 return -1;
1141 }
1142
1143 err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1144 if ( err != CL_SUCCESS ){
1145 print_error( err, "clEnqueueNDRangeKernel failed" );
1146 clReleaseMemObject( buffers[0] );
1147 clReleaseKernel( kernel[0] );
1148 clReleaseProgram( program[0] );
1149 align_free( output_ptr );
1150 return -1;
1151 }
1152
1153 err = clEnqueueReadBuffer( queue, buffers[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
1154 if ( err != CL_SUCCESS){
1155 print_error( err, "clEnqueueReadBuffer failed" );
1156 clReleaseMemObject( buffers[0] );
1157 clReleaseKernel( kernel[0] );
1158 clReleaseProgram( program[0] );
1159 align_free( output_ptr );
1160 return -1;
1161 }
1162
1163 if (verify_read_struct(output_ptr, num_elements)){
1164 log_error(" struct test failed\n");
1165 err = -1;
1166 }
1167 else{
1168 log_info(" struct test passed\n");
1169 err = 0;
1170 }
1171
1172 // cleanup
1173 clReleaseMemObject( buffers[0] );
1174 clReleaseKernel( kernel[0] );
1175 clReleaseProgram( program[0] );
1176 align_free( output_ptr );
1177
1178 return err;
1179 }
1180
1181
testRandomReadSize(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_uint startOfRead,size_t sizeOfRead)1182 static int testRandomReadSize( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_uint startOfRead, size_t sizeOfRead )
1183 {
1184 cl_mem buffers[3];
1185 int *outptr[3];
1186 cl_program program[3];
1187 cl_kernel kernel[3];
1188 size_t global_work_size[3];
1189 cl_int err;
1190 int i, j;
1191 size_t ptrSizes[3]; // sizeof(int), sizeof(int2), sizeof(int4)
1192 int total_errors = 0;
1193 size_t min_alignment = get_min_alignment(context);
1194
1195 global_work_size[0] = (cl_uint)num_elements;
1196
1197 ptrSizes[0] = sizeof(cl_int);
1198 ptrSizes[1] = ptrSizes[0] << 1;
1199 ptrSizes[2] = ptrSizes[1] << 1;
1200 for ( i = 0; i < 3; i++ ){
1201 outptr[i] = (int *)align_malloc( ptrSizes[i] * num_elements, min_alignment);
1202 if ( ! outptr[i] ){
1203 log_error( " Unable to allocate %d bytes for outptr[%d]\n", (int)(ptrSizes[i] * num_elements), i );
1204 for ( j = 0; j < i; j++ ){
1205 clReleaseMemObject( buffers[j] );
1206 align_free( outptr[j] );
1207 }
1208 return -1;
1209 }
1210 buffers[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1211 ptrSizes[i] * num_elements, NULL, &err);
1212 if ( err != CL_SUCCESS ){
1213 print_error(err, " clCreateBuffer failed\n" );
1214 for ( j = 0; j < i; j++ ){
1215 clReleaseMemObject( buffers[j] );
1216 align_free( outptr[j] );
1217 }
1218 align_free( outptr[i] );
1219 return -1;
1220 }
1221 }
1222
1223 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &buffer_read_int_kernel_code[0], "test_buffer_read_int" );
1224 if ( err ){
1225 log_error( " Error creating program for int\n" );
1226 for ( i = 0; i < 3; i++ ){
1227 clReleaseMemObject( buffers[i] );
1228 align_free( outptr[i] );
1229 }
1230 return -1;
1231 }
1232
1233 err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &buffer_read_int_kernel_code[1], "test_buffer_read_int2" );
1234 if ( err ){
1235 log_error( " Error creating program for int2\n" );
1236 clReleaseKernel( kernel[0] );
1237 clReleaseProgram( program[0] );
1238 for ( i = 0; i < 3; i++ ){
1239 clReleaseMemObject( buffers[i] );
1240 align_free( outptr[i] );
1241 }
1242 return -1;
1243 }
1244
1245 err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &buffer_read_int_kernel_code[2], "test_buffer_read_int4" );
1246 if ( err ){
1247 log_error( " Error creating program for int4\n" );
1248 clReleaseKernel( kernel[0] );
1249 clReleaseProgram( program[0] );
1250 clReleaseKernel( kernel[1] );
1251 clReleaseProgram( program[1] );
1252 for ( i = 0; i < 3; i++ ){
1253 clReleaseMemObject( buffers[i] );
1254 align_free( outptr[i] );
1255 }
1256 return -1;
1257 }
1258
1259 for (i=0; i<3; i++){
1260 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[i] );
1261 if ( err != CL_SUCCESS ){
1262 print_error( err, "clSetKernelArgs failed" );
1263 clReleaseMemObject( buffers[i] );
1264 clReleaseKernel( kernel[i] );
1265 clReleaseProgram( program[i] );
1266 align_free( outptr[i] );
1267 return -1;
1268 }
1269
1270 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1271 if ( err != CL_SUCCESS ){
1272 print_error( err, "clEnqueueNDRangeKernel failed" );
1273 clReleaseMemObject( buffers[i] );
1274 clReleaseKernel( kernel[i] );
1275 clReleaseProgram( program[i] );
1276 align_free( outptr[i] );
1277 return -1;
1278 }
1279
1280 err = clEnqueueReadBuffer( queue, buffers[i], true, startOfRead*ptrSizes[i], ptrSizes[i]*sizeOfRead, (void *)(outptr[i]), 0, NULL, NULL );
1281 if ( err != CL_SUCCESS ){
1282 print_error( err, "clEnqueueReadBuffer failed" );
1283 clReleaseMemObject( buffers[i] );
1284 clReleaseKernel( kernel[i] );
1285 clReleaseProgram( program[i] );
1286 align_free( outptr[i] );
1287 return -1;
1288 }
1289
1290 if ( verify_read_int( outptr[i], (int)sizeOfRead*(1<<i) ) ){
1291 log_error(" random size from %d, size: %d test failed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1292 total_errors++;
1293 }
1294 else{
1295 log_info(" random size from %d, size: %d test passed on i%d\n", (int)startOfRead, (int)sizeOfRead, 1<<i);
1296 }
1297
1298 // cleanup
1299 clReleaseMemObject( buffers[i] );
1300 clReleaseKernel( kernel[i] );
1301 clReleaseProgram( program[i] );
1302 align_free( outptr[i] );
1303 }
1304
1305 return total_errors;
1306
1307 } // end testRandomReadSize()
1308
1309
test_buffer_read_random_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1310 int test_buffer_read_random_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1311 {
1312 int err = 0;
1313 int i;
1314 cl_uint start;
1315 size_t size;
1316 MTdata d = init_genrand( gRandomSeed );
1317
1318 // now test for random sizes of array being read
1319 for ( i = 0; i < 8; i++ ){
1320 start = (cl_uint)get_random_float( 0.f, (float)(num_elements - 8), d );
1321 size = (size_t)get_random_float( 8.f, (float)(num_elements - start), d );
1322 if (testRandomReadSize( deviceID, context, queue, num_elements, start, size ))
1323 err++;
1324 }
1325
1326 free_mtdata(d);
1327
1328 return err;
1329 }
1330
1331