1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include "procs.h"
25 #include "harness/errorHelpers.h"
26
27 #define USE_LOCAL_WORK_GROUP 1
28
29 #ifndef uchar
30 typedef unsigned char uchar;
31 #endif
32
33 #ifndef TestStruct
34 typedef struct{
35 int a;
36 float b;
37 } TestStruct;
38 #endif
39
40 // If this is set to 1 the writes are done via map/unmap
41 static int gTestMap = 0;
42
43 const char *buffer_write_int_kernel_code[] = {
44 "__kernel void test_buffer_write_int(__global int *src, __global int *dst)\n"
45 "{\n"
46 " int tid = get_global_id(0);\n"
47 "\n"
48 " dst[tid] = src[tid];\n"
49 "}\n",
50
51 "__kernel void test_buffer_write_int2(__global int2 *src, __global int2 *dst)\n"
52 "{\n"
53 " int tid = get_global_id(0);\n"
54 "\n"
55 " dst[tid] = src[tid];\n"
56 "}\n",
57
58 "__kernel void test_buffer_write_int4(__global int4 *src, __global int4 *dst)\n"
59 "{\n"
60 " int tid = get_global_id(0);\n"
61 "\n"
62 " dst[tid] = src[tid];\n"
63 "}\n",
64
65 "__kernel void test_buffer_write_int8(__global int8 *src, __global int8 *dst)\n"
66 "{\n"
67 " int tid = get_global_id(0);\n"
68 "\n"
69 " dst[tid] = src[tid];\n"
70 "}\n",
71
72 "__kernel void test_buffer_write_int16(__global int16 *src, __global int16 *dst)\n"
73 "{\n"
74 " int tid = get_global_id(0);\n"
75 "\n"
76 " dst[tid] = src[tid];\n"
77 "}\n" };
78
79 static const char *int_kernel_name[] = { "test_buffer_write_int", "test_buffer_write_int2", "test_buffer_write_int4", "test_buffer_write_int8", "test_buffer_write_int16" };
80
81
82 const char *buffer_write_uint_kernel_code[] = {
83 "__kernel void test_buffer_write_uint(__global uint *src, __global uint *dst)\n"
84 "{\n"
85 " int tid = get_global_id(0);\n"
86 "\n"
87 " dst[tid] = src[tid];\n"
88 "}\n",
89
90 "__kernel void test_buffer_write_uint2(__global uint2 *src, __global uint2 *dst)\n"
91 "{\n"
92 " int tid = get_global_id(0);\n"
93 "\n"
94 " dst[tid] = src[tid];\n"
95 "}\n",
96
97 "__kernel void test_buffer_write_uint4(__global uint4 *src, __global uint4 *dst)\n"
98 "{\n"
99 " int tid = get_global_id(0);\n"
100 "\n"
101 " dst[tid] = src[tid];\n"
102 "}\n",
103
104 "__kernel void test_buffer_write_uint8(__global uint8 *src, __global uint8 *dst)\n"
105 "{\n"
106 " int tid = get_global_id(0);\n"
107 "\n"
108 " dst[tid] = src[tid];\n"
109 "}\n",
110
111 "__kernel void test_buffer_write_uint16(__global uint16 *src, __global uint16 *dst)\n"
112 "{\n"
113 " int tid = get_global_id(0);\n"
114 "\n"
115 " dst[tid] = src[tid];\n"
116 "}\n" };
117
118 static const char *uint_kernel_name[] = { "test_buffer_write_uint", "test_buffer_write_uint2", "test_buffer_write_uint4", "test_buffer_write_uint8", "test_buffer_write_uint16" };
119
120
121 const char *buffer_write_ushort_kernel_code[] = {
122 "__kernel void test_buffer_write_ushort(__global ushort *src, __global ushort *dst)\n"
123 "{\n"
124 " int tid = get_global_id(0);\n"
125 "\n"
126 " dst[tid] = src[tid];\n"
127 "}\n",
128
129 "__kernel void test_buffer_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
130 "{\n"
131 " int tid = get_global_id(0);\n"
132 "\n"
133 " dst[tid] = src[tid];\n"
134 "}\n",
135
136 "__kernel void test_buffer_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
137 "{\n"
138 " int tid = get_global_id(0);\n"
139 "\n"
140 " dst[tid] = src[tid];\n"
141 "}\n",
142
143 "__kernel void test_buffer_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
144 "{\n"
145 " int tid = get_global_id(0);\n"
146 "\n"
147 " dst[tid] = src[tid];\n"
148 "}\n",
149
150 "__kernel void test_buffer_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
151 "{\n"
152 " int tid = get_global_id(0);\n"
153 "\n"
154 " dst[tid] = src[tid];\n"
155 "}\n" };
156
157 static const char *ushort_kernel_name[] = { "test_buffer_write_ushort", "test_buffer_write_ushort2", "test_buffer_write_ushort4", "test_buffer_write_ushort8", "test_buffer_write_ushort16" };
158
159
160
161 const char *buffer_write_short_kernel_code[] = {
162 "__kernel void test_buffer_write_short(__global short *src, __global short *dst)\n"
163 "{\n"
164 " int tid = get_global_id(0);\n"
165 "\n"
166 " dst[tid] = src[tid];\n"
167 "}\n",
168
169 "__kernel void test_buffer_write_short2(__global short2 *src, __global short2 *dst)\n"
170 "{\n"
171 " int tid = get_global_id(0);\n"
172 "\n"
173 " dst[tid] = src[tid];\n"
174 "}\n",
175
176 "__kernel void test_buffer_write_short4(__global short4 *src, __global short4 *dst)\n"
177 "{\n"
178 " int tid = get_global_id(0);\n"
179 "\n"
180 " dst[tid] = src[tid];\n"
181 "}\n",
182
183 "__kernel void test_buffer_write_short8(__global short8 *src, __global short8 *dst)\n"
184 "{\n"
185 " int tid = get_global_id(0);\n"
186 "\n"
187 " dst[tid] = src[tid];\n"
188 "}\n",
189
190 "__kernel void test_buffer_write_short16(__global short16 *src, __global short16 *dst)\n"
191 "{\n"
192 " int tid = get_global_id(0);\n"
193 "\n"
194 " dst[tid] = src[tid];\n"
195 "}\n" };
196
197 static const char *short_kernel_name[] = { "test_buffer_write_short", "test_buffer_write_short2", "test_buffer_write_short4", "test_buffer_write_short8", "test_buffer_write_short16" };
198
199
200 const char *buffer_write_char_kernel_code[] = {
201 "__kernel void test_buffer_write_char(__global char *src, __global char *dst)\n"
202 "{\n"
203 " int tid = get_global_id(0);\n"
204 "\n"
205 " dst[tid] = src[tid];\n"
206 "}\n",
207
208 "__kernel void test_buffer_write_char2(__global char2 *src, __global char2 *dst)\n"
209 "{\n"
210 " int tid = get_global_id(0);\n"
211 "\n"
212 " dst[tid] = src[tid];\n"
213 "}\n",
214
215 "__kernel void test_buffer_write_char4(__global char4 *src, __global char4 *dst)\n"
216 "{\n"
217 " int tid = get_global_id(0);\n"
218 "\n"
219 " dst[tid] = src[tid];\n"
220 "}\n",
221
222 "__kernel void test_buffer_write_char8(__global char8 *src, __global char8 *dst)\n"
223 "{\n"
224 " int tid = get_global_id(0);\n"
225 "\n"
226 " dst[tid] = src[tid];\n"
227 "}\n",
228
229 "__kernel void test_buffer_write_char16(__global char16 *src, __global char16 *dst)\n"
230 "{\n"
231 " int tid = get_global_id(0);\n"
232 "\n"
233 " dst[tid] = src[tid];\n"
234 "}\n" };
235
236 static const char *char_kernel_name[] = { "test_buffer_write_char", "test_buffer_write_char2", "test_buffer_write_char4", "test_buffer_write_char8", "test_buffer_write_char16" };
237
238
239 const char *buffer_write_uchar_kernel_code[] = {
240 "__kernel void test_buffer_write_uchar(__global uchar *src, __global uchar *dst)\n"
241 "{\n"
242 " int tid = get_global_id(0);\n"
243 "\n"
244 " dst[tid] = src[tid];\n"
245 "}\n",
246
247 "__kernel void test_buffer_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
248 "{\n"
249 " int tid = get_global_id(0);\n"
250 "\n"
251 " dst[tid] = src[tid];\n"
252 "}\n",
253
254 "__kernel void test_buffer_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
255 "{\n"
256 " int tid = get_global_id(0);\n"
257 "\n"
258 " dst[tid] = src[tid];\n"
259 "}\n",
260
261 "__kernel void test_buffer_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
262 "{\n"
263 " int tid = get_global_id(0);\n"
264 "\n"
265 " dst[tid] = src[tid];\n"
266 "}\n",
267
268 "__kernel void test_buffer_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
269 "{\n"
270 " int tid = get_global_id(0);\n"
271 "\n"
272 " dst[tid] = src[tid];\n"
273 "}\n" };
274
275 static const char *uchar_kernel_name[] = { "test_buffer_write_uchar", "test_buffer_write_uchar2", "test_buffer_write_uchar4", "test_buffer_write_uchar8", "test_buffer_write_uchar16" };
276
277
278 const char *buffer_write_float_kernel_code[] = {
279 "__kernel void test_buffer_write_float(__global float *src, __global float *dst)\n"
280 "{\n"
281 " int tid = get_global_id(0);\n"
282 "\n"
283 " dst[tid] = src[tid];\n"
284 "}\n",
285
286 "__kernel void test_buffer_write_float2(__global float2 *src, __global float2 *dst)\n"
287 "{\n"
288 " int tid = get_global_id(0);\n"
289 "\n"
290 " dst[tid] = src[tid];\n"
291 "}\n",
292
293 "__kernel void test_buffer_write_float4(__global float4 *src, __global float4 *dst)\n"
294 "{\n"
295 " int tid = get_global_id(0);\n"
296 "\n"
297 " dst[tid] = src[tid];\n"
298 "}\n",
299
300 "__kernel void test_buffer_write_float8(__global float8 *src, __global float8 *dst)\n"
301 "{\n"
302 " int tid = get_global_id(0);\n"
303 "\n"
304 " dst[tid] = src[tid];\n"
305 "}\n",
306
307 "__kernel void test_buffer_write_float16(__global float16 *src, __global float16 *dst)\n"
308 "{\n"
309 " int tid = get_global_id(0);\n"
310 "\n"
311 " dst[tid] = src[tid];\n"
312 "}\n" };
313
314 static const char *float_kernel_name[] = { "test_buffer_write_float", "test_buffer_write_float2", "test_buffer_write_float4", "test_buffer_write_float8", "test_buffer_write_float16" };
315
316
317 const char *buffer_write_half_kernel_code[] = {
318 "__kernel void test_buffer_write_half(__global half *src, __global float *dst)\n"
319 "{\n"
320 " int tid = get_global_id(0);\n"
321 "\n"
322 " dst[tid] = vload_half( tid * 2, src );\n"
323 "}\n",
324
325 "__kernel void test_buffer_write_half2(__global half2 *src, __global float2 *dst)\n"
326 "{\n"
327 " int tid = get_global_id(0);\n"
328 "\n"
329 " dst[tid] = vload_half2( tid * 2, src );\n"
330 "}\n",
331
332 "__kernel void test_buffer_write_half4(__global half4 *src, __global float4 *dst)\n"
333 "{\n"
334 " int tid = get_global_id(0);\n"
335 "\n"
336 " dst[tid] = vload_half4( tid * 2, src );\n"
337 "}\n",
338
339 "__kernel void test_buffer_write_half8(__global half8 *src, __global float8 *dst)\n"
340 "{\n"
341 " int tid = get_global_id(0);\n"
342 "\n"
343 " dst[tid] = vload_half8( tid * 2, src );\n"
344 "}\n",
345
346 "__kernel void test_buffer_write_half16(__global half16 *src, __global float16 *dst)\n"
347 "{\n"
348 " int tid = get_global_id(0);\n"
349 "\n"
350 " dst[tid] = vload_half16( tid * 2, src );\n"
351 "}\n" };
352
353 static const char *half_kernel_name[] = { "test_buffer_write_half", "test_buffer_write_half2", "test_buffer_write_half4", "test_buffer_write_half8", "test_buffer_write_half16" };
354
355
356 const char *buffer_write_long_kernel_code[] = {
357 "__kernel void test_buffer_write_long(__global long *src, __global long *dst)\n"
358 "{\n"
359 " int tid = get_global_id(0);\n"
360 "\n"
361 " dst[tid] = src[tid];\n"
362 "}\n",
363
364 "__kernel void test_buffer_write_long2(__global long2 *src, __global long2 *dst)\n"
365 "{\n"
366 " int tid = get_global_id(0);\n"
367 "\n"
368 " dst[tid] = src[tid];\n"
369 "}\n",
370
371 "__kernel void test_buffer_write_long4(__global long4 *src, __global long4 *dst)\n"
372 "{\n"
373 " int tid = get_global_id(0);\n"
374 "\n"
375 " dst[tid] = src[tid];\n"
376 "}\n",
377
378 "__kernel void test_buffer_write_long8(__global long8 *src, __global long8 *dst)\n"
379 "{\n"
380 " int tid = get_global_id(0);\n"
381 "\n"
382 " dst[tid] = src[tid];\n"
383 "}\n",
384
385 "__kernel void test_buffer_write_long16(__global long16 *src, __global long16 *dst)\n"
386 "{\n"
387 " int tid = get_global_id(0);\n"
388 "\n"
389 " dst[tid] = src[tid];\n"
390 "}\n" };
391
392 static const char *long_kernel_name[] = { "test_buffer_write_long", "test_buffer_write_long2", "test_buffer_write_long4", "test_buffer_write_long8", "test_buffer_write_long16" };
393
394
395 const char *buffer_write_ulong_kernel_code[] = {
396 "__kernel void test_buffer_write_ulong(__global ulong *src, __global ulong *dst)\n"
397 "{\n"
398 " int tid = get_global_id(0);\n"
399 "\n"
400 " dst[tid] = src[tid];\n"
401 "}\n",
402
403 "__kernel void test_buffer_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
404 "{\n"
405 " int tid = get_global_id(0);\n"
406 "\n"
407 " dst[tid] = src[tid];\n"
408 "}\n",
409
410 "__kernel void test_buffer_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
411 "{\n"
412 " int tid = get_global_id(0);\n"
413 "\n"
414 " dst[tid] = src[tid];\n"
415 "}\n",
416
417 "__kernel void test_buffer_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
418 "{\n"
419 " int tid = get_global_id(0);\n"
420 "\n"
421 " dst[tid] = src[tid];\n"
422 "}\n",
423
424 "__kernel void test_buffer_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
425 "{\n"
426 " int tid = get_global_id(0);\n"
427 "\n"
428 " dst[tid] = src[tid];\n"
429 "}\n" };
430
431 static const char *ulong_kernel_name[] = { "test_buffer_write_ulong", "test_buffer_write_ulong2", "test_buffer_write_ulong4", "test_buffer_write_ulong8", "test_buffer_write_ulong16" };
432
433
434 static const char *struct_kernel_code =
435 "typedef struct{\n"
436 "int a;\n"
437 "float b;\n"
438 "} TestStruct;\n"
439 "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n"
440 "{\n"
441 " int tid = get_global_id(0);\n"
442 "\n"
443 " dst[tid].a = src[tid].a;\n"
444 " dst[tid].b = src[tid].b;\n"
445 "}\n";
446
447
448
verify_write_int(void * ptr1,void * ptr2,int n)449 static int verify_write_int( void *ptr1, void *ptr2, int n )
450 {
451 int i;
452 int *inptr = (int *)ptr1;
453 int *outptr = (int *)ptr2;
454
455 for (i=0; i<n; i++){
456 if ( outptr[i] != inptr[i] )
457 return -1;
458 }
459
460 return 0;
461 }
462
463
verify_write_uint(void * ptr1,void * ptr2,int n)464 static int verify_write_uint( void *ptr1, void *ptr2, int n )
465 {
466 int i;
467 cl_uint *inptr = (cl_uint *)ptr1;
468 cl_uint *outptr = (cl_uint *)ptr2;
469
470 for (i=0; i<n; i++){
471 if ( outptr[i] != inptr[i] )
472 return -1;
473 }
474
475 return 0;
476 }
477
478
verify_write_short(void * ptr1,void * ptr2,int n)479 static int verify_write_short( void *ptr1, void *ptr2, int n )
480 {
481 int i;
482 short *inptr = (short *)ptr1;
483 short *outptr = (short *)ptr2;
484
485 for (i=0; i<n; i++){
486 if ( outptr[i] != inptr[i] )
487 return -1;
488 }
489
490 return 0;
491 }
492
493
verify_write_ushort(void * ptr1,void * ptr2,int n)494 static int verify_write_ushort( void *ptr1, void *ptr2, int n )
495 {
496 int i;
497 cl_ushort *inptr = (cl_ushort *)ptr1;
498 cl_ushort *outptr = (cl_ushort *)ptr2;
499
500 for (i=0; i<n; i++){
501 if ( outptr[i] != inptr[i] )
502 return -1;
503 }
504
505 return 0;
506 }
507
508
verify_write_char(void * ptr1,void * ptr2,int n)509 static int verify_write_char( void *ptr1, void *ptr2, int n )
510 {
511 int i;
512 char *inptr = (char *)ptr1;
513 char *outptr = (char *)ptr2;
514
515 for (i=0; i<n; i++){
516 if ( outptr[i] != inptr[i] )
517 return -1;
518 }
519
520 return 0;
521 }
522
523
verify_write_uchar(void * ptr1,void * ptr2,int n)524 static int verify_write_uchar( void *ptr1, void *ptr2, int n )
525 {
526 int i;
527 uchar *inptr = (uchar *)ptr1;
528 uchar *outptr = (uchar *)ptr2;
529
530 for (i=0; i<n; i++){
531 if ( outptr[i] != inptr[i] )
532 return -1;
533 }
534
535 return 0;
536 }
537
538
verify_write_float(void * ptr1,void * ptr2,int n)539 static int verify_write_float( void *ptr1, void *ptr2, int n )
540 {
541 int i;
542 float *inptr = (float *)ptr1;
543 float *outptr = (float *)ptr2;
544
545 for (i=0; i<n; i++){
546 if ( outptr[i] != inptr[i] )
547 return -1;
548 }
549
550 return 0;
551 }
552
553
verify_write_half(void * ptr1,void * ptr2,int n)554 static int verify_write_half( void *ptr1, void *ptr2, int n )
555 {
556 int i;
557 cl_ushort *inptr = (cl_ushort *)ptr1;
558 cl_ushort *outptr = (cl_ushort *)ptr2;
559
560 for ( i = 0; i < n; i++ ){
561 if ( outptr[i] != inptr[i] )
562 return -1;
563 }
564
565 return 0;
566 }
567
568
verify_write_long(void * ptr1,void * ptr2,int n)569 static int verify_write_long( void *ptr1, void *ptr2, int n )
570 {
571 int i;
572 cl_long *inptr = (cl_long *)ptr1;
573 cl_long *outptr = (cl_long *)ptr2;
574
575 for (i=0; i<n; i++){
576 if ( outptr[i] != inptr[i] )
577 return -1;
578 }
579
580 return 0;
581 }
582
583
verify_write_ulong(void * ptr1,void * ptr2,int n)584 static int verify_write_ulong( void *ptr1, void *ptr2, int n )
585 {
586 int i;
587 cl_ulong *inptr = (cl_ulong *)ptr1;
588 cl_ulong *outptr = (cl_ulong *)ptr2;
589
590 for (i=0; i<n; i++){
591 if ( outptr[i] != inptr[i] )
592 return -1;
593 }
594
595 return 0;
596 }
597
598
verify_write_struct(void * ptr1,void * ptr2,int n)599 static int verify_write_struct( void *ptr1, void *ptr2, int n )
600 {
601 int i;
602 TestStruct *inptr = (TestStruct *)ptr1;
603 TestStruct *outptr = (TestStruct *)ptr2;
604
605 for (i=0; i<n; i++){
606 if ( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
607 return -1;
608 }
609
610 return 0;
611 }
612
613
test_buffer_write(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int),MTdata d)614 int test_buffer_write( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
615 void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
616 {
617 cl_mem buffers[10];
618 void *outptr[5];
619 cl_program program[5];
620 cl_kernel kernel[5];
621 size_t ptrSizes[5];
622 size_t global_work_size[3];
623 #ifdef USE_LOCAL_WORK_GROUP
624 size_t local_work_size[3];
625 #endif
626 cl_int err;
627 int i, ii;
628 int src_flag_id, dst_flag_id;
629 int total_errors = 0;
630
631 size_t min_alignment = get_min_alignment(context);
632
633 global_work_size[0] = (size_t)num_elements;
634
635 ptrSizes[0] = size;
636 ptrSizes[1] = ptrSizes[0] << 1;
637 ptrSizes[2] = ptrSizes[1] << 1;
638 ptrSizes[3] = ptrSizes[2] << 1;
639 ptrSizes[4] = ptrSizes[3] << 1;
640
641 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
642 for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
643 log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
644
645 loops = ( loops < 5 ? loops : 5 );
646 for ( i = 0; i < loops; i++ ){
647 ii = i << 1;
648 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
649 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
650 else
651 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
652
653 if ( ! buffers[ii] || err){
654 align_free( outptr[i] );
655 print_error(err, " clCreateBuffer failed\n" );
656 return -1;
657 }
658 if ( ! strcmp( type, "half" ) ){
659 outptr[i] = align_malloc( ptrSizes[i] * (num_elements * 2 ), min_alignment);
660 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
661 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * 2 * num_elements, outptr[i], &err);
662 else
663 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * 2 * num_elements, NULL, &err);
664 }
665 else{
666 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
667 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
668 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * num_elements, outptr[i], &err);
669 else
670 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * num_elements, NULL, &err);
671 }
672 if ( err ){
673 clReleaseMemObject(buffers[ii]);
674 align_free( outptr[i] );
675 print_error(err, " clCreateBuffer failed\n" );
676 return -1;
677 }
678
679 if (gTestMap) {
680 void *dataPtr;
681 dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
682 if (err) {
683 print_error(err, "clEnqueueMapBuffer failed");
684 clReleaseMemObject(buffers[ii]);
685 clReleaseMemObject(buffers[ii+1]);
686 align_free( outptr[i] );
687 return -1;
688 }
689
690 memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
691
692 err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
693 if (err) {
694 print_error(err, "clEnqueueUnmapMemObject failed");
695 clReleaseMemObject(buffers[ii]);
696 clReleaseMemObject(buffers[ii+1]);
697 align_free( outptr[i] );
698 return -1;
699 }
700 }
701 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
702 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
703 if ( err != CL_SUCCESS ){
704 clReleaseMemObject(buffers[ii]);
705 clReleaseMemObject(buffers[ii+1]);
706 align_free( outptr[i] );
707 print_error( err, " clWriteBuffer failed" );
708 return -1;
709 }
710 }
711
712 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
713 if ( err ){
714 clReleaseMemObject(buffers[ii]);
715 clReleaseMemObject(buffers[ii+1]);
716 align_free( outptr[i] );
717 log_error( " Error creating program for %s\n", type );
718 return -1;
719 }
720
721 #ifdef USE_LOCAL_WORK_GROUP
722 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
723 test_error( err, "Unable to get work group size to use" );
724 #endif
725
726 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
727 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
728 if ( err != CL_SUCCESS ){
729 clReleaseMemObject( buffers[ii] );
730 clReleaseMemObject( buffers[ii+1] );
731 clReleaseKernel( kernel[i] );
732 clReleaseProgram( program[i] );
733 align_free( outptr[i] );
734 print_error( err, " clSetKernelArg failed" );
735 return -1;
736 }
737
738 #ifdef USE_LOCAL_WORK_GROUP
739 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
740 #else
741 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
742 #endif
743 if ( err != CL_SUCCESS ){
744 print_error( err, " clEnqueueNDRangeKernel failed" );
745 clReleaseMemObject( buffers[ii] );
746 clReleaseMemObject( buffers[ii+1] );
747 clReleaseKernel( kernel[i] );
748 clReleaseProgram( program[i] );
749 align_free( outptr[i] );
750 return -1;
751 }
752
753 if ( ! strcmp( type, "half" ) ){
754 err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
755 }
756 else{
757 err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
758 }
759 if ( err != CL_SUCCESS ){
760 clReleaseMemObject( buffers[ii] );
761 clReleaseMemObject( buffers[ii+1] );
762 clReleaseKernel( kernel[i] );
763 clReleaseProgram( program[i] );
764 align_free( outptr[i] );
765 print_error( err, " clEnqueueReadBuffer failed" );
766 return -1;
767 }
768
769 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
770 log_error( " %s%d test failed\n", type, 1<<i );
771 total_errors++;
772 }
773 else{
774 log_info( " %s%d test passed\n", type, 1<<i );
775 }
776 // cleanup
777 clReleaseMemObject( buffers[ii] );
778 clReleaseMemObject( buffers[ii+1] );
779 clReleaseKernel( kernel[i] );
780 clReleaseProgram( program[i] );
781 align_free( outptr[i] );
782 }
783 } // dst cl_mem_flag
784 } // src cl_mem_flag
785
786 return total_errors;
787
788 } // end test_buffer_write()
789
790
791
792
test_buffer_write_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)793 int test_buffer_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
794 {
795 cl_mem buffers[10];
796 void *outptr[5];
797 TestStruct *inptr[5];
798 cl_program program[5];
799 cl_kernel kernel[5];
800 size_t ptrSizes[5];
801 size_t size = sizeof( TestStruct );
802 size_t global_work_size[3];
803 #ifdef USE_LOCAL_WORK_GROUP
804 size_t local_work_size[3];
805 #endif
806 cl_int err;
807 int i, ii;
808 cl_uint j;
809 int loops = 1; // no vector for structs
810 int src_flag_id, dst_flag_id;
811 int total_errors = 0;
812 MTdata d = init_genrand( gRandomSeed );
813
814 size_t min_alignment = get_min_alignment(context);
815
816 global_work_size[0] = (size_t)num_elements;
817
818 ptrSizes[0] = size;
819 ptrSizes[1] = ptrSizes[0] << 1;
820 ptrSizes[2] = ptrSizes[1] << 1;
821 ptrSizes[3] = ptrSizes[2] << 1;
822 ptrSizes[4] = ptrSizes[3] << 1;
823
824 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
825 for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
826 log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
827
828 loops = ( loops < 5 ? loops : 5 );
829 for ( i = 0; i < loops; i++ ){
830
831 inptr[i] = (TestStruct *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
832
833 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){
834 inptr[i][j].a = (int)genrand_int32(d);
835 inptr[i][j].b = get_random_float( -FLT_MAX, FLT_MAX, d );
836 }
837
838 ii = i << 1;
839 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
840 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
841 else
842 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
843 if ( err ){
844 align_free( outptr[i] );
845 print_error(err, " clCreateBuffer failed\n" );
846 free_mtdata(d);
847 return -1;
848 }
849 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
850 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
851 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * num_elements, outptr[i], &err);
852 else
853 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * num_elements, NULL, &err);
854 if ( ! buffers[ii+1] || err){
855 clReleaseMemObject(buffers[ii]);
856 align_free( outptr[i] );
857 print_error(err, " clCreateBuffer failed\n" );
858 free_mtdata(d);
859 return -1;
860 }
861
862 if (gTestMap) {
863 void *dataPtr;
864 dataPtr = clEnqueueMapBuffer(queue, buffers[ii], CL_TRUE, CL_MAP_WRITE, 0, ptrSizes[i]*num_elements, 0, NULL, NULL, &err);
865 if (err) {
866 print_error(err, "clEnqueueMapBuffer failed");
867 clReleaseMemObject(buffers[ii]);
868 clReleaseMemObject(buffers[ii+1]);
869 align_free( outptr[i] );
870 free_mtdata(d);
871 return -1;
872 }
873
874 memcpy(dataPtr, inptr[i], ptrSizes[i]*num_elements);
875
876 err = clEnqueueUnmapMemObject(queue, buffers[ii], dataPtr, 0, NULL, NULL);
877 if (err) {
878 print_error(err, "clEnqueueUnmapMemObject failed");
879 clReleaseMemObject(buffers[ii]);
880 clReleaseMemObject(buffers[ii+1]);
881 align_free( outptr[i] );
882 free_mtdata(d);
883 return -1;
884 }
885 }
886 else if (!(flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) && !(flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR)) {
887 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_TRUE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, NULL);
888 if ( err != CL_SUCCESS ){
889 clReleaseMemObject(buffers[ii]);
890 clReleaseMemObject(buffers[ii+1]);
891 align_free( outptr[i] );
892 print_error( err, " clWriteBuffer failed" );
893 free_mtdata(d);
894 return -1;
895 }
896 }
897
898 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &struct_kernel_code, "read_write_struct" );
899 if ( err ){
900 clReleaseMemObject(buffers[ii]);
901 clReleaseMemObject(buffers[ii+1]);
902 align_free( outptr[i] );
903 log_error( " Error creating program for struct\n" );
904 free_mtdata(d);
905 return -1;
906 }
907
908 #ifdef USE_LOCAL_WORK_GROUP
909 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
910 test_error( err, "Unable to get work group size to use" );
911 #endif
912
913 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
914 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
915 if ( err != CL_SUCCESS ){
916 clReleaseMemObject( buffers[ii] );
917 clReleaseMemObject( buffers[ii+1] );
918 clReleaseKernel( kernel[i] );
919 clReleaseProgram( program[i] );
920 align_free( outptr[i] );
921 print_error( err, " clSetKernelArg failed" );
922 free_mtdata(d);
923 return -1;
924 }
925
926 #ifdef USE_LOCAL_WORK_GROUP
927 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
928 #else
929 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
930 #endif
931 if ( err != CL_SUCCESS ){
932 print_error( err, " clEnqueueNDRangeKernel failed" );
933 clReleaseMemObject( buffers[ii] );
934 clReleaseMemObject( buffers[ii+1] );
935 clReleaseKernel( kernel[i] );
936 clReleaseProgram( program[i] );
937 align_free( outptr[i] );
938 free_mtdata(d);
939 return -1;
940 }
941
942 err = clEnqueueReadBuffer( queue, buffers[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
943 if ( err != CL_SUCCESS ){
944 clReleaseMemObject( buffers[ii] );
945 clReleaseMemObject( buffers[ii+1] );
946 clReleaseKernel( kernel[i] );
947 clReleaseProgram( program[i] );
948 align_free( outptr[i] );
949 print_error( err, " clEnqueueReadBuffer failed" );
950 free_mtdata(d);
951 return -1;
952 }
953
954 if ( verify_write_struct( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
955 log_error( " buffer_WRITE struct%d test failed\n", 1<<i );
956 total_errors++;
957 }
958 else{
959 log_info( " buffer_WRITE struct%d test passed\n", 1<<i );
960 }
961 // cleanup
962 clReleaseMemObject( buffers[ii] );
963 clReleaseMemObject( buffers[ii+1] );
964 clReleaseKernel( kernel[i] );
965 clReleaseProgram( program[i] );
966 align_free( outptr[i] );
967 align_free( (void *)inptr[i] );
968 }
969 } // dst cl_mem_flag
970 } // src cl_mem_flag
971
972 free_mtdata(d);
973
974 return total_errors;
975
976 } // end test_buffer_struct_write()
977
978
test_buffer_write_array_async(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,size_t size,char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int))979 int test_buffer_write_array_async( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, size_t size, char *type, int loops,
980 void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int) )
981 {
982 cl_mem buffers[10];
983 void *outptr[5];
984 cl_program program[5];
985 cl_kernel kernel[5];
986 cl_event event[2];
987 size_t ptrSizes[5];
988 size_t global_work_size[3];
989 #ifdef USE_LOCAL_WORK_GROUP
990 size_t local_work_size[3];
991 #endif
992 cl_int err;
993 int i, ii;
994 int src_flag_id, dst_flag_id;
995 int total_errors = 0;
996
997 size_t min_alignment = get_min_alignment(context);
998
999 global_work_size[0] = (size_t)num_elements;
1000
1001 ptrSizes[0] = size;
1002 ptrSizes[1] = ptrSizes[0] << 1;
1003 ptrSizes[2] = ptrSizes[1] << 1;
1004 ptrSizes[3] = ptrSizes[2] << 1;
1005 ptrSizes[4] = ptrSizes[3] << 1;
1006
1007 for (src_flag_id=0; src_flag_id < NUM_FLAGS; src_flag_id++) {
1008 for (dst_flag_id=0; dst_flag_id < NUM_FLAGS; dst_flag_id++) {
1009 log_info("Testing with cl_mem_flags src: %s dst: %s\n", flag_set_names[src_flag_id], flag_set_names[dst_flag_id]);
1010
1011 loops = ( loops < 5 ? loops : 5 );
1012 for ( i = 0; i < loops; i++ ){
1013 ii = i << 1;
1014 if ((flag_set[src_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[src_flag_id] & CL_MEM_COPY_HOST_PTR))
1015 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, inptr[i], &err);
1016 else
1017 buffers[ii] = clCreateBuffer(context, flag_set[src_flag_id], ptrSizes[i] * num_elements, NULL, &err);
1018 if ( !buffers[ii] || err){
1019 print_error(err, "clCreateBuffer failed\n" );
1020 return -1;
1021 }
1022
1023 outptr[i] = align_malloc( ptrSizes[i] * num_elements, min_alignment);
1024 if ((flag_set[dst_flag_id] & CL_MEM_USE_HOST_PTR) || (flag_set[dst_flag_id] & CL_MEM_COPY_HOST_PTR))
1025 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * num_elements, outptr[i], &err);
1026 else
1027 buffers[ii+1] = clCreateBuffer(context, flag_set[dst_flag_id], ptrSizes[i] * num_elements, NULL, &err);
1028 if ( !buffers[ii+1] || err){
1029 print_error(err, "clCreateBuffer failed\n" );
1030 return -1;
1031 }
1032
1033 err = clEnqueueWriteBuffer(queue, buffers[ii], CL_FALSE, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &(event[0]));
1034 if ( err != CL_SUCCESS ){
1035 print_error( err, "clEnqueueWriteBuffer failed" );
1036 return -1;
1037 }
1038
1039 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
1040 if ( err ){
1041 log_error( " Error creating program for %s\n", type );
1042 clReleaseMemObject( buffers[ii] );
1043 clReleaseMemObject( buffers[ii+1] );
1044 align_free( outptr[i] );
1045 return -1;
1046 }
1047
1048 #ifdef USE_LOCAL_WORK_GROUP
1049 err = get_max_common_work_group_size( context, kernel[i], global_work_size[0], &local_work_size[0] );
1050 test_error( err, "Unable to get work group size to use" );
1051 #endif
1052
1053 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&buffers[ii] );
1054 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&buffers[ii+1] );
1055 if ( err != CL_SUCCESS ){
1056 print_error( err, "clSetKernelArg failed" );
1057 clReleaseKernel( kernel[i] );
1058 clReleaseProgram( program[i] );
1059 clReleaseMemObject( buffers[ii] );
1060 clReleaseMemObject( buffers[ii+1] );
1061 align_free( outptr[i] );
1062 return -1;
1063 }
1064
1065 err = clWaitForEvents( 1, &(event[0]) );
1066 if ( err != CL_SUCCESS ){
1067 print_error( err, "clWaitForEvents() failed" );
1068 clReleaseKernel( kernel[i] );
1069 clReleaseProgram( program[i] );
1070 clReleaseMemObject( buffers[ii] );
1071 clReleaseMemObject( buffers[ii+1] );
1072 align_free( outptr[i] );
1073 return -1;
1074 }
1075
1076 #ifdef USE_LOCAL_WORK_GROUP
1077 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, local_work_size, 0, NULL, NULL );
1078 #else
1079 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, global_work_size, NULL, 0, NULL, NULL );
1080 #endif
1081 if (err != CL_SUCCESS){
1082 print_error( err, "clEnqueueNDRangeKernel failed" );
1083 return -1;
1084 }
1085
1086 err = clEnqueueReadBuffer( queue, buffers[ii+1], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &(event[1]) );
1087 if (err != CL_SUCCESS){
1088 print_error( err, "clEnqueueReadBuffer failed" );
1089 return -1;
1090 }
1091
1092 err = clWaitForEvents( 1, &(event[1]) );
1093 if ( err != CL_SUCCESS ){
1094 print_error( err, "clWaitForEvents() failed" );
1095 }
1096
1097 if ( fn( inptr[i], outptr[i], (int)(ptrSizes[i] * (size_t)num_elements / ptrSizes[0]) ) ){
1098 log_error( " %s%d test failed\n", type, 1<<i );
1099 total_errors++;
1100 }
1101 else{
1102 log_info( " %s%d test passed\n", type, 1<<i );
1103 }
1104
1105 // cleanup
1106 clReleaseEvent( event[0] );
1107 clReleaseEvent( event[1] );
1108 clReleaseMemObject( buffers[ii] );
1109 clReleaseMemObject( buffers[ii+1] );
1110 clReleaseKernel( kernel[i] );
1111 clReleaseProgram( program[i] );
1112 align_free( outptr[i] );
1113 }
1114 } // dst cl_mem_flag
1115 } // src cl_mem_flag
1116
1117 return total_errors;
1118
1119 } // end test_buffer_write_array_async()
1120
1121
test_buffer_write_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1122 int test_buffer_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1123 {
1124 int *inptr[5];
1125 size_t ptrSizes[5];
1126 int i, err;
1127 cl_uint j;
1128 int (*foo)(void *,void *,int);
1129 MTdata d = init_genrand( gRandomSeed );
1130
1131 size_t min_alignment = get_min_alignment(context);
1132
1133 foo = verify_write_int;
1134
1135 ptrSizes[0] = sizeof(cl_int);
1136 ptrSizes[1] = ptrSizes[0] << 1;
1137 ptrSizes[2] = ptrSizes[1] << 1;
1138 ptrSizes[3] = ptrSizes[2] << 1;
1139 ptrSizes[4] = ptrSizes[3] << 1;
1140
1141 for ( i = 0; i < 5; i++ ){
1142 inptr[i] = (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1143
1144 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1145 inptr[i][j] = (int)genrand_int32(d);
1146 }
1147
1148 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int", 5, (void**)inptr,
1149 buffer_write_int_kernel_code, int_kernel_name, foo, d );
1150
1151 for ( i = 0; i < 5; i++ ){
1152 align_free( (void *)inptr[i] );
1153 }
1154 free_mtdata(d);
1155
1156 return err;
1157
1158 } // end test_buffer_int_write()
1159
1160
test_buffer_write_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1161 int test_buffer_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1162 {
1163 cl_uint *inptr[5];
1164 size_t ptrSizes[5];
1165 int i, err;
1166 cl_uint j;
1167 MTdata d = init_genrand( gRandomSeed );
1168 int (*foo)(void *,void *,int);
1169
1170 size_t min_alignment = get_min_alignment(context);
1171
1172 foo = verify_write_uint;
1173
1174 ptrSizes[0] = sizeof(cl_uint);
1175 ptrSizes[1] = ptrSizes[0] << 1;
1176 ptrSizes[2] = ptrSizes[1] << 1;
1177 ptrSizes[3] = ptrSizes[2] << 1;
1178 ptrSizes[4] = ptrSizes[3] << 1;
1179
1180 for ( i = 0; i < 5; i++ ){
1181 inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1182
1183 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1184 inptr[i][j] = genrand_int32(d);
1185 }
1186
1187 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint", 5, (void**)inptr,
1188 buffer_write_uint_kernel_code, uint_kernel_name, foo, d );
1189
1190 for ( i = 0; i < 5; i++ ){
1191 align_free( (void *)inptr[i] );
1192 }
1193
1194 free_mtdata(d);
1195 return err;
1196
1197 } // end test_buffer_uint_write()
1198
1199
test_buffer_write_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1200 int test_buffer_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1201 {
1202 short *inptr[5];
1203 size_t ptrSizes[5];
1204 int i, err;
1205 cl_uint j;
1206 MTdata d = init_genrand( gRandomSeed );
1207 int (*foo)(void *,void *,int);
1208
1209 size_t min_alignment = get_min_alignment(context);
1210
1211 foo = verify_write_short;
1212
1213 ptrSizes[0] = sizeof(cl_short);
1214 ptrSizes[1] = ptrSizes[0] << 1;
1215 ptrSizes[2] = ptrSizes[1] << 1;
1216 ptrSizes[3] = ptrSizes[2] << 1;
1217 ptrSizes[4] = ptrSizes[3] << 1;
1218
1219 for ( i = 0; i < 5; i++ ){
1220 inptr[i] = (cl_short *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1221
1222 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1223 inptr[i][j] = (cl_short)genrand_int32(d);
1224 }
1225
1226 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short", 5, (void**)inptr,
1227 buffer_write_short_kernel_code, short_kernel_name, foo, d );
1228
1229 for ( i = 0; i < 5; i++ ){
1230 align_free( (void *)inptr[i] );
1231
1232 }
1233
1234 free_mtdata(d);
1235 return err;
1236
1237 } // end test_buffer_short_write()
1238
1239
test_buffer_write_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1240 int test_buffer_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1241 {
1242 cl_ushort *inptr[5];
1243 size_t ptrSizes[5];
1244 int i, err;
1245 cl_uint j;
1246 MTdata d = init_genrand( gRandomSeed );
1247 int (*foo)(void *,void *,int);
1248
1249 size_t min_alignment = get_min_alignment(context);
1250
1251 foo = verify_write_ushort;
1252
1253 ptrSizes[0] = sizeof(cl_ushort);
1254 ptrSizes[1] = ptrSizes[0] << 1;
1255 ptrSizes[2] = ptrSizes[1] << 1;
1256 ptrSizes[3] = ptrSizes[2] << 1;
1257 ptrSizes[4] = ptrSizes[3] << 1;
1258
1259 for ( i = 0; i < 5; i++ ){
1260 inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1261
1262 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1263 inptr[i][j] = (cl_ushort)genrand_int32(d);
1264 }
1265
1266 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort", 5, (void**)inptr,
1267 buffer_write_ushort_kernel_code, ushort_kernel_name, foo, d );
1268
1269 for ( i = 0; i < 5; i++ ){
1270 align_free( (void *)inptr[i] );
1271
1272 }
1273
1274 free_mtdata(d);
1275 return err;
1276
1277 } // end test_buffer_ushort_write()
1278
1279
test_buffer_write_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1280 int test_buffer_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1281 {
1282 char *inptr[5];
1283 size_t ptrSizes[5];
1284 int i, err;
1285 cl_uint j;
1286 MTdata d = init_genrand( gRandomSeed );
1287 int (*foo)(void *,void *,int);
1288
1289 size_t min_alignment = get_min_alignment(context);
1290
1291 foo = verify_write_char;
1292
1293 ptrSizes[0] = sizeof(cl_char);
1294 ptrSizes[1] = ptrSizes[0] << 1;
1295 ptrSizes[2] = ptrSizes[1] << 1;
1296 ptrSizes[3] = ptrSizes[2] << 1;
1297 ptrSizes[4] = ptrSizes[3] << 1;
1298
1299 for ( i = 0; i < 5; i++ ){
1300 inptr[i] = (char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1301
1302 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1303 inptr[i][j] = (char)genrand_int32(d);
1304 }
1305
1306 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char", 5, (void**)inptr,
1307 buffer_write_char_kernel_code, char_kernel_name, foo, d );
1308
1309 for ( i = 0; i < 5; i++ ){
1310 align_free( (void *)inptr[i] );
1311
1312 }
1313
1314 free_mtdata(d);
1315 return err;
1316
1317 } // end test_buffer_char_write()
1318
1319
test_buffer_write_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1320 int test_buffer_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1321 {
1322 uchar *inptr[5];
1323 size_t ptrSizes[5];
1324 int i, err;
1325 cl_uint j;
1326 MTdata d = init_genrand( gRandomSeed );
1327 int (*foo)(void *,void *,int);
1328
1329 size_t min_alignment = get_min_alignment(context);
1330
1331 foo = verify_write_uchar;
1332
1333 ptrSizes[0] = sizeof(cl_uchar);
1334 ptrSizes[1] = ptrSizes[0] << 1;
1335 ptrSizes[2] = ptrSizes[1] << 1;
1336 ptrSizes[3] = ptrSizes[2] << 1;
1337 ptrSizes[4] = ptrSizes[3] << 1;
1338
1339 for ( i = 0; i < 5; i++ ){
1340 inptr[i] = (uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1341
1342 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1343 inptr[i][j] = (uchar)genrand_int32(d);
1344 }
1345
1346 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar", 5, (void**)inptr,
1347 buffer_write_uchar_kernel_code, uchar_kernel_name, foo, d );
1348
1349 for ( i = 0; i < 5; i++ ){
1350 align_free( (void *)inptr[i] );
1351
1352 }
1353
1354 free_mtdata(d);
1355 return err;
1356
1357 } // end test_buffer_uchar_write()
1358
1359
test_buffer_write_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1360 int test_buffer_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1361 {
1362 float *inptr[5];
1363 size_t ptrSizes[5];
1364 int i, err;
1365 cl_uint j;
1366 MTdata d = init_genrand( gRandomSeed );
1367 int (*foo)(void *,void *,int);
1368
1369 size_t min_alignment = get_min_alignment(context);
1370
1371 foo = verify_write_float;
1372
1373 ptrSizes[0] = sizeof(cl_float);
1374 ptrSizes[1] = ptrSizes[0] << 1;
1375 ptrSizes[2] = ptrSizes[1] << 1;
1376 ptrSizes[3] = ptrSizes[2] << 1;
1377 ptrSizes[4] = ptrSizes[3] << 1;
1378
1379 for ( i = 0; i < 5; i++ ){
1380 inptr[i] = (float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1381
1382 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1383 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1384 }
1385
1386 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float", 5, (void**)inptr,
1387 buffer_write_float_kernel_code, float_kernel_name, foo, d );
1388
1389 for ( i = 0; i < 5; i++ ){
1390 align_free( (void *)inptr[i] );
1391 }
1392
1393 free_mtdata(d);
1394 return err;
1395
1396 } // end test_buffer_float_write()
1397
1398
test_buffer_write_half(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1399 int test_buffer_write_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1400 {
1401 float *inptr[5];
1402 size_t ptrSizes[5];
1403 int i, err;
1404 cl_uint j;
1405 MTdata d = init_genrand( gRandomSeed );
1406 int (*foo)(void *,void *,int);
1407
1408 size_t min_alignment = get_min_alignment(context);
1409
1410 foo = verify_write_half;
1411
1412 ptrSizes[0] = sizeof( cl_float ) / 2;
1413 ptrSizes[1] = ptrSizes[0] << 1;
1414 ptrSizes[2] = ptrSizes[1] << 1;
1415 ptrSizes[3] = ptrSizes[2] << 1;
1416 ptrSizes[4] = ptrSizes[3] << 1;
1417
1418 for ( i = 0; i < 5; i++ ){
1419 inptr[i] = (float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1420
1421 for ( j = 0; j < ptrSizes[i] * num_elements / ( ptrSizes[0] * 2 ); j++ )
1422 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1423 }
1424
1425 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_float ) / 2, (char*)"half", 5, (void**)inptr,
1426 buffer_write_half_kernel_code, half_kernel_name, foo, d );
1427
1428 for ( i = 0; i < 5; i++ ){
1429 align_free( (void *)inptr[i] );
1430 }
1431
1432 free_mtdata(d);
1433 return err;
1434
1435 } // end test_buffer_half_write()
1436
1437
test_buffer_write_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1438 int test_buffer_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1439 {
1440 cl_long *inptr[5];
1441 size_t ptrSizes[5];
1442 int i, err;
1443 cl_uint j;
1444 MTdata d = init_genrand( gRandomSeed );
1445 int (*foo)(void *,void *,int);
1446
1447 size_t min_alignment = get_min_alignment(context);
1448
1449 foo = verify_write_long;
1450
1451 ptrSizes[0] = sizeof(cl_long);
1452 ptrSizes[1] = ptrSizes[0] << 1;
1453 ptrSizes[2] = ptrSizes[1] << 1;
1454 ptrSizes[3] = ptrSizes[2] << 1;
1455 ptrSizes[4] = ptrSizes[3] << 1;
1456
1457 //skip devices that don't support long
1458 if (! gHasLong )
1459 {
1460 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1461 return CL_SUCCESS;
1462 }
1463
1464 for ( i = 0; i < 5; i++ ){
1465 inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1466
1467 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1468 inptr[i][j] = (cl_long) genrand_int32(d) ^ ((cl_long) genrand_int32(d) << 32);
1469 }
1470
1471 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"cl_long", 5, (void**)inptr,
1472 buffer_write_long_kernel_code, long_kernel_name, foo, d );
1473
1474 for ( i = 0; i < 5; i++ ){
1475 align_free( (void *)inptr[i] );
1476 }
1477
1478 free_mtdata(d);
1479 return err;
1480
1481 } // end test_buffer_long_write()
1482
1483
test_buffer_write_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1484 int test_buffer_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1485 {
1486 cl_ulong *inptr[5];
1487 size_t ptrSizes[5];
1488 int i, err;
1489 cl_uint j;
1490 MTdata d = init_genrand( gRandomSeed );
1491 int (*foo)(void *,void *,int);
1492
1493 size_t min_alignment = get_min_alignment(context);
1494
1495 foo = verify_write_ulong;
1496
1497 ptrSizes[0] = sizeof(cl_ulong);
1498 ptrSizes[1] = ptrSizes[0] << 1;
1499 ptrSizes[2] = ptrSizes[1] << 1;
1500 ptrSizes[3] = ptrSizes[2] << 1;
1501 ptrSizes[4] = ptrSizes[3] << 1;
1502
1503 if (! gHasLong )
1504 {
1505 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1506 return CL_SUCCESS;
1507 }
1508
1509 for ( i = 0; i < 5; i++ ){
1510 inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1511
1512 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1513 inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1514 }
1515
1516 err = test_buffer_write( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong long", 5, (void**)inptr,
1517 buffer_write_ulong_kernel_code, ulong_kernel_name, foo, d );
1518
1519 for ( i = 0; i < 5; i++ ){
1520 align_free( (void *)inptr[i] );
1521 }
1522
1523 free_mtdata(d);
1524
1525 return err;
1526
1527 } // end test_buffer_ulong_write()
1528
1529
test_buffer_map_write_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1530 int test_buffer_map_write_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1531 {
1532 gTestMap = 1;
1533 return test_buffer_write_int(deviceID, context, queue, num_elements);
1534 }
1535
test_buffer_map_write_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1536 int test_buffer_map_write_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1537 {
1538 gTestMap = 1;
1539 return test_buffer_write_uint(deviceID, context, queue, num_elements);
1540 }
1541
test_buffer_map_write_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1542 int test_buffer_map_write_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1543 {
1544 gTestMap = 1;
1545 return test_buffer_write_long(deviceID, context, queue, num_elements);
1546 }
1547
test_buffer_map_write_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1548 int test_buffer_map_write_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1549 {
1550 gTestMap = 1;
1551 return test_buffer_write_ulong(deviceID, context, queue, num_elements);
1552 }
1553
test_buffer_map_write_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1554 int test_buffer_map_write_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1555 {
1556 gTestMap = 1;
1557 return test_buffer_write_short(deviceID, context, queue, num_elements);
1558 }
1559
test_buffer_map_write_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1560 int test_buffer_map_write_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1561 {
1562 gTestMap = 1;
1563 return test_buffer_write_ushort(deviceID, context, queue, num_elements);
1564 }
1565
test_buffer_map_write_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1566 int test_buffer_map_write_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1567 {
1568 gTestMap = 1;
1569 return test_buffer_write_char(deviceID, context, queue, num_elements);
1570 }
1571
test_buffer_map_write_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1572 int test_buffer_map_write_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1573 {
1574 gTestMap = 1;
1575 return test_buffer_write_uchar(deviceID, context, queue, num_elements);
1576 }
1577
test_buffer_map_write_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1578 int test_buffer_map_write_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1579 {
1580 gTestMap = 1;
1581 return test_buffer_write_float(deviceID, context, queue, num_elements);
1582 }
1583
test_buffer_map_write_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1584 int test_buffer_map_write_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1585 {
1586 gTestMap = 1;
1587 return test_buffer_write_struct(deviceID, context, queue, num_elements);
1588 }
1589
1590
test_buffer_write_async_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1591 int test_buffer_write_async_int( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1592 {
1593 int *inptr[5];
1594 size_t ptrSizes[5];
1595 int i, err;
1596 cl_uint j;
1597 MTdata d = init_genrand( gRandomSeed );
1598 int (*foo)(void *,void *,int);
1599
1600 size_t min_alignment = get_min_alignment(context);
1601
1602 foo = verify_write_int;
1603
1604 ptrSizes[0] = sizeof(cl_int);
1605 ptrSizes[1] = ptrSizes[0] << 1;
1606 ptrSizes[2] = ptrSizes[1] << 1;
1607 ptrSizes[3] = ptrSizes[2] << 1;
1608 ptrSizes[4] = ptrSizes[3] << 1;
1609
1610 for ( i = 0; i < 5; i++ ){
1611 inptr[i] = (int *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1612
1613 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1614 inptr[i][j] = (int)genrand_int32(d);
1615 }
1616
1617 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_int ), (char*)"int", 5, (void**)inptr,
1618 buffer_write_int_kernel_code, int_kernel_name, foo );
1619
1620 for ( i = 0; i < 5; i++ ){
1621 align_free( (void *)inptr[i] );
1622 }
1623
1624 free_mtdata(d);
1625 return err;
1626
1627 } // end test_buffer_int_write_array_async()
1628
1629
test_buffer_write_async_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1630 int test_buffer_write_async_uint( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1631 {
1632 cl_uint *inptr[5];
1633 size_t ptrSizes[5];
1634 int i, err;
1635 cl_uint j;
1636 MTdata d = init_genrand( gRandomSeed );
1637 int (*foo)(void *,void *,int);
1638
1639 size_t min_alignment = get_min_alignment(context);
1640
1641 foo = verify_write_uint;
1642
1643 ptrSizes[0] = sizeof(cl_uint);
1644 ptrSizes[1] = ptrSizes[0] << 1;
1645 ptrSizes[2] = ptrSizes[1] << 1;
1646 ptrSizes[3] = ptrSizes[2] << 1;
1647 ptrSizes[4] = ptrSizes[3] << 1;
1648
1649 for ( i = 0; i < 5; i++ ){
1650 inptr[i] = (cl_uint *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1651
1652 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1653 inptr[i][j] = (cl_uint)genrand_int32(d);
1654 }
1655
1656 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_uint ), (char*)"uint", 5, (void**)inptr,
1657 buffer_write_uint_kernel_code, uint_kernel_name, foo );
1658
1659 for ( i = 0; i < 5; i++ ){
1660 align_free( (void *)inptr[i] );
1661 }
1662
1663 free_mtdata(d);
1664 return err;
1665
1666 } // end test_buffer_uint_write_array_async()
1667
1668
test_buffer_write_async_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1669 int test_buffer_write_async_short( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1670 {
1671 short *inptr[5];
1672 size_t ptrSizes[5];
1673 int i, err;
1674 cl_uint j;
1675 MTdata d = init_genrand( gRandomSeed );
1676 int (*foo)(void *,void *,int);
1677
1678 size_t min_alignment = get_min_alignment(context);
1679
1680 foo = verify_write_short;
1681
1682 ptrSizes[0] = sizeof(cl_short);
1683 ptrSizes[1] = ptrSizes[0] << 1;
1684 ptrSizes[2] = ptrSizes[1] << 1;
1685 ptrSizes[3] = ptrSizes[2] << 1;
1686 ptrSizes[4] = ptrSizes[3] << 1;
1687
1688 for ( i = 0; i < 5; i++ ){
1689 inptr[i] = (short *)align_malloc(ptrSizes[i] * num_elements + min_alignment, min_alignment);
1690
1691 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1692 inptr[i][j] = (short)genrand_int32(d);
1693 }
1694
1695 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_short ), (char*)"short", 5, (void**)inptr,
1696 buffer_write_short_kernel_code, short_kernel_name, foo );
1697
1698 for ( i = 0; i < 5; i++ ){
1699 align_free( (void *)inptr[i] );
1700
1701 }
1702
1703 free_mtdata(d);
1704 return err;
1705
1706 } // end test_buffer_short_write_array_async()
1707
1708
test_buffer_write_async_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1709 int test_buffer_write_async_ushort( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1710 {
1711 cl_ushort *inptr[5];
1712 size_t ptrSizes[5];
1713 int i, err;
1714 cl_uint j;
1715 MTdata d = init_genrand( gRandomSeed );
1716 int (*foo)(void *,void *,int);
1717
1718 size_t min_alignment = get_min_alignment(context);
1719
1720 foo = verify_write_ushort;
1721
1722 ptrSizes[0] = sizeof(cl_ushort);
1723 ptrSizes[1] = ptrSizes[0] << 1;
1724 ptrSizes[2] = ptrSizes[1] << 1;
1725 ptrSizes[3] = ptrSizes[2] << 1;
1726 ptrSizes[4] = ptrSizes[3] << 1;
1727
1728 for ( i = 0; i < 5; i++ ){
1729 inptr[i] = (cl_ushort *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1730
1731 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1732 inptr[i][j] = (cl_ushort)genrand_int32(d);
1733 }
1734
1735 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_ushort ), (char*)"ushort", 5, (void**)inptr,
1736 buffer_write_ushort_kernel_code, ushort_kernel_name, foo );
1737
1738 for ( i = 0; i < 5; i++ ){
1739 align_free( (void *)inptr[i] );
1740
1741 }
1742
1743 free_mtdata(d);
1744 return err;
1745
1746 } // end test_buffer_ushort_write_array_async()
1747
1748
test_buffer_write_async_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1749 int test_buffer_write_async_char( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1750 {
1751 char *inptr[5];
1752 size_t ptrSizes[5];
1753 int i, err;
1754 cl_uint j;
1755 MTdata d = init_genrand( gRandomSeed );
1756 int (*foo)(void *,void *,int);
1757
1758 size_t min_alignment = get_min_alignment(context);
1759
1760 foo = verify_write_char;
1761
1762 ptrSizes[0] = sizeof(cl_char);
1763 ptrSizes[1] = ptrSizes[0] << 1;
1764 ptrSizes[2] = ptrSizes[1] << 1;
1765 ptrSizes[3] = ptrSizes[2] << 1;
1766 ptrSizes[4] = ptrSizes[3] << 1;
1767
1768 for ( i = 0; i < 5; i++ ){
1769 inptr[i] = (char *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1770
1771 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1772 inptr[i][j] = (char)genrand_int32(d);
1773 }
1774
1775 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_char ), (char*)"char", 5, (void**)inptr,
1776 buffer_write_char_kernel_code, char_kernel_name, foo );
1777
1778 for ( i = 0; i < 5; i++ ){
1779 align_free( (void *)inptr[i] );
1780
1781 }
1782
1783 free_mtdata(d);
1784 return err;
1785
1786 } // end test_buffer_char_write_array_async()
1787
1788
test_buffer_write_async_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1789 int test_buffer_write_async_uchar( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1790 {
1791 uchar *inptr[5];
1792 size_t ptrSizes[5];
1793 int i, err;
1794 cl_uint j;
1795 MTdata d = init_genrand( gRandomSeed );
1796 int (*foo)(void *,void *,int);
1797
1798 size_t min_alignment = get_min_alignment(context);
1799
1800 foo = verify_write_uchar;
1801
1802 ptrSizes[0] = sizeof(cl_uchar);
1803 ptrSizes[1] = ptrSizes[0] << 1;
1804 ptrSizes[2] = ptrSizes[1] << 1;
1805 ptrSizes[3] = ptrSizes[2] << 1;
1806 ptrSizes[4] = ptrSizes[3] << 1;
1807
1808 for ( i = 0; i < 5; i++ ){
1809 inptr[i] = (uchar *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1810
1811 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1812 inptr[i][j] = (uchar)genrand_int32(d);
1813 }
1814
1815 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_uchar ), (char*)"uchar", 5, (void**)inptr,
1816 buffer_write_uchar_kernel_code, uchar_kernel_name, foo );
1817
1818 for ( i = 0; i < 5; i++ ){
1819 align_free( (void *)inptr[i] );
1820
1821 }
1822
1823 free_mtdata(d);
1824 return err;
1825
1826 } // end test_buffer_uchar_write_array_async()
1827
1828
test_buffer_write_async_float(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1829 int test_buffer_write_async_float( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1830 {
1831 float *inptr[5];
1832 size_t ptrSizes[5];
1833 int i, err;
1834 cl_uint j;
1835 MTdata d = init_genrand( gRandomSeed );
1836 int (*foo)(void *,void *,int);
1837
1838 size_t min_alignment = get_min_alignment(context);
1839
1840 foo = verify_write_float;
1841
1842 ptrSizes[0] = sizeof(cl_float);
1843 ptrSizes[1] = ptrSizes[0] << 1;
1844 ptrSizes[2] = ptrSizes[1] << 1;
1845 ptrSizes[3] = ptrSizes[2] << 1;
1846 ptrSizes[4] = ptrSizes[3] << 1;
1847
1848 for ( i = 0; i < 5; i++ ){
1849 inptr[i] = (float *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1850
1851 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1852 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1853 }
1854
1855 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_float ), (char*)"float", 5, (void**)inptr,
1856 buffer_write_float_kernel_code, float_kernel_name, foo );
1857
1858 for ( i = 0; i < 5; i++ ){
1859 align_free( (void *)inptr[i] );
1860 }
1861
1862 free_mtdata(d);
1863 return err;
1864
1865 } // end test_buffer_float_write_array_async()
1866
1867
test_buffer_write_async_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1868 int test_buffer_write_async_long( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1869 {
1870 cl_long *inptr[5];
1871 size_t ptrSizes[5];
1872 int i, err;
1873 cl_uint j;
1874 MTdata d = init_genrand( gRandomSeed );
1875 int (*foo)(void *,void *,int);
1876
1877 size_t min_alignment = get_min_alignment(context);
1878
1879 foo = verify_write_long;
1880
1881 ptrSizes[0] = sizeof(cl_long);
1882 ptrSizes[1] = ptrSizes[0] << 1;
1883 ptrSizes[2] = ptrSizes[1] << 1;
1884 ptrSizes[3] = ptrSizes[2] << 1;
1885 ptrSizes[4] = ptrSizes[3] << 1;
1886
1887 if (! gHasLong )
1888 {
1889 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1890 return CL_SUCCESS;
1891 }
1892
1893 for ( i = 0; i < 5; i++ ){
1894 inptr[i] = (cl_long *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1895
1896 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1897 inptr[i][j] = ((cl_long) genrand_int32(d)) ^ ((cl_long) genrand_int32(d) << 32);
1898 }
1899
1900 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_long ), (char*)"cl_long", 5, (void**)inptr,
1901 buffer_write_long_kernel_code, long_kernel_name, foo );
1902
1903 for ( i = 0; i < 5; i++ ){
1904 align_free( (void *)inptr[i] );
1905 }
1906
1907 free_mtdata(d);
1908 return err;
1909
1910 } // end test_buffer_long_write_array_async()
1911
1912
test_buffer_write_async_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1913 int test_buffer_write_async_ulong( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
1914 {
1915 cl_ulong *inptr[5];
1916 size_t ptrSizes[5];
1917 int i, err;
1918 cl_uint j;
1919 MTdata d = init_genrand( gRandomSeed );
1920 int (*foo)(void *,void *,int);
1921
1922 size_t min_alignment = get_min_alignment(context);
1923
1924 foo = verify_write_ulong;
1925
1926 ptrSizes[0] = sizeof(cl_ulong);
1927 ptrSizes[1] = ptrSizes[0] << 1;
1928 ptrSizes[2] = ptrSizes[1] << 1;
1929 ptrSizes[3] = ptrSizes[2] << 1;
1930 ptrSizes[4] = ptrSizes[3] << 1;
1931
1932 if (! gHasLong )
1933 {
1934 log_info( "Device does not support 64-bit integers. Skipping test.\n" );
1935 return CL_SUCCESS;
1936 }
1937
1938 for ( i = 0; i < 5; i++ ){
1939 inptr[i] = (cl_ulong *)align_malloc(ptrSizes[i] * num_elements, min_alignment);
1940
1941 for ( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1942 inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1943 }
1944
1945 err = test_buffer_write_array_async( deviceID, context, queue, num_elements, sizeof( cl_ulong ), (char*)"ulong long", 5, (void**)inptr,
1946 buffer_write_ulong_kernel_code, ulong_kernel_name, foo );
1947
1948 for ( i = 0; i < 5; i++ ){
1949 align_free( (void *)inptr[i] );
1950 }
1951
1952 free_mtdata(d);
1953 return err;
1954
1955 } // end test_buffer_ulong_write_array_async()
1956
1957