1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24 #include "harness/testHarness.h"
25 #include "harness/errorHelpers.h"
26 #include "harness/conversions.h"
27
28 //#define USE_LOCAL_THREADS 1
29
30 #ifndef uchar
31 typedef unsigned char uchar;
32 #endif
33
34 #ifndef TestStruct
35 typedef struct{
36 int a;
37 float b;
38 } TestStruct;
39 #endif
40
41 const char *stream_write_int_kernel_code[] = {
42 "__kernel void test_stream_write_int(__global int *src, __global int *dst)\n"
43 "{\n"
44 " int tid = get_global_id(0);\n"
45 "\n"
46 " dst[tid] = src[tid];\n"
47 "}\n",
48
49 "__kernel void test_stream_write_int2(__global int2 *src, __global int2 *dst)\n"
50 "{\n"
51 " int tid = get_global_id(0);\n"
52 "\n"
53 " dst[tid] = src[tid];\n"
54 "}\n",
55
56 "__kernel void test_stream_write_int4(__global int4 *src, __global int4 *dst)\n"
57 "{\n"
58 " int tid = get_global_id(0);\n"
59 "\n"
60 " dst[tid] = src[tid];\n"
61 "}\n",
62
63 "__kernel void test_stream_write_int8(__global int8 *src, __global int8 *dst)\n"
64 "{\n"
65 " int tid = get_global_id(0);\n"
66 "\n"
67 " dst[tid] = src[tid];\n"
68 "}\n",
69
70 "__kernel void test_stream_write_int16(__global int16 *src, __global int16 *dst)\n"
71 "{\n"
72 " int tid = get_global_id(0);\n"
73 "\n"
74 " dst[tid] = src[tid];\n"
75 "}\n" };
76
77 static const char *int_kernel_name[] = { "test_stream_write_int", "test_stream_write_int2", "test_stream_write_int4", "test_stream_write_int8", "test_stream_write_int16" };
78
79
80 const char *stream_write_uint_kernel_code[] = {
81 "__kernel void test_stream_write_uint(__global uint *src, __global uint *dst)\n"
82 "{\n"
83 " int tid = get_global_id(0);\n"
84 "\n"
85 " dst[tid] = src[tid];\n"
86 "}\n",
87
88 "__kernel void test_stream_write_uint2(__global uint2 *src, __global uint2 *dst)\n"
89 "{\n"
90 " int tid = get_global_id(0);\n"
91 "\n"
92 " dst[tid] = src[tid];\n"
93 "}\n",
94
95 "__kernel void test_stream_write_uint4(__global uint4 *src, __global uint4 *dst)\n"
96 "{\n"
97 " int tid = get_global_id(0);\n"
98 "\n"
99 " dst[tid] = src[tid];\n"
100 "}\n",
101
102 "__kernel void test_stream_write_uint8(__global uint8 *src, __global uint8 *dst)\n"
103 "{\n"
104 " int tid = get_global_id(0);\n"
105 "\n"
106 " dst[tid] = src[tid];\n"
107 "}\n",
108
109 "__kernel void test_stream_write_uint16(__global uint16 *src, __global uint16 *dst)\n"
110 "{\n"
111 " int tid = get_global_id(0);\n"
112 "\n"
113 " dst[tid] = src[tid];\n"
114 "}\n" };
115
116 static const char *uint_kernel_name[] = { "test_stream_write_uint", "test_stream_write_uint2", "test_stream_write_uint4", "test_stream_write_uint8", "test_stream_write_uint16" };
117
118
119 const char *stream_write_ushort_kernel_code[] = {
120 "__kernel void test_stream_write_ushort(__global ushort *src, __global ushort *dst)\n"
121 "{\n"
122 " int tid = get_global_id(0);\n"
123 "\n"
124 " dst[tid] = src[tid];\n"
125 "}\n",
126
127 "__kernel void test_stream_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
128 "{\n"
129 " int tid = get_global_id(0);\n"
130 "\n"
131 " dst[tid] = src[tid];\n"
132 "}\n",
133
134 "__kernel void test_stream_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
135 "{\n"
136 " int tid = get_global_id(0);\n"
137 "\n"
138 " dst[tid] = src[tid];\n"
139 "}\n",
140
141 "__kernel void test_stream_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
142 "{\n"
143 " int tid = get_global_id(0);\n"
144 "\n"
145 " dst[tid] = src[tid];\n"
146 "}\n",
147
148 "__kernel void test_stream_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
149 "{\n"
150 " int tid = get_global_id(0);\n"
151 "\n"
152 " dst[tid] = src[tid];\n"
153 "}\n" };
154
155 static const char *ushort_kernel_name[] = { "test_stream_write_ushort", "test_stream_write_ushort2", "test_stream_write_ushort4", "test_stream_write_ushort8", "test_stream_write_ushort16" };
156
157
158
159 const char *stream_write_short_kernel_code[] = {
160 "__kernel void test_stream_write_short(__global short *src, __global short *dst)\n"
161 "{\n"
162 " int tid = get_global_id(0);\n"
163 "\n"
164 " dst[tid] = src[tid];\n"
165 "}\n",
166
167 "__kernel void test_stream_write_short2(__global short2 *src, __global short2 *dst)\n"
168 "{\n"
169 " int tid = get_global_id(0);\n"
170 "\n"
171 " dst[tid] = src[tid];\n"
172 "}\n",
173
174 "__kernel void test_stream_write_short4(__global short4 *src, __global short4 *dst)\n"
175 "{\n"
176 " int tid = get_global_id(0);\n"
177 "\n"
178 " dst[tid] = src[tid];\n"
179 "}\n",
180
181 "__kernel void test_stream_write_short8(__global short8 *src, __global short8 *dst)\n"
182 "{\n"
183 " int tid = get_global_id(0);\n"
184 "\n"
185 " dst[tid] = src[tid];\n"
186 "}\n",
187
188 "__kernel void test_stream_write_short16(__global short16 *src, __global short16 *dst)\n"
189 "{\n"
190 " int tid = get_global_id(0);\n"
191 "\n"
192 " dst[tid] = src[tid];\n"
193 "}\n" };
194
195 static const char *short_kernel_name[] = { "test_stream_write_short", "test_stream_write_short2", "test_stream_write_short4", "test_stream_write_short8", "test_stream_write_short16" };
196
197
198 const char *stream_write_char_kernel_code[] = {
199 "__kernel void test_stream_write_char(__global char *src, __global char *dst)\n"
200 "{\n"
201 " int tid = get_global_id(0);\n"
202 "\n"
203 " dst[tid] = src[tid];\n"
204 "}\n",
205
206 "__kernel void test_stream_write_char2(__global char2 *src, __global char2 *dst)\n"
207 "{\n"
208 " int tid = get_global_id(0);\n"
209 "\n"
210 " dst[tid] = src[tid];\n"
211 "}\n",
212
213 "__kernel void test_stream_write_char4(__global char4 *src, __global char4 *dst)\n"
214 "{\n"
215 " int tid = get_global_id(0);\n"
216 "\n"
217 " dst[tid] = src[tid];\n"
218 "}\n",
219
220 "__kernel void test_stream_write_char8(__global char8 *src, __global char8 *dst)\n"
221 "{\n"
222 " int tid = get_global_id(0);\n"
223 "\n"
224 " dst[tid] = src[tid];\n"
225 "}\n",
226
227 "__kernel void test_stream_write_char16(__global char16 *src, __global char16 *dst)\n"
228 "{\n"
229 " int tid = get_global_id(0);\n"
230 "\n"
231 " dst[tid] = src[tid];\n"
232 "}\n" };
233
234 static const char *char_kernel_name[] = { "test_stream_write_char", "test_stream_write_char2", "test_stream_write_char4", "test_stream_write_char8", "test_stream_write_char16" };
235
236
237 const char *stream_write_uchar_kernel_code[] = {
238 "__kernel void test_stream_write_uchar(__global uchar *src, __global uchar *dst)\n"
239 "{\n"
240 " int tid = get_global_id(0);\n"
241 "\n"
242 " dst[tid] = src[tid];\n"
243 "}\n",
244
245 "__kernel void test_stream_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
246 "{\n"
247 " int tid = get_global_id(0);\n"
248 "\n"
249 " dst[tid] = src[tid];\n"
250 "}\n",
251
252 "__kernel void test_stream_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
253 "{\n"
254 " int tid = get_global_id(0);\n"
255 "\n"
256 " dst[tid] = src[tid];\n"
257 "}\n",
258
259 "__kernel void test_stream_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
260 "{\n"
261 " int tid = get_global_id(0);\n"
262 "\n"
263 " dst[tid] = src[tid];\n"
264 "}\n",
265
266 "__kernel void test_stream_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
267 "{\n"
268 " int tid = get_global_id(0);\n"
269 "\n"
270 " dst[tid] = src[tid];\n"
271 "}\n" };
272
273 static const char *uchar_kernel_name[] = { "test_stream_write_uchar", "test_stream_write_uchar2", "test_stream_write_uchar4", "test_stream_write_uchar8", "test_stream_write_uchar16" };
274
275
276 const char *stream_write_float_kernel_code[] = {
277 "__kernel void test_stream_write_float(__global float *src, __global float *dst)\n"
278 "{\n"
279 " int tid = get_global_id(0);\n"
280 "\n"
281 " dst[tid] = src[tid];\n"
282 "}\n",
283
284 "__kernel void test_stream_write_float2(__global float2 *src, __global float2 *dst)\n"
285 "{\n"
286 " int tid = get_global_id(0);\n"
287 "\n"
288 " dst[tid] = src[tid];\n"
289 "}\n",
290
291 "__kernel void test_stream_write_float4(__global float4 *src, __global float4 *dst)\n"
292 "{\n"
293 " int tid = get_global_id(0);\n"
294 "\n"
295 " dst[tid] = src[tid];\n"
296 "}\n",
297
298 "__kernel void test_stream_write_float8(__global float8 *src, __global float8 *dst)\n"
299 "{\n"
300 " int tid = get_global_id(0);\n"
301 "\n"
302 " dst[tid] = src[tid];\n"
303 "}\n",
304
305 "__kernel void test_stream_write_float16(__global float16 *src, __global float16 *dst)\n"
306 "{\n"
307 " int tid = get_global_id(0);\n"
308 "\n"
309 " dst[tid] = src[tid];\n"
310 "}\n" };
311
312 static const char *float_kernel_name[] = { "test_stream_write_float", "test_stream_write_float2", "test_stream_write_float4", "test_stream_write_float8", "test_stream_write_float16" };
313
314
315 const char *stream_write_half_kernel_code[] = {
316 "__kernel void test_stream_write_half(__global half *src, __global float *dst)\n"
317 "{\n"
318 " int tid = get_global_id(0);\n"
319 "\n"
320 " dst[tid] = vload_half( tid * 2, src );\n"
321 "}\n",
322
323 "__kernel void test_stream_write_half2(__global half2 *src, __global float2 *dst)\n"
324 "{\n"
325 " int tid = get_global_id(0);\n"
326 "\n"
327 " dst[tid] = vload_half2( tid * 2, src );\n"
328 "}\n",
329
330 "__kernel void test_stream_write_half4(__global half4 *src, __global float4 *dst)\n"
331 "{\n"
332 " int tid = get_global_id(0);\n"
333 "\n"
334 " dst[tid] = vload_half4( tid * 2, src );\n"
335 "}\n",
336
337 "__kernel void test_stream_write_half8(__global half8 *src, __global float8 *dst)\n"
338 "{\n"
339 " int tid = get_global_id(0);\n"
340 "\n"
341 " dst[tid] = vload_half8( tid * 2, src );\n"
342 "}\n",
343
344 "__kernel void test_stream_write_half16(__global half16 *src, __global float16 *dst)\n"
345 "{\n"
346 " int tid = get_global_id(0);\n"
347 "\n"
348 " dst[tid] = vload_half16( tid * 2, src );\n"
349 "}\n" };
350
351 static const char *half_kernel_name[] = { "test_stream_write_half", "test_stream_write_half2", "test_stream_write_half4", "test_stream_write_half8", "test_stream_write_half16" };
352
353
354 const char *stream_write_long_kernel_code[] = {
355 "__kernel void test_stream_write_long(__global long *src, __global long *dst)\n"
356 "{\n"
357 " int tid = get_global_id(0);\n"
358 "\n"
359 " dst[tid] = src[tid];\n"
360 "}\n",
361
362 "__kernel void test_stream_write_long2(__global long2 *src, __global long2 *dst)\n"
363 "{\n"
364 " int tid = get_global_id(0);\n"
365 "\n"
366 " dst[tid] = src[tid];\n"
367 "}\n",
368
369 "__kernel void test_stream_write_long4(__global long4 *src, __global long4 *dst)\n"
370 "{\n"
371 " int tid = get_global_id(0);\n"
372 "\n"
373 " dst[tid] = src[tid];\n"
374 "}\n",
375
376 "__kernel void test_stream_write_long8(__global long8 *src, __global long8 *dst)\n"
377 "{\n"
378 " int tid = get_global_id(0);\n"
379 "\n"
380 " dst[tid] = src[tid];\n"
381 "}\n",
382
383 "__kernel void test_stream_write_long16(__global long16 *src, __global long16 *dst)\n"
384 "{\n"
385 " int tid = get_global_id(0);\n"
386 "\n"
387 " dst[tid] = src[tid];\n"
388 "}\n" };
389
390 static const char *long_kernel_name[] = { "test_stream_write_long", "test_stream_write_long2", "test_stream_write_long4", "test_stream_write_long8", "test_stream_write_long16" };
391
392
393 const char *stream_write_ulong_kernel_code[] = {
394 "__kernel void test_stream_write_ulong(__global ulong *src, __global ulong *dst)\n"
395 "{\n"
396 " int tid = get_global_id(0);\n"
397 "\n"
398 " dst[tid] = src[tid];\n"
399 "}\n",
400
401 "__kernel void test_stream_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
402 "{\n"
403 " int tid = get_global_id(0);\n"
404 "\n"
405 " dst[tid] = src[tid];\n"
406 "}\n",
407
408 "__kernel void test_stream_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
409 "{\n"
410 " int tid = get_global_id(0);\n"
411 "\n"
412 " dst[tid] = src[tid];\n"
413 "}\n",
414
415 "__kernel void test_stream_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
416 "{\n"
417 " int tid = get_global_id(0);\n"
418 "\n"
419 " dst[tid] = src[tid];\n"
420 "}\n",
421
422 "__kernel void test_stream_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
423 "{\n"
424 " int tid = get_global_id(0);\n"
425 "\n"
426 " dst[tid] = src[tid];\n"
427 "}\n" };
428
429 static const char *ulong_kernel_name[] = { "test_stream_write_ulong", "test_stream_write_ulong2", "test_stream_write_ulong4", "test_stream_write_ulong8", "test_stream_write_ulong16" };
430
431
432 static const char *stream_write_struct_kernel_code[] = {
433 "typedef struct{\n"
434 "int a;\n"
435 "float b;\n"
436 "} TestStruct;\n"
437 "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n"
438 "{\n"
439 " int tid = get_global_id(0);\n"
440 "\n"
441 " dst[tid].a = src[tid].a;\n"
442 " dst[tid].b = src[tid].b;\n"
443 "}\n" };
444
445 static const char *struct_kernel_name[] = { "read_write_struct" };
446
447
verify_write_int(void * ptr1,void * ptr2,int n)448 static int verify_write_int( void *ptr1, void *ptr2, int n )
449 {
450 int i;
451 int *inptr = (int *)ptr1;
452 int *outptr = (int *)ptr2;
453
454 for (i=0; i<n; i++){
455 if( outptr[i] != inptr[i] )
456 return -1;
457 }
458
459 return 0;
460 }
461
462
verify_write_uint(void * ptr1,void * ptr2,int n)463 static int verify_write_uint( void *ptr1, void *ptr2, int n )
464 {
465 int i;
466 cl_uint *inptr = (cl_uint *)ptr1;
467 cl_uint *outptr = (cl_uint *)ptr2;
468
469 for (i=0; i<n; i++){
470 if( outptr[i] != inptr[i] )
471 return -1;
472 }
473
474 return 0;
475 }
476
477
verify_write_short(void * ptr1,void * ptr2,int n)478 static int verify_write_short( void *ptr1, void *ptr2, int n )
479 {
480 int i;
481 short *inptr = (short *)ptr1;
482 short *outptr = (short *)ptr2;
483
484 for (i=0; i<n; i++){
485 if( outptr[i] != inptr[i] )
486 return -1;
487 }
488
489 return 0;
490 }
491
492
verify_write_ushort(void * ptr1,void * ptr2,int n)493 static int verify_write_ushort( void *ptr1, void *ptr2, int n )
494 {
495 int i;
496 cl_ushort *inptr = (cl_ushort *)ptr1;
497 cl_ushort *outptr = (cl_ushort *)ptr2;
498
499 for (i=0; i<n; i++){
500 if( outptr[i] != inptr[i] )
501 return -1;
502 }
503
504 return 0;
505 }
506
507
verify_write_char(void * ptr1,void * ptr2,int n)508 static int verify_write_char( void *ptr1, void *ptr2, int n )
509 {
510 int i;
511 char *inptr = (char *)ptr1;
512 char *outptr = (char *)ptr2;
513
514 for (i=0; i<n; i++){
515 if( outptr[i] != inptr[i] )
516 return -1;
517 }
518
519 return 0;
520 }
521
522
verify_write_uchar(void * ptr1,void * ptr2,int n)523 static int verify_write_uchar( void *ptr1, void *ptr2, int n )
524 {
525 int i;
526 uchar *inptr = (uchar *)ptr1;
527 uchar *outptr = (uchar *)ptr2;
528
529 for (i=0; i<n; i++){
530 if( outptr[i] != inptr[i] )
531 return -1;
532 }
533
534 return 0;
535 }
536
537
verify_write_float(void * ptr1,void * ptr2,int n)538 static int verify_write_float( void *ptr1, void *ptr2, int n )
539 {
540 int i;
541 float *inptr = (float *)ptr1;
542 float *outptr = (float *)ptr2;
543
544 for (i=0; i<n; i++){
545 if( outptr[i] != inptr[i] )
546 return -1;
547 }
548
549 return 0;
550 }
551
552
verify_write_half(void * ptr1,void * ptr2,int n)553 static int verify_write_half( void *ptr1, void *ptr2, int n )
554 {
555 int i;
556 cl_ushort *inptr = (cl_ushort *)ptr1;
557 cl_ushort *outptr = (cl_ushort *)ptr2;
558
559 for( i = 0; i < n; i++ ){
560 if( outptr[i] != inptr[i] )
561 return -1;
562 }
563
564 return 0;
565 }
566
567
verify_write_long(void * ptr1,void * ptr2,int n)568 static int verify_write_long( void *ptr1, void *ptr2, int n )
569 {
570 int i;
571 cl_long *inptr = (cl_long *)ptr1;
572 cl_long *outptr = (cl_long *)ptr2;
573
574 for (i=0; i<n; i++){
575 if( outptr[i] != inptr[i] )
576 return -1;
577 }
578
579 return 0;
580 }
581
582
verify_write_ulong(void * ptr1,void * ptr2,int n)583 static int verify_write_ulong( void *ptr1, void *ptr2, int n )
584 {
585 int i;
586 cl_ulong *inptr = (cl_ulong *)ptr1;
587 cl_ulong *outptr = (cl_ulong *)ptr2;
588
589 for (i=0; i<n; i++){
590 if( outptr[i] != inptr[i] )
591 return -1;
592 }
593
594 return 0;
595 }
596
597
verify_write_struct(void * ptr1,void * ptr2,int n)598 static int verify_write_struct( void *ptr1, void *ptr2, int n )
599 {
600 int i;
601 TestStruct *inptr = (TestStruct *)ptr1;
602 TestStruct *outptr = (TestStruct *)ptr2;
603
604 for (i=0; i<n; i++){
605 if( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
606 return -1;
607 }
608
609 return 0;
610 }
611
612
test_stream_write(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,size_t size,const char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int),MTdata d)613 int test_stream_write( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
614 void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
615 {
616 cl_mem streams[10];
617 void *outptr[5];
618 cl_program program[5];
619 cl_kernel kernel[5];
620 cl_event writeEvent;
621 cl_ulong queueStart, submitStart, writeStart, writeEnd;
622 size_t ptrSizes[5], outPtrSizes[5];
623 size_t threads[1];
624 #ifdef USE_LOCAL_THREADS
625 size_t localThreads[1];
626 #endif
627 int err, err_count = 0;
628 int i, ii;
629
630 threads[0] = (size_t)num_elements;
631
632 #ifdef USE_LOCAL_THREADS
633 err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
634 if( err != CL_SUCCESS ){
635 print_error( err, " Unable to get thread group max size" );
636 return -1;
637 }
638 if( localThreads[0] > threads[0] )
639 localThreads[0] = threads[0];
640 #endif
641
642 ptrSizes[0] = size;
643 ptrSizes[1] = ptrSizes[0] << 1;
644 ptrSizes[2] = ptrSizes[1] << 1;
645 ptrSizes[3] = ptrSizes[2] << 1;
646 ptrSizes[4] = ptrSizes[3] << 1;
647
648 loops = ( loops < 5 ? loops : 5 );
649
650 for( i = 0; i < loops; i++ )
651 {
652 outPtrSizes[i] = ptrSizes[i];
653 }
654
655 for( i = 0; i < loops; i++ ){
656 ii = i << 1;
657 streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err );
658 if( ! streams[ii] ){
659 free( outptr[i] );
660 log_error( " clCreateBuffer failed\n" );
661 return -1;
662 }
663 if( ! strcmp( type, "half" ) ){
664 outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 );
665 streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outPtrSizes[i] * 2 * num_elements, NULL, &err );
666 }
667 else{
668 outptr[i] = malloc( outPtrSizes[i] * num_elements );
669 streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), outPtrSizes[i] * num_elements, NULL, &err );
670 }
671 if( ! streams[ii+1] ){
672 clReleaseMemObject(streams[ii]);
673 free( outptr[i] );
674 log_error( " clCreateBuffer failed\n" );
675 return -1;
676 }
677
678 err = clEnqueueWriteBuffer( queue, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &writeEvent );
679 if( err != CL_SUCCESS ){
680 clReleaseMemObject( streams[ii] );
681 clReleaseMemObject( streams[ii+1] );
682 free( outptr[i] );
683 print_error( err, " clWriteArray failed" );
684 return -1;
685 }
686
687 // This synchronization point is needed in order to assume the data is valid.
688 // Getting profiling information is not a synchronization point.
689 err = clWaitForEvents( 1, &writeEvent );
690 if( err != CL_SUCCESS )
691 {
692 print_error( err, "Unable to wait for event completion" );
693 clReleaseEvent(writeEvent);
694 clReleaseMemObject( streams[ii] );
695 clReleaseMemObject( streams[ii+1] );
696 free( outptr[i] );
697 return -1;
698 }
699
700 // test profiling
701 while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
702 CL_PROFILING_INFO_NOT_AVAILABLE );
703 if( err != CL_SUCCESS ){
704 print_error( err, "clGetEventProfilingInfo failed" );
705 clReleaseEvent(writeEvent);
706 clReleaseMemObject( streams[ii] );
707 clReleaseMemObject( streams[ii+1] );
708 free( outptr[i] );
709 return -1;
710 }
711
712 while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
713 CL_PROFILING_INFO_NOT_AVAILABLE );
714 if( err != CL_SUCCESS ){
715 print_error( err, "clGetEventProfilingInfo failed" );
716 clReleaseEvent(writeEvent);
717 clReleaseMemObject( streams[ii] );
718 clReleaseMemObject( streams[ii+1] );
719 free( outptr[i] );
720 return -1;
721 }
722
723 err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
724 if( err != CL_SUCCESS ){
725 print_error( err, "clGetEventProfilingInfo failed" );
726 clReleaseEvent(writeEvent);
727 clReleaseMemObject( streams[ii] );
728 clReleaseMemObject( streams[ii+1] );
729 free( outptr[i] );
730 return -1;
731 }
732
733 err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
734 if( err != CL_SUCCESS ){
735 print_error( err, "clGetEventProfilingInfo failed" );
736 clReleaseEvent(writeEvent);
737 clReleaseMemObject( streams[ii] );
738 clReleaseMemObject( streams[ii+1] );
739 free( outptr[i] );
740 return -1;
741 }
742
743
744 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
745 if( err ){
746 clReleaseEvent(writeEvent);
747 clReleaseMemObject(streams[ii]);
748 clReleaseMemObject(streams[ii+1]);
749 free( outptr[i] );
750 log_error( " Error creating program for %s\n", type );
751 return -1;
752 }
753
754 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
755 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
756 if (err != CL_SUCCESS){
757 clReleaseEvent(writeEvent);
758 clReleaseKernel( kernel[i] );
759 clReleaseProgram( program[i] );
760 clReleaseMemObject( streams[ii] );
761 clReleaseMemObject( streams[ii+1] );
762 free( outptr[i] );
763 print_error( err, " clSetKernelArg failed" );
764 return -1;
765 }
766
767 #ifdef USE_LOCAL_THREADS
768 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
769 #else
770 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
771 #endif
772 if( err != CL_SUCCESS ){
773 print_error( err, " clEnqueueNDRangeKernel failed" );
774 clReleaseEvent(writeEvent);
775 clReleaseKernel( kernel[i] );
776 clReleaseProgram( program[i] );
777 clReleaseMemObject( streams[ii] );
778 clReleaseMemObject( streams[ii+1] );
779 free( outptr[i] );
780 return -1;
781 }
782
783 if( ! strcmp( type, "half" ) ){
784 err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
785 }
786 else{
787 err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
788 }
789 if( err != CL_SUCCESS ){
790 clReleaseEvent(writeEvent);
791 clReleaseKernel( kernel[i] );
792 clReleaseProgram( program[i] );
793 clReleaseMemObject( streams[ii] );
794 clReleaseMemObject( streams[ii+1] );
795 free( outptr[i] );
796 print_error( err, " clEnqueueReadBuffer failed" );
797 return -1;
798 }
799
800 char *inP = (char *)inptr[i];
801 char *outP = (char *)outptr[i];
802 int err2 = 0;
803 for( size_t p = 0; p < (size_t)num_elements; p++ )
804 {
805 if( fn( inP, outP, (int)(ptrSizes[i] / ptrSizes[0]) ) )
806 {
807 log_error( " %s%d data failed to verify\n", type, 1<<i );
808 err2 = -1;
809 err_count++;
810 }
811 inP += ptrSizes[i];
812 outP += outPtrSizes[i];
813 }
814 if( !err2 )
815 {
816 log_info( " %s%d data verified\n", type, 1<<i );
817 }
818 err = err2;
819
820 if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
821 err_count++;
822
823 // cleanup
824 clReleaseEvent(writeEvent);
825 clReleaseKernel( kernel[i] );
826 clReleaseProgram( program[i] );
827 clReleaseMemObject( streams[ii] );
828 clReleaseMemObject( streams[ii+1] );
829 free( outptr[i] );
830 }
831
832 return err_count;
833
834 } // end test_stream_write()
835
836
837
838 /*
839 int test_stream_struct_write( cl_device_group device, cl_device id, cl_context context, int num_elements )
840 {
841 cl_mem streams[10];
842 void *outptr[5];
843 TestStruct *inptr[5];
844 cl_program program[5];
845 cl_kernel kernel[5];
846 void *values[2];
847 size_t sizes[2] = { sizeof(cl_stream), sizeof(cl_stream) };
848 size_t ptrSizes[5];
849 size_t size = sizeof( TestStruct );
850 size_t threads[1];
851 #ifdef USE_LOCAL_THREADS
852 size_t localThreads[1];
853 #endif
854 int err;
855 int i, ii, j;
856 int loops = 1; // no vector for structs
857
858 threads[0] = (size_t)num_elements;
859
860 #ifdef USE_LOCAL_THREADS
861 err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
862 if( err != CL_SUCCESS ){
863 log_error( "Unable to get thread group max size: %d", err );
864 return -1;
865 }
866 if( localThreads[0] > threads[0] )
867 localThreads[0] = threads[0];
868 #endif
869
870 ptrSizes[0] = size;
871 ptrSizes[1] = ptrSizes[0] << 1;
872 ptrSizes[2] = ptrSizes[1] << 1;
873 ptrSizes[3] = ptrSizes[2] << 1;
874 ptrSizes[4] = ptrSizes[3] << 1;
875
876
877 loops = ( loops < 5 ? loops : 5 );
878 for( i = 0; i < loops; i++ ){
879
880 inptr[i] = (TestStruct *)malloc(ptrSizes[i] * num_elements);
881
882 for( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){
883 inptr[i][j].a = (int)random_float( -2147483648.f, 2147483647.0f );
884 inptr[i][j].b = random_float( -FLT_MAX, FLT_MAX );
885 }
886
887 ii = i << 1;
888 streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL);
889 if( ! streams[ii] ){
890 free( outptr[i] );
891 log_error( " clCreateBuffer failed\n" );
892 return -1;
893 }
894 outptr[i] = malloc( ptrSizes[i] * num_elements );
895 streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL);
896 if( ! streams[ii+1] ){
897 clReleaseMemObject(streams[ii]);
898 free( outptr[i] );
899 log_error( " clCreateBuffer failed\n" );
900 return -1;
901 }
902
903 err = clWriteArray(context, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], NULL);
904 if( err != CL_SUCCESS ){
905 clReleaseMemObject(streams[ii]);
906 clReleaseMemObject(streams[ii+1]);
907 free( outptr[i] );
908 print_error( err, " clWriteArray failed" );
909 return -1;
910 }
911
912 err = create_program_and_kernel( device, struct_kernel_code, "read_write_struct", &program[i], &kernel[i] );
913 if( err ){
914 clReleaseMemObject(streams[ii]);
915 clReleaseMemObject(streams[ii+1]);
916 free( outptr[i] );
917 log_error( " Error creating program for struct\n" );
918 return -1;
919 }
920
921 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
922 err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
923 if (err != CL_SUCCESS){
924 clReleaseProgram( program[i] );
925 clReleaseKernel( kernel[i] );
926 clReleaseMemObject( streams[ii] );
927 clReleaseMemObject( streams[ii+1] );
928 free( outptr[i] );
929 print_error( err, " clSetKernelArg failed" );
930 return -1;
931 }
932
933 #ifdef USE_LOCAL_THREADS
934 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
935 #else
936 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
937 #endif
938 if( err != CL_SUCCESS ){
939 print_error( err, " clEnqueueNDRangeKernel failed" );
940 clReleaseMemObject( streams[ii] );
941 clReleaseMemObject( streams[ii+1] );
942 clReleaseKernel( kernel[i] );
943 clReleaseProgram( program[i] );
944 free( outptr[i] );
945 return -1;
946 }
947
948 err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
949 if( err != CL_SUCCESS ){
950 clReleaseMemObject( streams[ii] );
951 clReleaseMemObject( streams[ii+1] );
952 clReleaseKernel( kernel[i] );
953 clReleaseProgram( program[i] );
954 free( outptr[i] );
955 print_error( err, " clEnqueueReadBuffer failed" );
956 return -1;
957 }
958
959 if( verify_write_struct( inptr[i], outptr[i], ptrSizes[i] * num_elements / ptrSizes[0] ) ){
960 log_error( " STREAM_WRITE struct%d test failed\n", 1<<i );
961 err = -1;
962 }
963 else{
964 log_info( " STREAM_WRITE struct%d test passed\n", 1<<i );
965 err = 0;
966 }
967 // cleanup
968 clReleaseMemObject( streams[ii] );
969 clReleaseMemObject( streams[ii+1] );
970 clReleaseKernel( kernel[i] );
971 clReleaseProgram( program[i] );
972 free( outptr[i] );
973 free( (void *)inptr[i] );
974 }
975
976 return err;
977
978 } // end test_stream_struct_write()
979 */
980
test_write_array_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)981 int test_write_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
982 {
983 int *inptr[5];
984 size_t ptrSizes[5];
985 int i, j, err;
986 int (*foo)(void *,void *,int);
987 MTdata d = init_genrand( gRandomSeed );
988 foo = verify_write_int;
989
990 ptrSizes[0] = sizeof(cl_int);
991 ptrSizes[1] = ptrSizes[0] << 1;
992 ptrSizes[2] = ptrSizes[1] << 1;
993 ptrSizes[3] = ptrSizes[2] << 1;
994 ptrSizes[4] = ptrSizes[3] << 1;
995
996 for( i = 0; i < 5; i++ ){
997 inptr[i] = (int *)malloc(ptrSizes[i] * num_elements);
998
999 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1000 inptr[i][j] = genrand_int32(d);
1001 }
1002
1003 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_int ), "int", 5, (void**)inptr,
1004 stream_write_int_kernel_code, int_kernel_name, foo, d );
1005
1006 for( i = 0; i < 5; i++ ){
1007 free( (void *)inptr[i] );
1008 }
1009
1010 free_mtdata(d);
1011
1012 return err;
1013
1014 } // end write_int_array()
1015
1016
test_write_array_uint(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1017 int test_write_array_uint( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1018 {
1019 cl_uint *inptr[5];
1020 size_t ptrSizes[5];
1021 int i, j, err;
1022 int (*foo)(void *,void *,int);
1023 MTdata d = init_genrand( gRandomSeed );
1024 foo = verify_write_uint;
1025
1026 ptrSizes[0] = sizeof(cl_uint);
1027 ptrSizes[1] = ptrSizes[0] << 1;
1028 ptrSizes[2] = ptrSizes[1] << 1;
1029 ptrSizes[3] = ptrSizes[2] << 1;
1030 ptrSizes[4] = ptrSizes[3] << 1;
1031
1032 for( i = 0; i < 5; i++ ){
1033 inptr[i] = (cl_uint *)malloc(ptrSizes[i] * num_elements);
1034
1035 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1036 inptr[i][j] = genrand_int32(d);
1037 }
1038
1039 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5, (void **)inptr,
1040 stream_write_uint_kernel_code, uint_kernel_name, foo, d );
1041
1042 for( i = 0; i < 5; i++ ){
1043 free( (void *)inptr[i] );
1044 }
1045
1046 free_mtdata(d);
1047 return err;
1048
1049 } // end write_uint_array()
1050
1051
test_write_array_short(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1052 int test_write_array_short( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1053 {
1054 short *inptr[5];
1055 size_t ptrSizes[5];
1056 int i, j, err;
1057 int (*foo)(void *,void *,int);
1058 MTdata d = init_genrand( gRandomSeed );
1059 foo = verify_write_short;
1060
1061 ptrSizes[0] = sizeof(cl_short);
1062 ptrSizes[1] = ptrSizes[0] << 1;
1063 ptrSizes[2] = ptrSizes[1] << 1;
1064 ptrSizes[3] = ptrSizes[2] << 1;
1065 ptrSizes[4] = ptrSizes[3] << 1;
1066
1067 for( i = 0; i < 5; i++ ){
1068 inptr[i] = (short *)malloc(ptrSizes[i] * num_elements);
1069
1070 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1071 inptr[i][j] = (short)genrand_int32(d);
1072 }
1073
1074 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_short ), "short", 5, (void **)inptr,
1075 stream_write_short_kernel_code, short_kernel_name, foo, d );
1076
1077 for( i = 0; i < 5; i++ ){
1078 free( (void *)inptr[i] );
1079 }
1080
1081 free_mtdata(d);
1082 return err;
1083
1084 } // end write_short_array()
1085
1086
test_write_array_ushort(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1087 int test_write_array_ushort( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1088 {
1089 cl_ushort *inptr[5];
1090 size_t ptrSizes[5];
1091 int i, j, err;
1092 int (*foo)(void *,void *,int);
1093 MTdata d = init_genrand( gRandomSeed );
1094 foo = verify_write_ushort;
1095
1096 ptrSizes[0] = sizeof(cl_ushort);
1097 ptrSizes[1] = ptrSizes[0] << 1;
1098 ptrSizes[2] = ptrSizes[1] << 1;
1099 ptrSizes[3] = ptrSizes[2] << 1;
1100 ptrSizes[4] = ptrSizes[3] << 1;
1101
1102 for( i = 0; i < 5; i++ ){
1103 inptr[i] = (cl_ushort *)malloc(ptrSizes[i] * num_elements);
1104
1105 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1106 inptr[i][j] = (cl_ushort)genrand_int32(d);
1107 }
1108
1109 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5, (void **)inptr,
1110 stream_write_ushort_kernel_code, ushort_kernel_name, foo, d );
1111
1112 for( i = 0; i < 5; i++ ){
1113 free( (void *)inptr[i] );
1114 }
1115
1116 free_mtdata(d);
1117 return err;
1118
1119 } // end write_ushort_array()
1120
1121
test_write_array_char(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1122 int test_write_array_char( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1123 {
1124 char *inptr[5];
1125 size_t ptrSizes[5];
1126 int i, j, err;
1127 int (*foo)(void *,void *,int);
1128 MTdata d = init_genrand( gRandomSeed );
1129 foo = verify_write_char;
1130
1131 ptrSizes[0] = sizeof(cl_char);
1132 ptrSizes[1] = ptrSizes[0] << 1;
1133 ptrSizes[2] = ptrSizes[1] << 1;
1134 ptrSizes[3] = ptrSizes[2] << 1;
1135 ptrSizes[4] = ptrSizes[3] << 1;
1136
1137 for( i = 0; i < 5; i++ ){
1138 inptr[i] = (char *)malloc(ptrSizes[i] * num_elements);
1139
1140 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1141 inptr[i][j] = (char)genrand_int32(d);
1142 }
1143
1144 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_char ), "char", 5, (void **)inptr,
1145 stream_write_char_kernel_code, char_kernel_name, foo, d );
1146
1147 for( i = 0; i < 5; i++ ){
1148 free( (void *)inptr[i] );
1149 }
1150
1151 free_mtdata(d);
1152 return err;
1153
1154 } // end write_char_array()
1155
1156
test_write_array_uchar(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1157 int test_write_array_uchar( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1158 {
1159 uchar *inptr[5];
1160 size_t ptrSizes[5];
1161 int i, j, err;
1162 int (*foo)(void *,void *,int);
1163 MTdata d = init_genrand( gRandomSeed );
1164 foo = verify_write_uchar;
1165
1166 ptrSizes[0] = sizeof(cl_uchar);
1167 ptrSizes[1] = ptrSizes[0] << 1;
1168 ptrSizes[2] = ptrSizes[1] << 1;
1169 ptrSizes[3] = ptrSizes[2] << 1;
1170 ptrSizes[4] = ptrSizes[3] << 1;
1171
1172 for( i = 0; i < 5; i++ ){
1173 inptr[i] = (uchar *)malloc(ptrSizes[i] * num_elements);
1174
1175 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1176 inptr[i][j] = (uchar)genrand_int32(d);
1177 }
1178
1179 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5, (void **)inptr,
1180 stream_write_uchar_kernel_code, uchar_kernel_name, foo, d );
1181
1182 for( i = 0; i < 5; i++ ){
1183 free( (void *)inptr[i] );
1184 }
1185
1186 free_mtdata(d);
1187 return err;
1188
1189 } // end write_uchar_array()
1190
1191
test_write_array_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1192 int test_write_array_float( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1193 {
1194 float *inptr[5];
1195 size_t ptrSizes[5];
1196 int i, j, err;
1197 int (*foo)(void *,void *,int);
1198 MTdata d = init_genrand( gRandomSeed );
1199 foo = verify_write_float;
1200
1201 ptrSizes[0] = sizeof(cl_float);
1202 ptrSizes[1] = ptrSizes[0] << 1;
1203 ptrSizes[2] = ptrSizes[1] << 1;
1204 ptrSizes[3] = ptrSizes[2] << 1;
1205 ptrSizes[4] = ptrSizes[3] << 1;
1206
1207 for( i = 0; i < 5; i++ ){
1208 inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1209
1210 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1211 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1212 }
1213
1214 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_float ), "float", 5, (void **)inptr,
1215 stream_write_float_kernel_code, float_kernel_name, foo, d );
1216
1217 for( i = 0; i < 5; i++ ){
1218 free( (void *)inptr[i] );
1219 }
1220
1221 free_mtdata(d);
1222 return err;
1223
1224 } // end write_float_array()
1225
1226
test_write_array_half(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1227 int test_write_array_half( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1228 {
1229 float *inptr[5];
1230 size_t ptrSizes[5];
1231 int i, j, err;
1232 int (*foo)(void *,void *,int);
1233 MTdata d = init_genrand( gRandomSeed );
1234 foo = verify_write_half;
1235
1236 ptrSizes[0] = sizeof( cl_half );
1237 ptrSizes[1] = ptrSizes[0] << 1;
1238 ptrSizes[2] = ptrSizes[1] << 1;
1239 ptrSizes[3] = ptrSizes[2] << 1;
1240 ptrSizes[4] = ptrSizes[3] << 1;
1241
1242 for( i = 0; i < 5; i++ ){
1243 inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1244
1245 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ( ptrSizes[0] * 2 ); j++ )
1246 inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1247 }
1248
1249 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_half ), "half", 5, (void **)inptr,
1250 stream_write_half_kernel_code, half_kernel_name, foo, d );
1251
1252 for( i = 0; i < 5; i++ ){
1253 free( (void *)inptr[i] );
1254 }
1255
1256 free_mtdata(d);
1257 return err;
1258
1259 } // end write_half_array()
1260
1261
test_write_array_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1262 int test_write_array_long( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1263 {
1264 cl_long *inptr[5];
1265 size_t ptrSizes[5];
1266 int i, j, err;
1267 int (*foo)(void *,void *,int);
1268 MTdata d = init_genrand( gRandomSeed );
1269 foo = verify_write_long;
1270
1271 if (!gHasLong)
1272 {
1273 log_info("write_long_array: Long types unsupported, skipping.");
1274 return CL_SUCCESS;
1275 }
1276
1277 ptrSizes[0] = sizeof(cl_long);
1278 ptrSizes[1] = ptrSizes[0] << 1;
1279 ptrSizes[2] = ptrSizes[1] << 1;
1280 ptrSizes[3] = ptrSizes[2] << 1;
1281 ptrSizes[4] = ptrSizes[3] << 1;
1282
1283 for( i = 0; i < 5; i++ ){
1284 inptr[i] = (cl_long *)malloc(ptrSizes[i] * num_elements);
1285
1286 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1287 inptr[i][j] = (cl_long) genrand_int32(d) ^ ((cl_long) genrand_int32(d) << 32);
1288 }
1289
1290 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_long ), "cl_long", 5, (void **)inptr,
1291 stream_write_long_kernel_code, long_kernel_name, foo, d );
1292
1293 for( i = 0; i < 5; i++ ){
1294 free( (void *)inptr[i] );
1295 }
1296
1297 free_mtdata(d);
1298 return err;
1299
1300 } // end write_long_array()
1301
1302
test_write_array_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1303 int test_write_array_ulong( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1304 {
1305 cl_ulong *inptr[5];
1306 size_t ptrSizes[5];
1307 int i, j, err;
1308 int (*foo)(void *,void *,int);
1309 MTdata d = init_genrand( gRandomSeed );
1310 foo = verify_write_ulong;
1311
1312 if (!gHasLong)
1313 {
1314 log_info("write_long_array: Long types unsupported, skipping.");
1315 return CL_SUCCESS;
1316 }
1317
1318 ptrSizes[0] = sizeof(cl_ulong);
1319 ptrSizes[1] = ptrSizes[0] << 1;
1320 ptrSizes[2] = ptrSizes[1] << 1;
1321 ptrSizes[3] = ptrSizes[2] << 1;
1322 ptrSizes[4] = ptrSizes[3] << 1;
1323
1324 for( i = 0; i < 5; i++ ){
1325 inptr[i] = (cl_ulong *)malloc(ptrSizes[i] * num_elements);
1326
1327 for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1328 inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1329 }
1330
1331 err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong long", 5, (void **)inptr,
1332 stream_write_ulong_kernel_code, ulong_kernel_name, foo, d );
1333
1334 for( i = 0; i < 5; i++ ){
1335 free( (void *)inptr[i] );
1336 }
1337
1338 free_mtdata(d);
1339 return err;
1340
1341 } // end write_ulong_array()
1342
1343
test_write_array_struct(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1344 int test_write_array_struct( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1345 {
1346 TestStruct *inptr[1];
1347 size_t ptrSizes[1];
1348 int j, err;
1349 int (*foo)(void *,void *,int);
1350 MTdata d = init_genrand( gRandomSeed );
1351 foo = verify_write_struct;
1352
1353 ptrSizes[0] = sizeof( TestStruct );
1354
1355 inptr[0] = (TestStruct *)malloc( ptrSizes[0] * num_elements );
1356
1357 for( j = 0; (unsigned int)j < ptrSizes[0] * num_elements / ptrSizes[0]; j++ ){
1358 inptr[0][j].a = (int)genrand_int32(d);
1359 inptr[0][j].b = get_random_float( 0.f, 1.844674407370954e+19f, d );
1360 }
1361
1362 err = test_stream_write( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1, (void **)inptr,
1363 stream_write_struct_kernel_code, struct_kernel_name, foo, d );
1364
1365 free( (void *)inptr[0] );
1366
1367 free_mtdata(d);
1368 return err;
1369
1370 } // end write_struct_array()
1371
1372
1373
1374
1375
1376