• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 #include "harness/testHarness.h"
25 #include "harness/errorHelpers.h"
26 #include "harness/conversions.h"
27 
28 //#define USE_LOCAL_THREADS    1
29 
30 #ifndef uchar
31 typedef unsigned char uchar;
32 #endif
33 
34 #ifndef TestStruct
35 typedef struct{
36     int        a;
37     float    b;
38 } TestStruct;
39 #endif
40 
41 const char *stream_write_int_kernel_code[] = {
42     "__kernel void test_stream_write_int(__global int *src, __global int *dst)\n"
43     "{\n"
44     "    int  tid = get_global_id(0);\n"
45     "\n"
46     "    dst[tid] = src[tid];\n"
47     "}\n",
48 
49     "__kernel void test_stream_write_int2(__global int2 *src, __global int2 *dst)\n"
50     "{\n"
51     "    int  tid = get_global_id(0);\n"
52     "\n"
53     "    dst[tid] = src[tid];\n"
54     "}\n",
55 
56     "__kernel void test_stream_write_int4(__global int4 *src, __global int4 *dst)\n"
57     "{\n"
58     "    int  tid = get_global_id(0);\n"
59     "\n"
60     "    dst[tid] = src[tid];\n"
61     "}\n",
62 
63     "__kernel void test_stream_write_int8(__global int8 *src, __global int8 *dst)\n"
64     "{\n"
65     "    int  tid = get_global_id(0);\n"
66     "\n"
67     "    dst[tid] = src[tid];\n"
68     "}\n",
69 
70     "__kernel void test_stream_write_int16(__global int16 *src, __global int16 *dst)\n"
71     "{\n"
72     "    int  tid = get_global_id(0);\n"
73     "\n"
74     "    dst[tid] = src[tid];\n"
75     "}\n" };
76 
77 static const char *int_kernel_name[] = { "test_stream_write_int", "test_stream_write_int2", "test_stream_write_int4", "test_stream_write_int8", "test_stream_write_int16" };
78 
79 
80 const char *stream_write_uint_kernel_code[] = {
81     "__kernel void test_stream_write_uint(__global uint *src, __global uint *dst)\n"
82     "{\n"
83     "    int  tid = get_global_id(0);\n"
84     "\n"
85     "    dst[tid] = src[tid];\n"
86     "}\n",
87 
88     "__kernel void test_stream_write_uint2(__global uint2 *src, __global uint2 *dst)\n"
89     "{\n"
90     "    int  tid = get_global_id(0);\n"
91     "\n"
92     "    dst[tid] = src[tid];\n"
93     "}\n",
94 
95     "__kernel void test_stream_write_uint4(__global uint4 *src, __global uint4 *dst)\n"
96     "{\n"
97     "    int  tid = get_global_id(0);\n"
98     "\n"
99     "    dst[tid] = src[tid];\n"
100     "}\n",
101 
102     "__kernel void test_stream_write_uint8(__global uint8 *src, __global uint8 *dst)\n"
103     "{\n"
104     "    int  tid = get_global_id(0);\n"
105     "\n"
106     "    dst[tid] = src[tid];\n"
107     "}\n",
108 
109     "__kernel void test_stream_write_uint16(__global uint16 *src, __global uint16 *dst)\n"
110     "{\n"
111     "    int  tid = get_global_id(0);\n"
112     "\n"
113     "    dst[tid] = src[tid];\n"
114     "}\n" };
115 
116 static const char *uint_kernel_name[] = { "test_stream_write_uint", "test_stream_write_uint2", "test_stream_write_uint4", "test_stream_write_uint8", "test_stream_write_uint16" };
117 
118 
119 const char *stream_write_ushort_kernel_code[] = {
120     "__kernel void test_stream_write_ushort(__global ushort *src, __global ushort *dst)\n"
121     "{\n"
122     "    int  tid = get_global_id(0);\n"
123     "\n"
124     "    dst[tid] = src[tid];\n"
125     "}\n",
126 
127     "__kernel void test_stream_write_ushort2(__global ushort2 *src, __global ushort2 *dst)\n"
128     "{\n"
129     "    int  tid = get_global_id(0);\n"
130     "\n"
131     "    dst[tid] = src[tid];\n"
132     "}\n",
133 
134     "__kernel void test_stream_write_ushort4(__global ushort4 *src, __global ushort4 *dst)\n"
135     "{\n"
136     "    int  tid = get_global_id(0);\n"
137     "\n"
138     "    dst[tid] = src[tid];\n"
139     "}\n",
140 
141     "__kernel void test_stream_write_ushort8(__global ushort8 *src, __global ushort8 *dst)\n"
142     "{\n"
143     "    int  tid = get_global_id(0);\n"
144     "\n"
145     "    dst[tid] = src[tid];\n"
146     "}\n",
147 
148     "__kernel void test_stream_write_ushort16(__global ushort16 *src, __global ushort16 *dst)\n"
149     "{\n"
150     "    int  tid = get_global_id(0);\n"
151     "\n"
152     "    dst[tid] = src[tid];\n"
153     "}\n" };
154 
155 static const char *ushort_kernel_name[] = { "test_stream_write_ushort", "test_stream_write_ushort2", "test_stream_write_ushort4", "test_stream_write_ushort8", "test_stream_write_ushort16" };
156 
157 
158 
159 const char *stream_write_short_kernel_code[] = {
160     "__kernel void test_stream_write_short(__global short *src, __global short *dst)\n"
161     "{\n"
162     "    int  tid = get_global_id(0);\n"
163     "\n"
164     "    dst[tid] = src[tid];\n"
165     "}\n",
166 
167     "__kernel void test_stream_write_short2(__global short2 *src, __global short2 *dst)\n"
168     "{\n"
169     "    int  tid = get_global_id(0);\n"
170     "\n"
171     "    dst[tid] = src[tid];\n"
172     "}\n",
173 
174     "__kernel void test_stream_write_short4(__global short4 *src, __global short4 *dst)\n"
175     "{\n"
176     "    int  tid = get_global_id(0);\n"
177     "\n"
178     "    dst[tid] = src[tid];\n"
179     "}\n",
180 
181     "__kernel void test_stream_write_short8(__global short8 *src, __global short8 *dst)\n"
182     "{\n"
183     "    int  tid = get_global_id(0);\n"
184     "\n"
185     "    dst[tid] = src[tid];\n"
186     "}\n",
187 
188     "__kernel void test_stream_write_short16(__global short16 *src, __global short16 *dst)\n"
189     "{\n"
190     "    int  tid = get_global_id(0);\n"
191     "\n"
192     "    dst[tid] = src[tid];\n"
193     "}\n" };
194 
195 static const char *short_kernel_name[] = { "test_stream_write_short", "test_stream_write_short2", "test_stream_write_short4", "test_stream_write_short8", "test_stream_write_short16" };
196 
197 
198 const char *stream_write_char_kernel_code[] = {
199     "__kernel void test_stream_write_char(__global char *src, __global char *dst)\n"
200     "{\n"
201     "    int  tid = get_global_id(0);\n"
202     "\n"
203     "    dst[tid] = src[tid];\n"
204     "}\n",
205 
206     "__kernel void test_stream_write_char2(__global char2 *src, __global char2 *dst)\n"
207     "{\n"
208     "    int  tid = get_global_id(0);\n"
209     "\n"
210     "    dst[tid] = src[tid];\n"
211     "}\n",
212 
213     "__kernel void test_stream_write_char4(__global char4 *src, __global char4 *dst)\n"
214     "{\n"
215     "    int  tid = get_global_id(0);\n"
216     "\n"
217     "    dst[tid] = src[tid];\n"
218     "}\n",
219 
220     "__kernel void test_stream_write_char8(__global char8 *src, __global char8 *dst)\n"
221     "{\n"
222     "    int  tid = get_global_id(0);\n"
223     "\n"
224     "    dst[tid] = src[tid];\n"
225     "}\n",
226 
227     "__kernel void test_stream_write_char16(__global char16 *src, __global char16 *dst)\n"
228     "{\n"
229     "    int  tid = get_global_id(0);\n"
230     "\n"
231     "    dst[tid] = src[tid];\n"
232     "}\n" };
233 
234 static const char *char_kernel_name[] = { "test_stream_write_char", "test_stream_write_char2", "test_stream_write_char4", "test_stream_write_char8", "test_stream_write_char16" };
235 
236 
237 const char *stream_write_uchar_kernel_code[] = {
238     "__kernel void test_stream_write_uchar(__global uchar *src, __global uchar *dst)\n"
239     "{\n"
240     "    int  tid = get_global_id(0);\n"
241     "\n"
242     "    dst[tid] = src[tid];\n"
243     "}\n",
244 
245     "__kernel void test_stream_write_uchar2(__global uchar2 *src, __global uchar2 *dst)\n"
246     "{\n"
247     "    int  tid = get_global_id(0);\n"
248     "\n"
249     "    dst[tid] = src[tid];\n"
250     "}\n",
251 
252     "__kernel void test_stream_write_uchar4(__global uchar4 *src, __global uchar4 *dst)\n"
253     "{\n"
254     "    int  tid = get_global_id(0);\n"
255     "\n"
256     "    dst[tid] = src[tid];\n"
257     "}\n",
258 
259     "__kernel void test_stream_write_uchar8(__global uchar8 *src, __global uchar8 *dst)\n"
260     "{\n"
261     "    int  tid = get_global_id(0);\n"
262     "\n"
263     "    dst[tid] = src[tid];\n"
264     "}\n",
265 
266     "__kernel void test_stream_write_uchar16(__global uchar16 *src, __global uchar16 *dst)\n"
267     "{\n"
268     "    int  tid = get_global_id(0);\n"
269     "\n"
270     "    dst[tid] = src[tid];\n"
271     "}\n" };
272 
273 static const char *uchar_kernel_name[] = { "test_stream_write_uchar", "test_stream_write_uchar2", "test_stream_write_uchar4", "test_stream_write_uchar8", "test_stream_write_uchar16" };
274 
275 
276 const char *stream_write_float_kernel_code[] = {
277     "__kernel void test_stream_write_float(__global float *src, __global float *dst)\n"
278     "{\n"
279     "    int  tid = get_global_id(0);\n"
280     "\n"
281     "    dst[tid] = src[tid];\n"
282     "}\n",
283 
284     "__kernel void test_stream_write_float2(__global float2 *src, __global float2 *dst)\n"
285     "{\n"
286     "    int  tid = get_global_id(0);\n"
287     "\n"
288     "    dst[tid] = src[tid];\n"
289     "}\n",
290 
291     "__kernel void test_stream_write_float4(__global float4 *src, __global float4 *dst)\n"
292     "{\n"
293     "    int  tid = get_global_id(0);\n"
294     "\n"
295     "    dst[tid] = src[tid];\n"
296     "}\n",
297 
298     "__kernel void test_stream_write_float8(__global float8 *src, __global float8 *dst)\n"
299     "{\n"
300     "    int  tid = get_global_id(0);\n"
301     "\n"
302     "    dst[tid] = src[tid];\n"
303     "}\n",
304 
305     "__kernel void test_stream_write_float16(__global float16 *src, __global float16 *dst)\n"
306     "{\n"
307     "    int  tid = get_global_id(0);\n"
308     "\n"
309     "    dst[tid] = src[tid];\n"
310     "}\n" };
311 
312 static const char *float_kernel_name[] = { "test_stream_write_float", "test_stream_write_float2", "test_stream_write_float4", "test_stream_write_float8", "test_stream_write_float16" };
313 
314 
315 const char *stream_write_half_kernel_code[] = {
316     "__kernel void test_stream_write_half(__global half *src, __global float *dst)\n"
317     "{\n"
318     "    int  tid = get_global_id(0);\n"
319     "\n"
320     "    dst[tid] = vload_half( tid * 2, src );\n"
321     "}\n",
322 
323     "__kernel void test_stream_write_half2(__global half2 *src, __global float2 *dst)\n"
324     "{\n"
325     "    int  tid = get_global_id(0);\n"
326     "\n"
327     "    dst[tid] = vload_half2( tid * 2, src );\n"
328     "}\n",
329 
330     "__kernel void test_stream_write_half4(__global half4 *src, __global float4 *dst)\n"
331     "{\n"
332     "    int  tid = get_global_id(0);\n"
333     "\n"
334     "    dst[tid] = vload_half4( tid * 2, src );\n"
335     "}\n",
336 
337     "__kernel void test_stream_write_half8(__global half8 *src, __global float8 *dst)\n"
338     "{\n"
339     "    int  tid = get_global_id(0);\n"
340     "\n"
341     "    dst[tid] = vload_half8( tid * 2, src );\n"
342     "}\n",
343 
344     "__kernel void test_stream_write_half16(__global half16 *src, __global float16 *dst)\n"
345     "{\n"
346     "    int  tid = get_global_id(0);\n"
347     "\n"
348     "    dst[tid] = vload_half16( tid * 2, src );\n"
349     "}\n" };
350 
351 static const char *half_kernel_name[] = { "test_stream_write_half", "test_stream_write_half2", "test_stream_write_half4", "test_stream_write_half8", "test_stream_write_half16" };
352 
353 
354 const char *stream_write_long_kernel_code[] = {
355     "__kernel void test_stream_write_long(__global long *src, __global long *dst)\n"
356     "{\n"
357     "    int  tid = get_global_id(0);\n"
358     "\n"
359     "    dst[tid] = src[tid];\n"
360     "}\n",
361 
362     "__kernel void test_stream_write_long2(__global long2 *src, __global long2 *dst)\n"
363     "{\n"
364     "    int  tid = get_global_id(0);\n"
365     "\n"
366     "    dst[tid] = src[tid];\n"
367     "}\n",
368 
369     "__kernel void test_stream_write_long4(__global long4 *src, __global long4 *dst)\n"
370     "{\n"
371     "    int  tid = get_global_id(0);\n"
372     "\n"
373     "    dst[tid] = src[tid];\n"
374     "}\n",
375 
376     "__kernel void test_stream_write_long8(__global long8 *src, __global long8 *dst)\n"
377     "{\n"
378     "    int  tid = get_global_id(0);\n"
379     "\n"
380     "    dst[tid] = src[tid];\n"
381     "}\n",
382 
383     "__kernel void test_stream_write_long16(__global long16 *src, __global long16 *dst)\n"
384     "{\n"
385     "    int  tid = get_global_id(0);\n"
386     "\n"
387     "    dst[tid] = src[tid];\n"
388     "}\n" };
389 
390 static const char *long_kernel_name[] = { "test_stream_write_long", "test_stream_write_long2", "test_stream_write_long4", "test_stream_write_long8", "test_stream_write_long16" };
391 
392 
393 const char *stream_write_ulong_kernel_code[] = {
394     "__kernel void test_stream_write_ulong(__global ulong *src, __global ulong *dst)\n"
395     "{\n"
396     "    int  tid = get_global_id(0);\n"
397     "\n"
398     "    dst[tid] = src[tid];\n"
399     "}\n",
400 
401     "__kernel void test_stream_write_ulong2(__global ulong2 *src, __global ulong2 *dst)\n"
402     "{\n"
403     "    int  tid = get_global_id(0);\n"
404     "\n"
405     "    dst[tid] = src[tid];\n"
406     "}\n",
407 
408     "__kernel void test_stream_write_ulong4(__global ulong4 *src, __global ulong4 *dst)\n"
409     "{\n"
410     "    int  tid = get_global_id(0);\n"
411     "\n"
412     "    dst[tid] = src[tid];\n"
413     "}\n",
414 
415     "__kernel void test_stream_write_ulong8(__global ulong8 *src, __global ulong8 *dst)\n"
416     "{\n"
417     "    int  tid = get_global_id(0);\n"
418     "\n"
419     "    dst[tid] = src[tid];\n"
420     "}\n",
421 
422     "__kernel void test_stream_write_ulong16(__global ulong16 *src, __global ulong16 *dst)\n"
423     "{\n"
424     "    int  tid = get_global_id(0);\n"
425     "\n"
426     "    dst[tid] = src[tid];\n"
427     "}\n" };
428 
429 static const char *ulong_kernel_name[] = { "test_stream_write_ulong", "test_stream_write_ulong2", "test_stream_write_ulong4", "test_stream_write_ulong8", "test_stream_write_ulong16" };
430 
431 
432 static const char *stream_write_struct_kernel_code[] = {
433     "typedef struct{\n"
434     "int    a;\n"
435     "float    b;\n"
436     "} TestStruct;\n"
437     "__kernel void read_write_struct(__global TestStruct *src, __global TestStruct *dst)\n"
438     "{\n"
439     "    int  tid = get_global_id(0);\n"
440     "\n"
441     "    dst[tid].a = src[tid].a;\n"
442     "     dst[tid].b = src[tid].b;\n"
443     "}\n" };
444 
445 static const char *struct_kernel_name[] = { "read_write_struct" };
446 
447 
verify_write_int(void * ptr1,void * ptr2,int n)448 static int verify_write_int( void *ptr1, void *ptr2, int n )
449 {
450     int        i;
451     int        *inptr = (int *)ptr1;
452     int        *outptr = (int *)ptr2;
453 
454     for (i=0; i<n; i++){
455         if( outptr[i] != inptr[i] )
456             return -1;
457     }
458 
459     return 0;
460 }
461 
462 
verify_write_uint(void * ptr1,void * ptr2,int n)463 static int verify_write_uint( void *ptr1, void *ptr2, int n )
464 {
465     int        i;
466     cl_uint    *inptr = (cl_uint *)ptr1;
467     cl_uint    *outptr = (cl_uint *)ptr2;
468 
469     for (i=0; i<n; i++){
470         if( outptr[i] != inptr[i] )
471             return -1;
472     }
473 
474     return 0;
475 }
476 
477 
verify_write_short(void * ptr1,void * ptr2,int n)478 static int verify_write_short( void *ptr1, void *ptr2, int n )
479 {
480     int        i;
481     short    *inptr = (short *)ptr1;
482     short    *outptr = (short *)ptr2;
483 
484     for (i=0; i<n; i++){
485         if( outptr[i] != inptr[i] )
486             return -1;
487     }
488 
489     return 0;
490 }
491 
492 
verify_write_ushort(void * ptr1,void * ptr2,int n)493 static int verify_write_ushort( void *ptr1, void *ptr2, int n )
494 {
495     int        i;
496     cl_ushort    *inptr = (cl_ushort *)ptr1;
497     cl_ushort    *outptr = (cl_ushort *)ptr2;
498 
499     for (i=0; i<n; i++){
500         if( outptr[i] != inptr[i] )
501             return -1;
502     }
503 
504     return 0;
505 }
506 
507 
verify_write_char(void * ptr1,void * ptr2,int n)508 static int verify_write_char( void *ptr1, void *ptr2, int n )
509 {
510     int        i;
511     char    *inptr = (char *)ptr1;
512     char    *outptr = (char *)ptr2;
513 
514     for (i=0; i<n; i++){
515         if( outptr[i] != inptr[i] )
516             return -1;
517     }
518 
519     return 0;
520 }
521 
522 
verify_write_uchar(void * ptr1,void * ptr2,int n)523 static int verify_write_uchar( void *ptr1, void *ptr2, int n )
524 {
525     int        i;
526     uchar    *inptr = (uchar *)ptr1;
527     uchar    *outptr = (uchar *)ptr2;
528 
529     for (i=0; i<n; i++){
530         if( outptr[i] != inptr[i] )
531             return -1;
532     }
533 
534     return 0;
535 }
536 
537 
verify_write_float(void * ptr1,void * ptr2,int n)538 static int verify_write_float( void *ptr1, void *ptr2, int n )
539 {
540     int        i;
541     float    *inptr = (float *)ptr1;
542     float    *outptr = (float *)ptr2;
543 
544     for (i=0; i<n; i++){
545         if( outptr[i] != inptr[i] )
546             return -1;
547     }
548 
549     return 0;
550 }
551 
552 
verify_write_half(void * ptr1,void * ptr2,int n)553 static int verify_write_half( void *ptr1, void *ptr2, int n )
554 {
555     int        i;
556     cl_ushort    *inptr = (cl_ushort *)ptr1;
557     cl_ushort    *outptr = (cl_ushort *)ptr2;
558 
559     for( i = 0; i < n; i++ ){
560         if( outptr[i] != inptr[i] )
561             return -1;
562     }
563 
564     return 0;
565 }
566 
567 
verify_write_long(void * ptr1,void * ptr2,int n)568 static int verify_write_long( void *ptr1, void *ptr2, int n )
569 {
570     int        i;
571     cl_long    *inptr = (cl_long *)ptr1;
572     cl_long    *outptr = (cl_long *)ptr2;
573 
574     for (i=0; i<n; i++){
575         if( outptr[i] != inptr[i] )
576             return -1;
577     }
578 
579     return 0;
580 }
581 
582 
verify_write_ulong(void * ptr1,void * ptr2,int n)583 static int verify_write_ulong( void *ptr1, void *ptr2, int n )
584 {
585     int        i;
586     cl_ulong    *inptr = (cl_ulong *)ptr1;
587     cl_ulong    *outptr = (cl_ulong *)ptr2;
588 
589     for (i=0; i<n; i++){
590         if( outptr[i] != inptr[i] )
591             return -1;
592     }
593 
594     return 0;
595 }
596 
597 
verify_write_struct(void * ptr1,void * ptr2,int n)598 static int verify_write_struct( void *ptr1, void *ptr2, int n )
599 {
600     int            i;
601     TestStruct    *inptr = (TestStruct *)ptr1;
602     TestStruct    *outptr = (TestStruct *)ptr2;
603 
604     for (i=0; i<n; i++){
605         if( ( outptr[i].a != inptr[i].a ) || ( outptr[i].b != outptr[i].b ) )
606             return -1;
607     }
608 
609     return 0;
610 }
611 
612 
test_stream_write(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,size_t size,const char * type,int loops,void * inptr[5],const char * kernelCode[],const char * kernelName[],int (* fn)(void *,void *,int),MTdata d)613 int test_stream_write( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
614                       void *inptr[5], const char *kernelCode[], const char *kernelName[], int (*fn)(void *,void *,int), MTdata d )
615 {
616     cl_mem            streams[10];
617     void            *outptr[5];
618     cl_program        program[5];
619     cl_kernel        kernel[5];
620     cl_event        writeEvent;
621     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
622     size_t            ptrSizes[5], outPtrSizes[5];
623     size_t            threads[1];
624 #ifdef USE_LOCAL_THREADS
625     size_t            localThreads[1];
626 #endif
627     int                err, err_count = 0;
628     int                i, ii;
629 
630     threads[0] = (size_t)num_elements;
631 
632 #ifdef USE_LOCAL_THREADS
633     err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
634     if( err != CL_SUCCESS ){
635         print_error( err, " Unable to get thread group max size" );
636         return -1;
637     }
638     if( localThreads[0] > threads[0] )
639         localThreads[0] = threads[0];
640 #endif
641 
642     ptrSizes[0] = size;
643     ptrSizes[1] = ptrSizes[0] << 1;
644     ptrSizes[2] = ptrSizes[1] << 1;
645     ptrSizes[3] = ptrSizes[2] << 1;
646     ptrSizes[4] = ptrSizes[3] << 1;
647 
648     loops = ( loops < 5 ? loops : 5 );
649 
650     for( i = 0; i < loops; i++ )
651     {
652         outPtrSizes[i] = ptrSizes[i];
653     }
654 
655     for( i = 0; i < loops; i++ ){
656         ii = i << 1;
657         streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err );
658         if( ! streams[ii] ){
659             free( outptr[i] );
660             log_error( " clCreateBuffer failed\n" );
661             return -1;
662         }
663         if( ! strcmp( type, "half" ) ){
664             outptr[i] = malloc( outPtrSizes[i] * num_elements * 2 );
665             streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outPtrSizes[i] * 2 * num_elements, NULL, &err );
666         }
667         else{
668             outptr[i] = malloc( outPtrSizes[i] * num_elements );
669             streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  outPtrSizes[i] * num_elements, NULL, &err );
670         }
671         if( ! streams[ii+1] ){
672             clReleaseMemObject(streams[ii]);
673             free( outptr[i] );
674             log_error( " clCreateBuffer failed\n" );
675             return -1;
676         }
677 
678         err = clEnqueueWriteBuffer( queue, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], 0, NULL, &writeEvent );
679         if( err != CL_SUCCESS ){
680             clReleaseMemObject( streams[ii] );
681             clReleaseMemObject( streams[ii+1] );
682             free( outptr[i] );
683             print_error( err, " clWriteArray failed" );
684             return -1;
685         }
686 
687         // This synchronization point is needed in order to assume the data is valid.
688         // Getting profiling information is not a synchronization point.
689         err = clWaitForEvents( 1, &writeEvent );
690         if( err != CL_SUCCESS )
691         {
692             print_error( err, "Unable to wait for event completion" );
693             clReleaseEvent(writeEvent);
694             clReleaseMemObject( streams[ii] );
695             clReleaseMemObject( streams[ii+1] );
696             free( outptr[i] );
697             return -1;
698         }
699 
700         // test profiling
701         while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
702               CL_PROFILING_INFO_NOT_AVAILABLE );
703         if( err != CL_SUCCESS ){
704             print_error( err, "clGetEventProfilingInfo failed" );
705             clReleaseEvent(writeEvent);
706             clReleaseMemObject( streams[ii] );
707             clReleaseMemObject( streams[ii+1] );
708             free( outptr[i] );
709             return -1;
710         }
711 
712         while( ( err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
713               CL_PROFILING_INFO_NOT_AVAILABLE );
714         if( err != CL_SUCCESS ){
715             print_error( err, "clGetEventProfilingInfo failed" );
716             clReleaseEvent(writeEvent);
717             clReleaseMemObject( streams[ii] );
718             clReleaseMemObject( streams[ii+1] );
719             free( outptr[i] );
720             return -1;
721         }
722 
723         err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
724         if( err != CL_SUCCESS ){
725             print_error( err, "clGetEventProfilingInfo failed" );
726             clReleaseEvent(writeEvent);
727             clReleaseMemObject( streams[ii] );
728             clReleaseMemObject( streams[ii+1] );
729             free( outptr[i] );
730             return -1;
731         }
732 
733         err = clGetEventProfilingInfo( writeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
734         if( err != CL_SUCCESS ){
735             print_error( err, "clGetEventProfilingInfo failed" );
736             clReleaseEvent(writeEvent);
737             clReleaseMemObject( streams[ii] );
738             clReleaseMemObject( streams[ii+1] );
739             free( outptr[i] );
740             return -1;
741         }
742 
743 
744         err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
745         if( err ){
746             clReleaseEvent(writeEvent);
747             clReleaseMemObject(streams[ii]);
748             clReleaseMemObject(streams[ii+1]);
749             free( outptr[i] );
750             log_error( " Error creating program for %s\n", type );
751             return -1;
752         }
753 
754         err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
755         err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
756         if (err != CL_SUCCESS){
757             clReleaseEvent(writeEvent);
758             clReleaseKernel( kernel[i] );
759             clReleaseProgram( program[i] );
760             clReleaseMemObject( streams[ii] );
761             clReleaseMemObject( streams[ii+1] );
762             free( outptr[i] );
763             print_error( err, " clSetKernelArg failed" );
764             return -1;
765         }
766 
767 #ifdef USE_LOCAL_THREADS
768         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
769 #else
770         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
771 #endif
772         if( err != CL_SUCCESS ){
773             print_error( err, " clEnqueueNDRangeKernel failed" );
774             clReleaseEvent(writeEvent);
775             clReleaseKernel( kernel[i] );
776             clReleaseProgram( program[i] );
777             clReleaseMemObject( streams[ii] );
778             clReleaseMemObject( streams[ii+1] );
779             free( outptr[i] );
780             return -1;
781         }
782 
783         if( ! strcmp( type, "half" ) ){
784             err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
785         }
786         else{
787             err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, outPtrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
788         }
789         if( err != CL_SUCCESS ){
790             clReleaseEvent(writeEvent);
791             clReleaseKernel( kernel[i] );
792             clReleaseProgram( program[i] );
793             clReleaseMemObject( streams[ii] );
794             clReleaseMemObject( streams[ii+1] );
795             free( outptr[i] );
796             print_error( err, " clEnqueueReadBuffer failed" );
797             return -1;
798         }
799 
800         char *inP = (char *)inptr[i];
801         char *outP = (char *)outptr[i];
802         int err2 = 0;
803         for( size_t p = 0; p < (size_t)num_elements; p++ )
804         {
805             if( fn( inP, outP, (int)(ptrSizes[i] / ptrSizes[0]) ) )
806             {
807                 log_error( " %s%d data failed to verify\n", type, 1<<i );
808                 err2 = -1;
809                 err_count++;
810             }
811             inP += ptrSizes[i];
812             outP += outPtrSizes[i];
813         }
814         if( !err2 )
815         {
816             log_info( " %s%d data verified\n", type, 1<<i );
817         }
818         err = err2;
819 
820         if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
821             err_count++;
822 
823         // cleanup
824         clReleaseEvent(writeEvent);
825         clReleaseKernel( kernel[i] );
826         clReleaseProgram( program[i] );
827         clReleaseMemObject( streams[ii] );
828         clReleaseMemObject( streams[ii+1] );
829         free( outptr[i] );
830     }
831 
832     return err_count;
833 
834 }    // end test_stream_write()
835 
836 
837 
838 /*
839  int test_stream_struct_write( cl_device_group device, cl_device id, cl_context context, int num_elements )
840  {
841  cl_mem            streams[10];
842  void            *outptr[5];
843  TestStruct        *inptr[5];
844  cl_program        program[5];
845  cl_kernel        kernel[5];
846  void            *values[2];
847  size_t            sizes[2] = { sizeof(cl_stream), sizeof(cl_stream) };
848  size_t            ptrSizes[5];
849  size_t            size = sizeof( TestStruct );
850  size_t            threads[1];
851  #ifdef USE_LOCAL_THREADS
852  size_t            localThreads[1];
853  #endif
854  int                err;
855  int                i, ii, j;
856  int                loops = 1;        // no vector for structs
857 
858  threads[0] = (size_t)num_elements;
859 
860  #ifdef USE_LOCAL_THREADS
861  err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
862  if( err != CL_SUCCESS ){
863  log_error( "Unable to get thread group max size: %d", err );
864  return -1;
865  }
866  if( localThreads[0] > threads[0] )
867  localThreads[0] = threads[0];
868  #endif
869 
870  ptrSizes[0] = size;
871  ptrSizes[1] = ptrSizes[0] << 1;
872  ptrSizes[2] = ptrSizes[1] << 1;
873  ptrSizes[3] = ptrSizes[2] << 1;
874  ptrSizes[4] = ptrSizes[3] << 1;
875 
876 
877  loops = ( loops < 5 ? loops : 5 );
878  for( i = 0; i < loops; i++ ){
879 
880  inptr[i] = (TestStruct *)malloc(ptrSizes[i] * num_elements);
881 
882  for( j = 0; j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ ){
883  inptr[i][j].a = (int)random_float( -2147483648.f, 2147483647.0f );
884  inptr[i][j].b = random_float( -FLT_MAX, FLT_MAX );
885  }
886 
887  ii = i << 1;
888  streams[ii] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL);
889  if( ! streams[ii] ){
890  free( outptr[i] );
891  log_error( " clCreateBuffer failed\n" );
892  return -1;
893  }
894  outptr[i] = malloc( ptrSizes[i] * num_elements );
895  streams[ii+1] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL);
896  if( ! streams[ii+1] ){
897  clReleaseMemObject(streams[ii]);
898  free( outptr[i] );
899  log_error( " clCreateBuffer failed\n" );
900  return -1;
901  }
902 
903  err = clWriteArray(context, streams[ii], false, 0, ptrSizes[i]*num_elements, inptr[i], NULL);
904  if( err != CL_SUCCESS ){
905  clReleaseMemObject(streams[ii]);
906  clReleaseMemObject(streams[ii+1]);
907  free( outptr[i] );
908  print_error( err, " clWriteArray failed" );
909  return -1;
910  }
911 
912  err = create_program_and_kernel( device, struct_kernel_code, "read_write_struct", &program[i], &kernel[i] );
913  if( err ){
914  clReleaseMemObject(streams[ii]);
915  clReleaseMemObject(streams[ii+1]);
916  free( outptr[i] );
917  log_error( " Error creating program for struct\n" );
918  return -1;
919  }
920 
921  err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[ii] );
922  err |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), (void *)&streams[ii+1] );
923  if (err != CL_SUCCESS){
924  clReleaseProgram( program[i] );
925  clReleaseKernel( kernel[i] );
926  clReleaseMemObject( streams[ii] );
927  clReleaseMemObject( streams[ii+1] );
928  free( outptr[i] );
929  print_error( err, " clSetKernelArg failed" );
930  return -1;
931  }
932 
933  #ifdef USE_LOCAL_THREADS
934  err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
935  #else
936  err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
937  #endif
938  if( err != CL_SUCCESS ){
939  print_error( err, " clEnqueueNDRangeKernel failed" );
940  clReleaseMemObject( streams[ii] );
941  clReleaseMemObject( streams[ii+1] );
942  clReleaseKernel( kernel[i] );
943  clReleaseProgram( program[i] );
944  free( outptr[i] );
945  return -1;
946  }
947 
948  err = clEnqueueReadBuffer( queue, streams[ii+1], true, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, NULL );
949  if( err != CL_SUCCESS ){
950  clReleaseMemObject( streams[ii] );
951  clReleaseMemObject( streams[ii+1] );
952  clReleaseKernel( kernel[i] );
953  clReleaseProgram( program[i] );
954  free( outptr[i] );
955  print_error( err, " clEnqueueReadBuffer failed" );
956  return -1;
957  }
958 
959  if( verify_write_struct( inptr[i], outptr[i], ptrSizes[i] * num_elements / ptrSizes[0] ) ){
960  log_error( " STREAM_WRITE struct%d test failed\n", 1<<i );
961  err = -1;
962  }
963  else{
964  log_info( " STREAM_WRITE struct%d test passed\n", 1<<i );
965  err = 0;
966  }
967  // cleanup
968  clReleaseMemObject( streams[ii] );
969  clReleaseMemObject( streams[ii+1] );
970  clReleaseKernel( kernel[i] );
971  clReleaseProgram( program[i] );
972  free( outptr[i] );
973  free( (void *)inptr[i] );
974  }
975 
976  return err;
977 
978  }    // end test_stream_struct_write()
979  */
980 
test_write_array_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)981 int test_write_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
982 {
983     int    *inptr[5];
984     size_t    ptrSizes[5];
985     int        i, j, err;
986     int    (*foo)(void *,void *,int);
987     MTdata d = init_genrand( gRandomSeed );
988     foo = verify_write_int;
989 
990     ptrSizes[0] = sizeof(cl_int);
991     ptrSizes[1] = ptrSizes[0] << 1;
992     ptrSizes[2] = ptrSizes[1] << 1;
993     ptrSizes[3] = ptrSizes[2] << 1;
994     ptrSizes[4] = ptrSizes[3] << 1;
995 
996     for( i = 0; i < 5; i++ ){
997         inptr[i] = (int *)malloc(ptrSizes[i] * num_elements);
998 
999         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1000             inptr[i][j] = genrand_int32(d);
1001     }
1002 
1003     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_int ), "int", 5, (void**)inptr,
1004                             stream_write_int_kernel_code, int_kernel_name, foo, d );
1005 
1006     for( i = 0; i < 5; i++ ){
1007         free( (void *)inptr[i] );
1008     }
1009 
1010     free_mtdata(d);
1011 
1012     return err;
1013 
1014 }    // end write_int_array()
1015 
1016 
test_write_array_uint(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1017 int test_write_array_uint( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1018 {
1019     cl_uint    *inptr[5];
1020     size_t    ptrSizes[5];
1021     int        i, j, err;
1022     int    (*foo)(void *,void *,int);
1023     MTdata d = init_genrand( gRandomSeed );
1024     foo = verify_write_uint;
1025 
1026     ptrSizes[0] = sizeof(cl_uint);
1027     ptrSizes[1] = ptrSizes[0] << 1;
1028     ptrSizes[2] = ptrSizes[1] << 1;
1029     ptrSizes[3] = ptrSizes[2] << 1;
1030     ptrSizes[4] = ptrSizes[3] << 1;
1031 
1032     for( i = 0; i < 5; i++ ){
1033         inptr[i] = (cl_uint *)malloc(ptrSizes[i] * num_elements);
1034 
1035         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1036             inptr[i][j] = genrand_int32(d);
1037     }
1038 
1039     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5, (void **)inptr,
1040                             stream_write_uint_kernel_code, uint_kernel_name, foo, d );
1041 
1042     for( i = 0; i < 5; i++ ){
1043         free( (void *)inptr[i] );
1044     }
1045 
1046     free_mtdata(d);
1047     return err;
1048 
1049 }    // end write_uint_array()
1050 
1051 
test_write_array_short(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1052 int test_write_array_short( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1053 {
1054     short    *inptr[5];
1055     size_t    ptrSizes[5];
1056     int        i, j, err;
1057     int    (*foo)(void *,void *,int);
1058     MTdata d = init_genrand( gRandomSeed );
1059     foo = verify_write_short;
1060 
1061     ptrSizes[0] = sizeof(cl_short);
1062     ptrSizes[1] = ptrSizes[0] << 1;
1063     ptrSizes[2] = ptrSizes[1] << 1;
1064     ptrSizes[3] = ptrSizes[2] << 1;
1065     ptrSizes[4] = ptrSizes[3] << 1;
1066 
1067     for( i = 0; i < 5; i++ ){
1068         inptr[i] = (short *)malloc(ptrSizes[i] * num_elements);
1069 
1070         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1071             inptr[i][j] = (short)genrand_int32(d);
1072     }
1073 
1074     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_short ), "short", 5, (void **)inptr,
1075                             stream_write_short_kernel_code, short_kernel_name, foo, d );
1076 
1077     for( i = 0; i < 5; i++ ){
1078         free( (void *)inptr[i] );
1079     }
1080 
1081     free_mtdata(d);
1082     return err;
1083 
1084 }    // end write_short_array()
1085 
1086 
test_write_array_ushort(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1087 int test_write_array_ushort( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1088 {
1089     cl_ushort    *inptr[5];
1090     size_t    ptrSizes[5];
1091     int        i, j, err;
1092     int    (*foo)(void *,void *,int);
1093     MTdata d = init_genrand( gRandomSeed );
1094     foo = verify_write_ushort;
1095 
1096     ptrSizes[0] = sizeof(cl_ushort);
1097     ptrSizes[1] = ptrSizes[0] << 1;
1098     ptrSizes[2] = ptrSizes[1] << 1;
1099     ptrSizes[3] = ptrSizes[2] << 1;
1100     ptrSizes[4] = ptrSizes[3] << 1;
1101 
1102     for( i = 0; i < 5; i++ ){
1103         inptr[i] = (cl_ushort *)malloc(ptrSizes[i] * num_elements);
1104 
1105         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1106             inptr[i][j] = (cl_ushort)genrand_int32(d);
1107     }
1108 
1109     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5, (void **)inptr,
1110                             stream_write_ushort_kernel_code, ushort_kernel_name, foo, d );
1111 
1112     for( i = 0; i < 5; i++ ){
1113         free( (void *)inptr[i] );
1114     }
1115 
1116     free_mtdata(d);
1117     return err;
1118 
1119 }    // end write_ushort_array()
1120 
1121 
test_write_array_char(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1122 int test_write_array_char( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1123 {
1124     char    *inptr[5];
1125     size_t    ptrSizes[5];
1126     int        i, j, err;
1127     int    (*foo)(void *,void *,int);
1128     MTdata d = init_genrand( gRandomSeed );
1129     foo = verify_write_char;
1130 
1131     ptrSizes[0] = sizeof(cl_char);
1132     ptrSizes[1] = ptrSizes[0] << 1;
1133     ptrSizes[2] = ptrSizes[1] << 1;
1134     ptrSizes[3] = ptrSizes[2] << 1;
1135     ptrSizes[4] = ptrSizes[3] << 1;
1136 
1137     for( i = 0; i < 5; i++ ){
1138         inptr[i] = (char *)malloc(ptrSizes[i] * num_elements);
1139 
1140         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1141             inptr[i][j] = (char)genrand_int32(d);
1142     }
1143 
1144     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_char ), "char", 5, (void **)inptr,
1145                             stream_write_char_kernel_code, char_kernel_name, foo, d );
1146 
1147     for( i = 0; i < 5; i++ ){
1148         free( (void *)inptr[i] );
1149     }
1150 
1151     free_mtdata(d);
1152     return err;
1153 
1154 }    // end write_char_array()
1155 
1156 
test_write_array_uchar(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1157 int test_write_array_uchar( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1158 {
1159     uchar    *inptr[5];
1160     size_t    ptrSizes[5];
1161     int        i, j, err;
1162     int    (*foo)(void *,void *,int);
1163     MTdata d = init_genrand( gRandomSeed );
1164     foo = verify_write_uchar;
1165 
1166     ptrSizes[0] = sizeof(cl_uchar);
1167     ptrSizes[1] = ptrSizes[0] << 1;
1168     ptrSizes[2] = ptrSizes[1] << 1;
1169     ptrSizes[3] = ptrSizes[2] << 1;
1170     ptrSizes[4] = ptrSizes[3] << 1;
1171 
1172     for( i = 0; i < 5; i++ ){
1173         inptr[i] = (uchar *)malloc(ptrSizes[i] * num_elements);
1174 
1175         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1176             inptr[i][j] = (uchar)genrand_int32(d);
1177     }
1178 
1179     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5, (void **)inptr,
1180                             stream_write_uchar_kernel_code, uchar_kernel_name, foo, d );
1181 
1182     for( i = 0; i < 5; i++ ){
1183         free( (void *)inptr[i] );
1184     }
1185 
1186     free_mtdata(d);
1187     return err;
1188 
1189 }    // end write_uchar_array()
1190 
1191 
test_write_array_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1192 int test_write_array_float( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1193 {
1194     float    *inptr[5];
1195     size_t    ptrSizes[5];
1196     int        i, j, err;
1197     int    (*foo)(void *,void *,int);
1198     MTdata d = init_genrand( gRandomSeed );
1199     foo = verify_write_float;
1200 
1201     ptrSizes[0] = sizeof(cl_float);
1202     ptrSizes[1] = ptrSizes[0] << 1;
1203     ptrSizes[2] = ptrSizes[1] << 1;
1204     ptrSizes[3] = ptrSizes[2] << 1;
1205     ptrSizes[4] = ptrSizes[3] << 1;
1206 
1207     for( i = 0; i < 5; i++ ){
1208         inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1209 
1210         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1211             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1212     }
1213 
1214     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_float ), "float", 5, (void **)inptr,
1215                             stream_write_float_kernel_code, float_kernel_name, foo, d );
1216 
1217     for( i = 0; i < 5; i++ ){
1218         free( (void *)inptr[i] );
1219     }
1220 
1221     free_mtdata(d);
1222     return err;
1223 
1224 }    // end write_float_array()
1225 
1226 
test_write_array_half(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1227 int test_write_array_half( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1228 {
1229     float    *inptr[5];
1230     size_t    ptrSizes[5];
1231     int        i, j, err;
1232     int    (*foo)(void *,void *,int);
1233     MTdata d = init_genrand( gRandomSeed );
1234     foo = verify_write_half;
1235 
1236     ptrSizes[0] = sizeof( cl_half );
1237     ptrSizes[1] = ptrSizes[0] << 1;
1238     ptrSizes[2] = ptrSizes[1] << 1;
1239     ptrSizes[3] = ptrSizes[2] << 1;
1240     ptrSizes[4] = ptrSizes[3] << 1;
1241 
1242     for( i = 0; i < 5; i++ ){
1243         inptr[i] = (float *)malloc(ptrSizes[i] * num_elements);
1244 
1245         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ( ptrSizes[0] * 2 ); j++ )
1246             inptr[i][j] = get_random_float( -FLT_MAX, FLT_MAX, d );
1247     }
1248 
1249     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_half ), "half", 5, (void **)inptr,
1250                             stream_write_half_kernel_code, half_kernel_name, foo, d );
1251 
1252     for( i = 0; i < 5; i++ ){
1253         free( (void *)inptr[i] );
1254     }
1255 
1256     free_mtdata(d);
1257     return err;
1258 
1259 }    // end write_half_array()
1260 
1261 
test_write_array_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1262 int test_write_array_long( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1263 {
1264     cl_long    *inptr[5];
1265     size_t        ptrSizes[5];
1266     int            i, j, err;
1267     int    (*foo)(void *,void *,int);
1268     MTdata d = init_genrand( gRandomSeed );
1269     foo = verify_write_long;
1270 
1271     if (!gHasLong)
1272     {
1273         log_info("write_long_array: Long types unsupported, skipping.");
1274         return CL_SUCCESS;
1275     }
1276 
1277     ptrSizes[0] = sizeof(cl_long);
1278     ptrSizes[1] = ptrSizes[0] << 1;
1279     ptrSizes[2] = ptrSizes[1] << 1;
1280     ptrSizes[3] = ptrSizes[2] << 1;
1281     ptrSizes[4] = ptrSizes[3] << 1;
1282 
1283     for( i = 0; i < 5; i++ ){
1284         inptr[i] = (cl_long *)malloc(ptrSizes[i] * num_elements);
1285 
1286         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1287             inptr[i][j] = (cl_long) genrand_int32(d) ^ ((cl_long) genrand_int32(d) << 32);
1288     }
1289 
1290     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_long ), "cl_long", 5, (void **)inptr,
1291                             stream_write_long_kernel_code, long_kernel_name, foo, d );
1292 
1293     for( i = 0; i < 5; i++ ){
1294         free( (void *)inptr[i] );
1295     }
1296 
1297     free_mtdata(d);
1298     return err;
1299 
1300 }    // end write_long_array()
1301 
1302 
test_write_array_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1303 int test_write_array_ulong( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1304 {
1305     cl_ulong    *inptr[5];
1306     size_t                ptrSizes[5];
1307     int                    i, j, err;
1308     int    (*foo)(void *,void *,int);
1309     MTdata d = init_genrand( gRandomSeed );
1310     foo = verify_write_ulong;
1311 
1312     if (!gHasLong)
1313     {
1314         log_info("write_long_array: Long types unsupported, skipping.");
1315         return CL_SUCCESS;
1316     }
1317 
1318     ptrSizes[0] = sizeof(cl_ulong);
1319     ptrSizes[1] = ptrSizes[0] << 1;
1320     ptrSizes[2] = ptrSizes[1] << 1;
1321     ptrSizes[3] = ptrSizes[2] << 1;
1322     ptrSizes[4] = ptrSizes[3] << 1;
1323 
1324     for( i = 0; i < 5; i++ ){
1325         inptr[i] = (cl_ulong *)malloc(ptrSizes[i] * num_elements);
1326 
1327         for( j = 0; (unsigned int)j < ptrSizes[i] * num_elements / ptrSizes[0]; j++ )
1328             inptr[i][j] = (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
1329     }
1330 
1331     err = test_stream_write( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong long", 5, (void **)inptr,
1332                             stream_write_ulong_kernel_code, ulong_kernel_name, foo, d );
1333 
1334     for( i = 0; i < 5; i++ ){
1335         free( (void *)inptr[i] );
1336     }
1337 
1338     free_mtdata(d);
1339     return err;
1340 
1341 }    // end write_ulong_array()
1342 
1343 
test_write_array_struct(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1344 int test_write_array_struct( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
1345 {
1346     TestStruct            *inptr[1];
1347     size_t                ptrSizes[1];
1348     int                    j, err;
1349     int    (*foo)(void *,void *,int);
1350     MTdata d = init_genrand( gRandomSeed );
1351     foo = verify_write_struct;
1352 
1353     ptrSizes[0] = sizeof( TestStruct );
1354 
1355     inptr[0] = (TestStruct *)malloc( ptrSizes[0] * num_elements );
1356 
1357     for( j = 0; (unsigned int)j < ptrSizes[0] * num_elements / ptrSizes[0]; j++ ){
1358         inptr[0][j].a = (int)genrand_int32(d);
1359         inptr[0][j].b = get_random_float( 0.f, 1.844674407370954e+19f, d );
1360     }
1361 
1362     err = test_stream_write( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1, (void **)inptr,
1363                             stream_write_struct_kernel_code, struct_kernel_name, foo, d );
1364 
1365     free( (void *)inptr[0] );
1366 
1367     free_mtdata(d);
1368     return err;
1369 
1370 }    // end write_struct_array()
1371 
1372 
1373 
1374 
1375 
1376