• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <time.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include "harness/testHarness.h"
26 
27 #define TEST_PRIME_INT        ((1<<16)+1)
28 #define TEST_PRIME_UINT        ((1U<<16)+1U)
29 #define TEST_PRIME_LONG        ((1LL<<32)+1LL)
30 #define TEST_PRIME_ULONG    ((1ULL<<32)+1ULL)
31 #define TEST_PRIME_SHORT    ((1S<<8)+1S)
32 #define TEST_PRIME_FLOAT    (float)3.40282346638528860e+38
33 #define TEST_PRIME_HALF        119.f
34 #define TEST_BOOL            true
35 #define TEST_PRIME_CHAR        0x77
36 
37 
38 #ifndef ulong
39 typedef unsigned long ulong;
40 #endif
41 
42 #ifndef uchar
43 typedef unsigned char uchar;
44 #endif
45 
46 #ifndef TestStruct
47 typedef struct{
48     int        a;
49     float    b;
50 } TestStruct;
51 #endif
52 
53 
54 
55 //--- the code for the kernel executables
56 static const char *stream_read_int_kernel_code[] = {
57 "__kernel void test_stream_read_int(__global int *dst)\n"
58 "{\n"
59 "    int  tid = get_global_id(0);\n"
60 "\n"
61 "    dst[tid] = ((1<<16)+1);\n"
62 "}\n",
63 
64 "__kernel void test_stream_read_int2(__global int2 *dst)\n"
65 "{\n"
66 "    int  tid = get_global_id(0);\n"
67 "\n"
68 "    dst[tid] = ((1<<16)+1);\n"
69 "}\n",
70 
71 "__kernel void test_stream_read_int4(__global int4 *dst)\n"
72 "{\n"
73 "    int  tid = get_global_id(0);\n"
74 "\n"
75 "    dst[tid] = ((1<<16)+1);\n"
76 "}\n",
77 
78 "__kernel void test_stream_read_int8(__global int8 *dst)\n"
79 "{\n"
80 "    int  tid = get_global_id(0);\n"
81 "\n"
82 "    dst[tid] = ((1<<16)+1);\n"
83 "}\n",
84 
85 "__kernel void test_stream_read_int16(__global int16 *dst)\n"
86 "{\n"
87 "    int  tid = get_global_id(0);\n"
88 "\n"
89 "    dst[tid] = ((1<<16)+1);\n"
90 "}\n" };
91 
92 static const char *int_kernel_name[] = { "test_stream_read_int", "test_stream_read_int2", "test_stream_read_int4", "test_stream_read_int8", "test_stream_read_int16" };
93 
94 const char *stream_read_uint_kernel_code[] = {
95 "__kernel void test_stream_read_uint(__global uint *dst)\n"
96 "{\n"
97 "    int  tid = get_global_id(0);\n"
98 "\n"
99 "    dst[tid] = ((1U<<16)+1U);\n"
100 "}\n",
101 
102 "__kernel void test_stream_read_uint2(__global uint2 *dst)\n"
103 "{\n"
104 "    int  tid = get_global_id(0);\n"
105 "\n"
106 "    dst[tid] = ((1U<<16)+1U);\n"
107 "}\n",
108 
109 "__kernel void test_stream_read_uint4(__global uint4 *dst)\n"
110 "{\n"
111 "    int  tid = get_global_id(0);\n"
112 "\n"
113 "    dst[tid] = ((1U<<16)+1U);\n"
114 "}\n",
115 
116 "__kernel void test_stream_read_uint8(__global uint8 *dst)\n"
117 "{\n"
118 "    int  tid = get_global_id(0);\n"
119 "\n"
120 "    dst[tid] = ((1U<<16)+1U);\n"
121 "}\n",
122 
123 "__kernel void test_stream_read_uint16(__global uint16 *dst)\n"
124 "{\n"
125 "    int  tid = get_global_id(0);\n"
126 "\n"
127 "    dst[tid] = ((1U<<16)+1U);\n"
128 "}\n" };
129 
130 const char *uint_kernel_name[] = { "test_stream_read_uint", "test_stream_read_uint2", "test_stream_read_uint4", "test_stream_read_uint8", "test_stream_read_uint16" };
131 
132 const char *stream_read_long_kernel_code[] = {
133 "__kernel void test_stream_read_long(__global long *dst)\n"
134 "{\n"
135 "    int  tid = get_global_id(0);\n"
136 "\n"
137 "    dst[tid] = ((1L<<32)+1L);\n"
138 "}\n",
139 
140 "__kernel void test_stream_read_long2(__global long2 *dst)\n"
141 "{\n"
142 "    int  tid = get_global_id(0);\n"
143 "\n"
144 "    dst[tid] = ((1L<<32)+1L);\n"
145 "}\n",
146 
147 "__kernel void test_stream_read_long4(__global long4 *dst)\n"
148 "{\n"
149 "    int  tid = get_global_id(0);\n"
150 "\n"
151 "    dst[tid] = ((1L<<32)+1L);\n"
152 "}\n",
153 
154 "__kernel void test_stream_read_long8(__global long8 *dst)\n"
155 "{\n"
156 "    int  tid = get_global_id(0);\n"
157 "\n"
158 "    dst[tid] = ((1L<<32)+1L);\n"
159 "}\n",
160 
161 "__kernel void test_stream_read_long16(__global long16 *dst)\n"
162 "{\n"
163 "    int  tid = get_global_id(0);\n"
164 "\n"
165 "    dst[tid] = ((1L<<32)+1L);\n"
166 "}\n" };
167 
168 const char *long_kernel_name[] = { "test_stream_read_long", "test_stream_read_long2", "test_stream_read_long4", "test_stream_read_long8", "test_stream_read_long16" };
169 
170 const char *stream_read_ulong_kernel_code[] = {
171 "__kernel void test_stream_read_ulong(__global ulong *dst)\n"
172 "{\n"
173 "    int  tid = get_global_id(0);\n"
174 "\n"
175 "    dst[tid] = ((1UL<<32)+1UL);\n"
176 "}\n",
177 
178 "__kernel void test_stream_read_ulong2(__global ulong2 *dst)\n"
179 "{\n"
180 "    int  tid = get_global_id(0);\n"
181 "\n"
182 "    dst[tid] = ((1UL<<32)+1UL);\n"
183 "}\n",
184 
185 "__kernel void test_stream_read_ulong4(__global ulong4 *dst)\n"
186 "{\n"
187 "    int  tid = get_global_id(0);\n"
188 "\n"
189 "    dst[tid] = ((1UL<<32)+1UL);\n"
190 "}\n",
191 
192 "__kernel void test_stream_read_ulong8(__global ulong8 *dst)\n"
193 "{\n"
194 "    int  tid = get_global_id(0);\n"
195 "\n"
196 "    dst[tid] = ((1UL<<32)+1UL);\n"
197 "}\n",
198 
199 "__kernel void test_stream_read_ulong16(__global ulong16 *dst)\n"
200 "{\n"
201 "    int  tid = get_global_id(0);\n"
202 "\n"
203 "    dst[tid] = ((1UL<<32)+1UL);\n"
204 "}\n" };
205 
206 const char *ulong_kernel_name[] = { "test_stream_read_ulong", "test_stream_read_ulong2", "test_stream_read_ulong4", "test_stream_read_ulong8", "test_stream_read_ulong16" };
207 
208 const char *stream_read_short_kernel_code[] = {
209 "__kernel void test_stream_read_short(__global short *dst)\n"
210 "{\n"
211 "    int  tid = get_global_id(0);\n"
212 "\n"
213 "    dst[tid] = (short)((1<<8)+1);\n"
214 "}\n",
215 
216 "__kernel void test_stream_read_short2(__global short2 *dst)\n"
217 "{\n"
218 "    int  tid = get_global_id(0);\n"
219 "\n"
220 "    dst[tid] = (short)((1<<8)+1);\n"
221 "}\n",
222 
223 "__kernel void test_stream_read_short4(__global short4 *dst)\n"
224 "{\n"
225 "    int  tid = get_global_id(0);\n"
226 "\n"
227 "    dst[tid] = (short)((1<<8)+1);\n"
228 "}\n",
229 
230 "__kernel void test_stream_read_short8(__global short8 *dst)\n"
231 "{\n"
232 "    int  tid = get_global_id(0);\n"
233 "\n"
234 "    dst[tid] = (short)((1<<8)+1);\n"
235 "}\n",
236 
237 "__kernel void test_stream_read_short16(__global short16 *dst)\n"
238 "{\n"
239 "    int  tid = get_global_id(0);\n"
240 "\n"
241 "    dst[tid] = (short)((1<<8)+1);\n"
242 "}\n" };
243 
244 const char *short_kernel_name[] = { "test_stream_read_short", "test_stream_read_short2", "test_stream_read_short4", "test_stream_read_short8", "test_stream_read_short16" };
245 
246 
247 const char *stream_read_ushort_kernel_code[] = {
248 "__kernel void test_stream_read_ushort(__global ushort *dst)\n"
249 "{\n"
250 "    int  tid = get_global_id(0);\n"
251 "\n"
252 "    dst[tid] = (ushort)((1<<8)+1);\n"
253 "}\n",
254 
255 "__kernel void test_stream_read_ushort2(__global ushort2 *dst)\n"
256 "{\n"
257 "    int  tid = get_global_id(0);\n"
258 "\n"
259 "    dst[tid] = (ushort)((1<<8)+1);\n"
260 "}\n",
261 
262 "__kernel void test_stream_read_ushort4(__global ushort4 *dst)\n"
263 "{\n"
264 "    int  tid = get_global_id(0);\n"
265 "\n"
266 "    dst[tid] = (ushort)((1<<8)+1);\n"
267 "}\n",
268 
269 "__kernel void test_stream_read_ushort8(__global ushort8 *dst)\n"
270 "{\n"
271 "    int  tid = get_global_id(0);\n"
272 "\n"
273 "    dst[tid] = (ushort)((1<<8)+1);\n"
274 "}\n",
275 
276 "__kernel void test_stream_read_ushort16(__global ushort16 *dst)\n"
277 "{\n"
278 "    int  tid = get_global_id(0);\n"
279 "\n"
280 "    dst[tid] = (ushort)((1<<8)+1);\n"
281 "}\n" };
282 
283 static const char *ushort_kernel_name[] = { "test_stream_read_ushort", "test_stream_read_ushort2", "test_stream_read_ushort4", "test_stream_read_ushort8", "test_stream_read_ushort16" };
284 
285 
286 const char *stream_read_float_kernel_code[] = {
287 "__kernel void test_stream_read_float(__global float *dst)\n"
288 "{\n"
289 "    int  tid = get_global_id(0);\n"
290 "\n"
291 "    dst[tid] = (float)3.40282346638528860e+38;\n"
292 "}\n",
293 
294 "__kernel void test_stream_read_float2(__global float2 *dst)\n"
295 "{\n"
296 "    int  tid = get_global_id(0);\n"
297 "\n"
298 "    dst[tid] = (float)3.40282346638528860e+38;\n"
299 "}\n",
300 
301 "__kernel void test_stream_read_float4(__global float4 *dst)\n"
302 "{\n"
303 "    int  tid = get_global_id(0);\n"
304 "\n"
305 "    dst[tid] = (float)3.40282346638528860e+38;\n"
306 "}\n",
307 
308 "__kernel void test_stream_read_float8(__global float8 *dst)\n"
309 "{\n"
310 "    int  tid = get_global_id(0);\n"
311 "\n"
312 "    dst[tid] = (float)3.40282346638528860e+38;\n"
313 "}\n",
314 
315 "__kernel void test_stream_read_float16(__global float16 *dst)\n"
316 "{\n"
317 "    int  tid = get_global_id(0);\n"
318 "\n"
319 "    dst[tid] = (float)3.40282346638528860e+38;\n"
320 "}\n" };
321 
322 const char *float_kernel_name[] = { "test_stream_read_float", "test_stream_read_float2", "test_stream_read_float4", "test_stream_read_float8", "test_stream_read_float16" };
323 
324 
325 const char *stream_read_half_kernel_code[] = {
326 "__kernel void test_stream_read_half(__global half *dst)\n"
327 "{\n"
328 "    int  tid = get_global_id(0);\n"
329 "\n"
330 "    dst[tid] = (half)119;\n"
331 "}\n",
332 
333 "__kernel void test_stream_read_half2(__global half2 *dst)\n"
334 "{\n"
335 "    int  tid = get_global_id(0);\n"
336 "\n"
337 "    dst[tid] = (half)119;\n"
338 "}\n",
339 
340 "__kernel void test_stream_read_half4(__global half4 *dst)\n"
341 "{\n"
342 "    int  tid = get_global_id(0);\n"
343 "\n"
344 "    dst[tid] = (half)119;\n"
345 "}\n",
346 
347 "__kernel void test_stream_read_half8(__global half8 *dst)\n"
348 "{\n"
349 "    int  tid = get_global_id(0);\n"
350 "\n"
351 "    dst[tid] = (half)119;\n"
352 "}\n",
353 
354 "__kernel void test_stream_read_half16(__global half16 *dst)\n"
355 "{\n"
356 "    int  tid = get_global_id(0);\n"
357 "\n"
358 "    dst[tid] = (half)119;\n"
359 "}\n" };
360 
361 const char *half_kernel_name[] = { "test_stream_read_half", "test_stream_read_half2", "test_stream_read_half4", "test_stream_read_half8", "test_stream_read_half16" };
362 
363 
364 const char *stream_read_char_kernel_code[] = {
365 "__kernel void test_stream_read_char(__global char *dst)\n"
366 "{\n"
367 "    int  tid = get_global_id(0);\n"
368 "\n"
369 "    dst[tid] = (char)'w';\n"
370 "}\n",
371 
372 "__kernel void test_stream_read_char2(__global char2 *dst)\n"
373 "{\n"
374 "    int  tid = get_global_id(0);\n"
375 "\n"
376 "    dst[tid] = (char)'w';\n"
377 "}\n",
378 
379 "__kernel void test_stream_read_char4(__global char4 *dst)\n"
380 "{\n"
381 "    int  tid = get_global_id(0);\n"
382 "\n"
383 "    dst[tid] = (char)'w';\n"
384 "}\n",
385 
386 "__kernel void test_stream_read_char8(__global char8 *dst)\n"
387 "{\n"
388 "    int  tid = get_global_id(0);\n"
389 "\n"
390 "    dst[tid] = (char)'w';\n"
391 "}\n",
392 
393 "__kernel void test_stream_read_char16(__global char16 *dst)\n"
394 "{\n"
395 "    int  tid = get_global_id(0);\n"
396 "\n"
397 "    dst[tid] = (char)'w';\n"
398 "}\n" };
399 
400 const char *char_kernel_name[] = { "test_stream_read_char", "test_stream_read_char2", "test_stream_read_char4", "test_stream_read_char8", "test_stream_read_char16" };
401 
402 
403 const char *stream_read_uchar_kernel_code[] = {
404 "__kernel void test_stream_read_uchar(__global uchar *dst)\n"
405 "{\n"
406 "    int  tid = get_global_id(0);\n"
407 "\n"
408 "    dst[tid] = 'w';\n"
409 "}\n",
410 
411 "__kernel void test_stream_read_uchar2(__global uchar2 *dst)\n"
412 "{\n"
413 "    int  tid = get_global_id(0);\n"
414 "\n"
415 "    dst[tid] = (uchar)'w';\n"
416 "}\n",
417 
418 "__kernel void test_stream_read_uchar4(__global uchar4 *dst)\n"
419 "{\n"
420 "    int  tid = get_global_id(0);\n"
421 "\n"
422 "    dst[tid] = (uchar)'w';\n"
423 "}\n",
424 
425 "__kernel void test_stream_read_uchar8(__global uchar8 *dst)\n"
426 "{\n"
427 "    int  tid = get_global_id(0);\n"
428 "\n"
429 "    dst[tid] = (uchar)'w';\n"
430 "}\n",
431 
432 "__kernel void test_stream_read_uchar16(__global uchar16 *dst)\n"
433 "{\n"
434 "    int  tid = get_global_id(0);\n"
435 "\n"
436 "    dst[tid] = (uchar)'w';\n"
437 "}\n" };
438 
439 const char *uchar_kernel_name[] = { "test_stream_read_uchar", "test_stream_read_uchar2", "test_stream_read_uchar4", "test_stream_read_uchar8", "test_stream_read_uchar16" };
440 
441 
442 const char *stream_read_struct_kernel_code[] = {
443 "typedef struct{\n"
444 "int    a;\n"
445 "float    b;\n"
446 "} TestStruct;\n"
447 "__kernel void test_stream_read_struct(__global TestStruct *dst)\n"
448 "{\n"
449 "    int  tid = get_global_id(0);\n"
450 "\n"
451 "    dst[tid].a = ((1<<16)+1);\n"
452 "     dst[tid].b = (float)3.40282346638528860e+38;\n"
453 "}\n" };
454 
455 const char *struct_kernel_name[] = { "test_stream_read_struct" };
456 
457 
458 
459 //--- the verify functions
verify_read_int(void * ptr,int n)460 static int verify_read_int(void *ptr, int n)
461 {
462     int        i;
463     int        *outptr = (int *)ptr;
464 
465     for (i=0; i<n; i++){
466         if( outptr[i] != TEST_PRIME_INT )
467             return -1;
468     }
469 
470     return 0;
471 }
472 
473 
verify_read_uint(void * ptr,int n)474 static int verify_read_uint(void *ptr, int n)
475 {
476     int        i;
477     cl_uint    *outptr = (cl_uint *)ptr;
478 
479     for (i=0; i<n; i++){
480         if( outptr[i] != TEST_PRIME_UINT )
481             return -1;
482     }
483 
484     return 0;
485 }
486 
487 
verify_read_long(void * ptr,int n)488 static int verify_read_long(void *ptr, int n)
489 {
490     int        i;
491     cl_long    *outptr = (cl_long *)ptr;
492 
493     for (i=0; i<n; i++){
494         if( outptr[i] != TEST_PRIME_LONG )
495             return -1;
496     }
497 
498     return 0;
499 }
500 
501 
verify_read_ulong(void * ptr,int n)502 static int verify_read_ulong(void *ptr, int n)
503 {
504     int        i;
505     cl_ulong    *outptr = (cl_ulong *)ptr;
506 
507     for (i=0; i<n; i++){
508         if( outptr[i] != TEST_PRIME_ULONG )
509             return -1;
510     }
511 
512     return 0;
513 }
514 
515 
verify_read_short(void * ptr,int n)516 static int verify_read_short(void *ptr, int n)
517 {
518     int        i;
519     short    *outptr = (short *)ptr;
520 
521     for (i=0; i<n; i++){
522         if( outptr[i] != (short)((1<<8)+1) )
523             return -1;
524     }
525 
526     return 0;
527 }
528 
529 
verify_read_ushort(void * ptr,int n)530 static int verify_read_ushort(void *ptr, int n)
531 {
532     int        i;
533     cl_ushort    *outptr = (cl_ushort *)ptr;
534 
535     for (i=0; i<n; i++){
536         if( outptr[i] != (cl_ushort)((1<<8)+1) )
537             return -1;
538     }
539 
540     return 0;
541 }
542 
543 
verify_read_float(void * ptr,int n)544 static int verify_read_float( void *ptr, int n )
545 {
546     int        i;
547     float    *outptr = (float *)ptr;
548 
549     for (i=0; i<n; i++){
550         if( outptr[i] != TEST_PRIME_FLOAT )
551             return -1;
552     }
553 
554     return 0;
555 }
556 
557 
verify_read_half(void * ptr,int n)558 static int verify_read_half( void *ptr, int n )
559 {
560     int        i;
561     float    *outptr = (float *)ptr;
562 
563     for( i = 0; i < n / 2; i++ ){
564         if( outptr[i] != TEST_PRIME_HALF )
565             return -1;
566     }
567 
568     return 0;
569 }
570 
571 
verify_read_char(void * ptr,int n)572 static int verify_read_char(void *ptr, int n)
573 {
574     int        i;
575     char    *outptr = (char *)ptr;
576 
577     for (i=0; i<n; i++){
578         if( outptr[i] != TEST_PRIME_CHAR )
579             return -1;
580     }
581 
582     return 0;
583 }
584 
585 
verify_read_uchar(void * ptr,int n)586 static int verify_read_uchar( void *ptr, int n )
587 {
588     int        i;
589     uchar    *outptr = (uchar *)ptr;
590 
591     for (i=0; i<n; i++){
592         if( outptr[i] != TEST_PRIME_CHAR )
593             return -1;
594     }
595 
596     return 0;
597 }
598 
599 
verify_read_struct(void * ptr,int n)600 static int verify_read_struct( void *ptr, int n )
601 {
602     int         i;
603     TestStruct    *outptr = (TestStruct *)ptr;
604 
605     for ( i = 0; i < n; i++ ){
606         if( ( outptr[i].a != TEST_PRIME_INT ) ||
607            ( outptr[i].b != TEST_PRIME_FLOAT ) )
608             return -1;
609     }
610 
611     return 0;
612 }
613 
614 //----- the test functions
test_stream_read(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,size_t size,const char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))615 int test_stream_read( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
616                      const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
617 {
618     cl_mem            streams[5];
619     void            *outptr[5];
620     cl_program        program[5];
621     cl_kernel        kernel[5];
622     cl_event        readEvent;
623     cl_ulong    queueStart, submitStart, readStart, readEnd;
624     size_t            threads[1];
625 #ifdef USE_LOCAL_THREADS
626     size_t            localThreads[1];
627 #endif
628     int                err, err_count = 0;
629     int                i;
630     size_t            ptrSizes[5];
631 
632     threads[0] = (size_t)num_elements;
633 
634 #ifdef USE_LOCAL_THREADS
635     err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
636     if( err != CL_SUCCESS ){
637         log_error( "Unable to get thread group max size: %d", err );
638         return -1;
639     }
640     if( localThreads[0] > threads[0] )
641         localThreads[0] = threads[0];
642 #endif
643 
644     ptrSizes[0] = size;
645     ptrSizes[1] = ptrSizes[0] << 1;
646     ptrSizes[2] = ptrSizes[1] << 1;
647     ptrSizes[3] = ptrSizes[2] << 1;
648     ptrSizes[4] = ptrSizes[3] << 1;
649     for( i = 0; i < loops; i++ ){
650         outptr[i] = malloc( ptrSizes[i] * num_elements );
651         if( ! outptr[i] ){
652             log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
653             return -1;
654         }
655         streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  ptrSizes[i] * num_elements, NULL, &err );
656         if( !streams[i] ){
657             log_error( " clCreateBuffer failed\n" );
658             free( outptr[i] );
659             return -1;
660         }
661         err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
662         if( err ){
663             log_error( " Error creating program for %s\n", type );
664             clReleaseMemObject(streams[i]);
665             free( outptr[i] );
666             return -1;
667         }
668 
669         err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[i] );
670         if( err != CL_SUCCESS ){
671             print_error( err, "clSetKernelArg failed" );
672             clReleaseProgram( program[i] );
673             clReleaseKernel( kernel[i] );
674             clReleaseMemObject( streams[i] );
675             free( outptr[i] );
676             return -1;
677         }
678 
679 #ifdef USE_LOCAL_THREADS
680         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
681 #else
682         err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
683 #endif
684         if( err != CL_SUCCESS ){
685             print_error( err, "clEnqueueNDRangeKernel failed" );
686             clReleaseKernel( kernel[i] );
687             clReleaseProgram( program[i] );
688             clReleaseMemObject( streams[i] );
689             free( outptr[i] );
690             return -1;
691         }
692 
693         err = clEnqueueReadBuffer( queue, streams[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &readEvent );
694         if( err != CL_SUCCESS ){
695             print_error( err, "clEnqueueReadBuffer failed" );
696             clReleaseKernel( kernel[i] );
697             clReleaseProgram( program[i] );
698             clReleaseMemObject( streams[i] );
699             free( outptr[i] );
700             return -1;
701         }
702         err = clWaitForEvents( 1, &readEvent );
703         if( err != CL_SUCCESS )
704         {
705             print_error( err, "Unable to wait for event completion" );
706             clReleaseKernel( kernel[i] );
707             clReleaseProgram( program[i] );
708             clReleaseMemObject( streams[i] );
709             free( outptr[i] );
710             return -1;
711         }
712         err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL );
713         if( err != CL_SUCCESS ){
714             print_error( err, "clGetEventProfilingInfo failed" );
715             clReleaseKernel( kernel[i] );
716             clReleaseProgram( program[i] );
717             clReleaseMemObject( streams[i] );
718             free( outptr[i] );
719             return -1;
720         }
721 
722         err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL );
723         if( err != CL_SUCCESS ){
724             print_error( err, "clGetEventProfilingInfo failed" );
725             clReleaseKernel( kernel[i] );
726             clReleaseProgram( program[i] );
727             clReleaseMemObject( streams[i] );
728             free( outptr[i] );
729             return -1;
730         }
731 
732         err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
733         if( err != CL_SUCCESS ){
734             print_error( err, "clGetEventProfilingInfo failed" );
735             clReleaseKernel( kernel[i] );
736             clReleaseProgram( program[i] );
737             clReleaseMemObject( streams[i] );
738             free( outptr[i] );
739             return -1;
740         }
741 
742         err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
743         if( err != CL_SUCCESS ){
744             print_error( err, "clGetEventProfilingInfo failed" );
745             clReleaseKernel( kernel[i] );
746             clReleaseProgram( program[i] );
747             clReleaseMemObject( streams[i] );
748             free( outptr[i] );
749             return -1;
750         }
751 
752         if (fn(outptr[i], num_elements*(1<<i))){
753             log_error( " %s%d data failed to verify\n", type, 1<<i );
754             err_count++;
755         }
756         else{
757             log_info( " %s%d data verified\n", type, 1<<i );
758         }
759 
760     if (check_times(queueStart, submitStart, readStart, readEnd, device))
761       err_count++;
762 
763         // cleanup
764         clReleaseEvent(readEvent);
765         clReleaseKernel( kernel[i] );
766         clReleaseProgram( program[i] );
767         clReleaseMemObject( streams[i] );
768         free( outptr[i] );
769     }
770 
771     return err_count;
772 
773 }    // end test_stream_read()
774 
775 
test_read_array_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)776 int test_read_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
777 {
778     int    (*foo)(void *,int);
779     foo = verify_read_int;
780 
781     return test_stream_read( device, context, queue, num_elements, sizeof( cl_int ), "int", 5,
782                              stream_read_int_kernel_code, int_kernel_name, foo );
783 }
784 
785 
test_read_array_uint(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)786 int test_read_array_uint( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
787 {
788     int    (*foo)(void *,int);
789     foo = verify_read_uint;
790 
791     return test_stream_read( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5,
792                              stream_read_uint_kernel_code, uint_kernel_name, foo );
793 }
794 
795 
test_read_array_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)796 int test_read_array_long( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
797 {
798     int    (*foo)(void *,int);
799     foo = verify_read_long;
800 
801     if (!gHasLong)
802     {
803         log_info("read_long_array: Long types unsupported, skipping.");
804         return CL_SUCCESS;
805     }
806 
807     return test_stream_read( device, context, queue, num_elements, sizeof( cl_long ), "long", 5,
808                              stream_read_long_kernel_code, long_kernel_name, foo );
809 }
810 
811 
test_read_array_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)812 int test_read_array_ulong( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
813 {
814     int    (*foo)(void *,int);
815     foo = verify_read_ulong;
816 
817     if (!gHasLong)
818     {
819         log_info("read_long_array: Long types unsupported, skipping.");
820         return CL_SUCCESS;
821     }
822 
823     return test_stream_read( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong", 5,
824                              stream_read_ulong_kernel_code, ulong_kernel_name, foo );
825 }
826 
827 
test_read_array_short(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)828 int test_read_array_short( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
829 {
830     int    (*foo)(void *,int);
831     foo = verify_read_short;
832 
833     return test_stream_read( device, context, queue, num_elements, sizeof( cl_short ), "short", 5,
834                              stream_read_short_kernel_code, short_kernel_name, foo );
835 }
836 
837 
test_read_array_ushort(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)838 int test_read_array_ushort( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
839 {
840     int    (*foo)(void *,int);
841     foo = verify_read_ushort;
842 
843     return test_stream_read( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5,
844                              stream_read_ushort_kernel_code, ushort_kernel_name, foo );
845 }
846 
847 
test_read_array_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)848 int test_read_array_float( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
849 {
850     int    (*foo)(void *,int);
851     foo = verify_read_float;
852 
853     return test_stream_read( device, context, queue, num_elements, sizeof( cl_float ), "float", 5,
854                              stream_read_float_kernel_code, float_kernel_name, foo );
855 }
856 
857 
test_read_array_half(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)858 int test_read_array_half( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
859 {
860     int    (*foo)(void *,int);
861     foo = verify_read_half;
862 
863     return test_stream_read( device, context, queue, num_elements, sizeof( cl_half ), "half", 5,
864                              stream_read_half_kernel_code, half_kernel_name, foo );
865 }
866 
867 
test_read_array_char(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)868 int test_read_array_char( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
869 {
870     int    (*foo)(void *,int);
871     foo = verify_read_char;
872 
873     return test_stream_read( device, context, queue, num_elements, sizeof( cl_char ), "char", 5,
874                              stream_read_char_kernel_code, char_kernel_name, foo );
875 }
876 
877 
test_read_array_uchar(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)878 int test_read_array_uchar( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
879 {
880     int    (*foo)(void *,int);
881     foo = verify_read_uchar;
882 
883     return test_stream_read( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5,
884                              stream_read_uchar_kernel_code, uchar_kernel_name, foo );
885 }
886 
887 
test_read_array_struct(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)888 int test_read_array_struct( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
889 {
890     int    (*foo)(void *,int);
891     foo = verify_read_struct;
892 
893     return test_stream_read( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1,
894                              stream_read_struct_kernel_code, struct_kernel_name, foo );
895 }
896 
897 /*
898 int read_struct_array(cl_device_group device, cl_device id, cl_context context, int num_elements)
899 {
900     cl_mem            streams[1];
901     TestStruct        *output_ptr;
902     cl_program        program[1];
903     cl_kernel        kernel[1];
904     void            *values[1];
905     size_t            sizes[1] = { sizeof(cl_stream) };
906     size_t            threads[1];
907 #ifdef USE_LOCAL_THREADS
908     size_t            localThreads[1];
909 #endif
910     int                err;
911     size_t            objSize = sizeof(TestStruct);
912 
913     threads[0] = (size_t)num_elements;
914 
915 #ifdef USE_LOCAL_THREADS
916     err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
917     if( err != CL_SUCCESS ){
918         log_error( "Unable to get thread group max size: %d", err );
919         return -1;
920     }
921     if( localThreads[0] > threads[0] )
922         localThreads[0] = threads[0];
923 #endif
924 
925     output_ptr = malloc(objSize * num_elements);
926     if( ! output_ptr ){
927         log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
928         return -1;
929     }
930     streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE),  objSize * num_elements, NULL );
931     if( !streams[0] ){
932         log_error( " clCreateBuffer failed\n" );
933         free( output_ptr );
934         return -1;
935     }
936 
937     err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]);
938     if( err ){
939         clReleaseProgram( program[0] );
940         free( output_ptr );
941         return -1;
942     }
943 
944     err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] );
945     if( err != CL_SUCCESS){
946         print_error( err, "clSetKernelArg failed" );
947         clReleaseProgram( program[0] );
948         clReleaseKernel( kernel[0] );
949         clReleaseMemObject( streams[0] );
950         free( output_ptr );
951         return -1;
952     }
953 
954 #ifdef USE_LOCAL_THREADS
955     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL );
956 #else
957     err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
958 #endif
959     if( err != CL_SUCCESS ){
960         print_error( err, "clEnqueueNDRangeKernel failed" );
961         clReleaseProgram( program[0] );
962         clReleaseKernel( kernel[0] );
963         clReleaseMemObject( streams[0] );
964         free( output_ptr );
965         return -1;
966     }
967 
968     err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
969     if( err != CL_SUCCESS){
970         print_error( err, "clEnqueueReadBuffer failed" );
971         clReleaseProgram( program[0] );
972         clReleaseKernel( kernel[0] );
973         clReleaseMemObject( streams[0] );
974         free( output_ptr );
975         return -1;
976     }
977 
978     if (verify_read_struct(output_ptr, num_elements)){
979         log_error(" struct test failed\n");
980         err = -1;
981     }
982     else{
983         log_info(" struct test passed\n");
984         err = 0;
985     }
986 
987     // cleanup
988     clReleaseProgram( program[0] );
989     clReleaseKernel( kernel[0] );
990     clReleaseMemObject( streams[0] );
991     free( output_ptr );
992 
993     return err;
994 }
995 */
996 
997 
998