1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <time.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include "procs.h"
25 #include "harness/testHarness.h"
26
27 #define TEST_PRIME_INT ((1<<16)+1)
28 #define TEST_PRIME_UINT ((1U<<16)+1U)
29 #define TEST_PRIME_LONG ((1LL<<32)+1LL)
30 #define TEST_PRIME_ULONG ((1ULL<<32)+1ULL)
31 #define TEST_PRIME_SHORT ((1S<<8)+1S)
32 #define TEST_PRIME_FLOAT (float)3.40282346638528860e+38
33 #define TEST_PRIME_HALF 119.f
34 #define TEST_BOOL true
35 #define TEST_PRIME_CHAR 0x77
36
37
38 #ifndef ulong
39 typedef unsigned long ulong;
40 #endif
41
42 #ifndef uchar
43 typedef unsigned char uchar;
44 #endif
45
46 #ifndef TestStruct
47 typedef struct{
48 int a;
49 float b;
50 } TestStruct;
51 #endif
52
53
54
55 //--- the code for the kernel executables
56 static const char *stream_read_int_kernel_code[] = {
57 "__kernel void test_stream_read_int(__global int *dst)\n"
58 "{\n"
59 " int tid = get_global_id(0);\n"
60 "\n"
61 " dst[tid] = ((1<<16)+1);\n"
62 "}\n",
63
64 "__kernel void test_stream_read_int2(__global int2 *dst)\n"
65 "{\n"
66 " int tid = get_global_id(0);\n"
67 "\n"
68 " dst[tid] = ((1<<16)+1);\n"
69 "}\n",
70
71 "__kernel void test_stream_read_int4(__global int4 *dst)\n"
72 "{\n"
73 " int tid = get_global_id(0);\n"
74 "\n"
75 " dst[tid] = ((1<<16)+1);\n"
76 "}\n",
77
78 "__kernel void test_stream_read_int8(__global int8 *dst)\n"
79 "{\n"
80 " int tid = get_global_id(0);\n"
81 "\n"
82 " dst[tid] = ((1<<16)+1);\n"
83 "}\n",
84
85 "__kernel void test_stream_read_int16(__global int16 *dst)\n"
86 "{\n"
87 " int tid = get_global_id(0);\n"
88 "\n"
89 " dst[tid] = ((1<<16)+1);\n"
90 "}\n" };
91
92 static const char *int_kernel_name[] = { "test_stream_read_int", "test_stream_read_int2", "test_stream_read_int4", "test_stream_read_int8", "test_stream_read_int16" };
93
94 const char *stream_read_uint_kernel_code[] = {
95 "__kernel void test_stream_read_uint(__global uint *dst)\n"
96 "{\n"
97 " int tid = get_global_id(0);\n"
98 "\n"
99 " dst[tid] = ((1U<<16)+1U);\n"
100 "}\n",
101
102 "__kernel void test_stream_read_uint2(__global uint2 *dst)\n"
103 "{\n"
104 " int tid = get_global_id(0);\n"
105 "\n"
106 " dst[tid] = ((1U<<16)+1U);\n"
107 "}\n",
108
109 "__kernel void test_stream_read_uint4(__global uint4 *dst)\n"
110 "{\n"
111 " int tid = get_global_id(0);\n"
112 "\n"
113 " dst[tid] = ((1U<<16)+1U);\n"
114 "}\n",
115
116 "__kernel void test_stream_read_uint8(__global uint8 *dst)\n"
117 "{\n"
118 " int tid = get_global_id(0);\n"
119 "\n"
120 " dst[tid] = ((1U<<16)+1U);\n"
121 "}\n",
122
123 "__kernel void test_stream_read_uint16(__global uint16 *dst)\n"
124 "{\n"
125 " int tid = get_global_id(0);\n"
126 "\n"
127 " dst[tid] = ((1U<<16)+1U);\n"
128 "}\n" };
129
130 const char *uint_kernel_name[] = { "test_stream_read_uint", "test_stream_read_uint2", "test_stream_read_uint4", "test_stream_read_uint8", "test_stream_read_uint16" };
131
132 const char *stream_read_long_kernel_code[] = {
133 "__kernel void test_stream_read_long(__global long *dst)\n"
134 "{\n"
135 " int tid = get_global_id(0);\n"
136 "\n"
137 " dst[tid] = ((1L<<32)+1L);\n"
138 "}\n",
139
140 "__kernel void test_stream_read_long2(__global long2 *dst)\n"
141 "{\n"
142 " int tid = get_global_id(0);\n"
143 "\n"
144 " dst[tid] = ((1L<<32)+1L);\n"
145 "}\n",
146
147 "__kernel void test_stream_read_long4(__global long4 *dst)\n"
148 "{\n"
149 " int tid = get_global_id(0);\n"
150 "\n"
151 " dst[tid] = ((1L<<32)+1L);\n"
152 "}\n",
153
154 "__kernel void test_stream_read_long8(__global long8 *dst)\n"
155 "{\n"
156 " int tid = get_global_id(0);\n"
157 "\n"
158 " dst[tid] = ((1L<<32)+1L);\n"
159 "}\n",
160
161 "__kernel void test_stream_read_long16(__global long16 *dst)\n"
162 "{\n"
163 " int tid = get_global_id(0);\n"
164 "\n"
165 " dst[tid] = ((1L<<32)+1L);\n"
166 "}\n" };
167
168 const char *long_kernel_name[] = { "test_stream_read_long", "test_stream_read_long2", "test_stream_read_long4", "test_stream_read_long8", "test_stream_read_long16" };
169
170 const char *stream_read_ulong_kernel_code[] = {
171 "__kernel void test_stream_read_ulong(__global ulong *dst)\n"
172 "{\n"
173 " int tid = get_global_id(0);\n"
174 "\n"
175 " dst[tid] = ((1UL<<32)+1UL);\n"
176 "}\n",
177
178 "__kernel void test_stream_read_ulong2(__global ulong2 *dst)\n"
179 "{\n"
180 " int tid = get_global_id(0);\n"
181 "\n"
182 " dst[tid] = ((1UL<<32)+1UL);\n"
183 "}\n",
184
185 "__kernel void test_stream_read_ulong4(__global ulong4 *dst)\n"
186 "{\n"
187 " int tid = get_global_id(0);\n"
188 "\n"
189 " dst[tid] = ((1UL<<32)+1UL);\n"
190 "}\n",
191
192 "__kernel void test_stream_read_ulong8(__global ulong8 *dst)\n"
193 "{\n"
194 " int tid = get_global_id(0);\n"
195 "\n"
196 " dst[tid] = ((1UL<<32)+1UL);\n"
197 "}\n",
198
199 "__kernel void test_stream_read_ulong16(__global ulong16 *dst)\n"
200 "{\n"
201 " int tid = get_global_id(0);\n"
202 "\n"
203 " dst[tid] = ((1UL<<32)+1UL);\n"
204 "}\n" };
205
206 const char *ulong_kernel_name[] = { "test_stream_read_ulong", "test_stream_read_ulong2", "test_stream_read_ulong4", "test_stream_read_ulong8", "test_stream_read_ulong16" };
207
208 const char *stream_read_short_kernel_code[] = {
209 "__kernel void test_stream_read_short(__global short *dst)\n"
210 "{\n"
211 " int tid = get_global_id(0);\n"
212 "\n"
213 " dst[tid] = (short)((1<<8)+1);\n"
214 "}\n",
215
216 "__kernel void test_stream_read_short2(__global short2 *dst)\n"
217 "{\n"
218 " int tid = get_global_id(0);\n"
219 "\n"
220 " dst[tid] = (short)((1<<8)+1);\n"
221 "}\n",
222
223 "__kernel void test_stream_read_short4(__global short4 *dst)\n"
224 "{\n"
225 " int tid = get_global_id(0);\n"
226 "\n"
227 " dst[tid] = (short)((1<<8)+1);\n"
228 "}\n",
229
230 "__kernel void test_stream_read_short8(__global short8 *dst)\n"
231 "{\n"
232 " int tid = get_global_id(0);\n"
233 "\n"
234 " dst[tid] = (short)((1<<8)+1);\n"
235 "}\n",
236
237 "__kernel void test_stream_read_short16(__global short16 *dst)\n"
238 "{\n"
239 " int tid = get_global_id(0);\n"
240 "\n"
241 " dst[tid] = (short)((1<<8)+1);\n"
242 "}\n" };
243
244 const char *short_kernel_name[] = { "test_stream_read_short", "test_stream_read_short2", "test_stream_read_short4", "test_stream_read_short8", "test_stream_read_short16" };
245
246
247 const char *stream_read_ushort_kernel_code[] = {
248 "__kernel void test_stream_read_ushort(__global ushort *dst)\n"
249 "{\n"
250 " int tid = get_global_id(0);\n"
251 "\n"
252 " dst[tid] = (ushort)((1<<8)+1);\n"
253 "}\n",
254
255 "__kernel void test_stream_read_ushort2(__global ushort2 *dst)\n"
256 "{\n"
257 " int tid = get_global_id(0);\n"
258 "\n"
259 " dst[tid] = (ushort)((1<<8)+1);\n"
260 "}\n",
261
262 "__kernel void test_stream_read_ushort4(__global ushort4 *dst)\n"
263 "{\n"
264 " int tid = get_global_id(0);\n"
265 "\n"
266 " dst[tid] = (ushort)((1<<8)+1);\n"
267 "}\n",
268
269 "__kernel void test_stream_read_ushort8(__global ushort8 *dst)\n"
270 "{\n"
271 " int tid = get_global_id(0);\n"
272 "\n"
273 " dst[tid] = (ushort)((1<<8)+1);\n"
274 "}\n",
275
276 "__kernel void test_stream_read_ushort16(__global ushort16 *dst)\n"
277 "{\n"
278 " int tid = get_global_id(0);\n"
279 "\n"
280 " dst[tid] = (ushort)((1<<8)+1);\n"
281 "}\n" };
282
283 static const char *ushort_kernel_name[] = { "test_stream_read_ushort", "test_stream_read_ushort2", "test_stream_read_ushort4", "test_stream_read_ushort8", "test_stream_read_ushort16" };
284
285
286 const char *stream_read_float_kernel_code[] = {
287 "__kernel void test_stream_read_float(__global float *dst)\n"
288 "{\n"
289 " int tid = get_global_id(0);\n"
290 "\n"
291 " dst[tid] = (float)3.40282346638528860e+38;\n"
292 "}\n",
293
294 "__kernel void test_stream_read_float2(__global float2 *dst)\n"
295 "{\n"
296 " int tid = get_global_id(0);\n"
297 "\n"
298 " dst[tid] = (float)3.40282346638528860e+38;\n"
299 "}\n",
300
301 "__kernel void test_stream_read_float4(__global float4 *dst)\n"
302 "{\n"
303 " int tid = get_global_id(0);\n"
304 "\n"
305 " dst[tid] = (float)3.40282346638528860e+38;\n"
306 "}\n",
307
308 "__kernel void test_stream_read_float8(__global float8 *dst)\n"
309 "{\n"
310 " int tid = get_global_id(0);\n"
311 "\n"
312 " dst[tid] = (float)3.40282346638528860e+38;\n"
313 "}\n",
314
315 "__kernel void test_stream_read_float16(__global float16 *dst)\n"
316 "{\n"
317 " int tid = get_global_id(0);\n"
318 "\n"
319 " dst[tid] = (float)3.40282346638528860e+38;\n"
320 "}\n" };
321
322 const char *float_kernel_name[] = { "test_stream_read_float", "test_stream_read_float2", "test_stream_read_float4", "test_stream_read_float8", "test_stream_read_float16" };
323
324
325 const char *stream_read_half_kernel_code[] = {
326 "__kernel void test_stream_read_half(__global half *dst)\n"
327 "{\n"
328 " int tid = get_global_id(0);\n"
329 "\n"
330 " dst[tid] = (half)119;\n"
331 "}\n",
332
333 "__kernel void test_stream_read_half2(__global half2 *dst)\n"
334 "{\n"
335 " int tid = get_global_id(0);\n"
336 "\n"
337 " dst[tid] = (half)119;\n"
338 "}\n",
339
340 "__kernel void test_stream_read_half4(__global half4 *dst)\n"
341 "{\n"
342 " int tid = get_global_id(0);\n"
343 "\n"
344 " dst[tid] = (half)119;\n"
345 "}\n",
346
347 "__kernel void test_stream_read_half8(__global half8 *dst)\n"
348 "{\n"
349 " int tid = get_global_id(0);\n"
350 "\n"
351 " dst[tid] = (half)119;\n"
352 "}\n",
353
354 "__kernel void test_stream_read_half16(__global half16 *dst)\n"
355 "{\n"
356 " int tid = get_global_id(0);\n"
357 "\n"
358 " dst[tid] = (half)119;\n"
359 "}\n" };
360
361 const char *half_kernel_name[] = { "test_stream_read_half", "test_stream_read_half2", "test_stream_read_half4", "test_stream_read_half8", "test_stream_read_half16" };
362
363
364 const char *stream_read_char_kernel_code[] = {
365 "__kernel void test_stream_read_char(__global char *dst)\n"
366 "{\n"
367 " int tid = get_global_id(0);\n"
368 "\n"
369 " dst[tid] = (char)'w';\n"
370 "}\n",
371
372 "__kernel void test_stream_read_char2(__global char2 *dst)\n"
373 "{\n"
374 " int tid = get_global_id(0);\n"
375 "\n"
376 " dst[tid] = (char)'w';\n"
377 "}\n",
378
379 "__kernel void test_stream_read_char4(__global char4 *dst)\n"
380 "{\n"
381 " int tid = get_global_id(0);\n"
382 "\n"
383 " dst[tid] = (char)'w';\n"
384 "}\n",
385
386 "__kernel void test_stream_read_char8(__global char8 *dst)\n"
387 "{\n"
388 " int tid = get_global_id(0);\n"
389 "\n"
390 " dst[tid] = (char)'w';\n"
391 "}\n",
392
393 "__kernel void test_stream_read_char16(__global char16 *dst)\n"
394 "{\n"
395 " int tid = get_global_id(0);\n"
396 "\n"
397 " dst[tid] = (char)'w';\n"
398 "}\n" };
399
400 const char *char_kernel_name[] = { "test_stream_read_char", "test_stream_read_char2", "test_stream_read_char4", "test_stream_read_char8", "test_stream_read_char16" };
401
402
403 const char *stream_read_uchar_kernel_code[] = {
404 "__kernel void test_stream_read_uchar(__global uchar *dst)\n"
405 "{\n"
406 " int tid = get_global_id(0);\n"
407 "\n"
408 " dst[tid] = 'w';\n"
409 "}\n",
410
411 "__kernel void test_stream_read_uchar2(__global uchar2 *dst)\n"
412 "{\n"
413 " int tid = get_global_id(0);\n"
414 "\n"
415 " dst[tid] = (uchar)'w';\n"
416 "}\n",
417
418 "__kernel void test_stream_read_uchar4(__global uchar4 *dst)\n"
419 "{\n"
420 " int tid = get_global_id(0);\n"
421 "\n"
422 " dst[tid] = (uchar)'w';\n"
423 "}\n",
424
425 "__kernel void test_stream_read_uchar8(__global uchar8 *dst)\n"
426 "{\n"
427 " int tid = get_global_id(0);\n"
428 "\n"
429 " dst[tid] = (uchar)'w';\n"
430 "}\n",
431
432 "__kernel void test_stream_read_uchar16(__global uchar16 *dst)\n"
433 "{\n"
434 " int tid = get_global_id(0);\n"
435 "\n"
436 " dst[tid] = (uchar)'w';\n"
437 "}\n" };
438
439 const char *uchar_kernel_name[] = { "test_stream_read_uchar", "test_stream_read_uchar2", "test_stream_read_uchar4", "test_stream_read_uchar8", "test_stream_read_uchar16" };
440
441
442 const char *stream_read_struct_kernel_code[] = {
443 "typedef struct{\n"
444 "int a;\n"
445 "float b;\n"
446 "} TestStruct;\n"
447 "__kernel void test_stream_read_struct(__global TestStruct *dst)\n"
448 "{\n"
449 " int tid = get_global_id(0);\n"
450 "\n"
451 " dst[tid].a = ((1<<16)+1);\n"
452 " dst[tid].b = (float)3.40282346638528860e+38;\n"
453 "}\n" };
454
455 const char *struct_kernel_name[] = { "test_stream_read_struct" };
456
457
458
459 //--- the verify functions
verify_read_int(void * ptr,int n)460 static int verify_read_int(void *ptr, int n)
461 {
462 int i;
463 int *outptr = (int *)ptr;
464
465 for (i=0; i<n; i++){
466 if( outptr[i] != TEST_PRIME_INT )
467 return -1;
468 }
469
470 return 0;
471 }
472
473
verify_read_uint(void * ptr,int n)474 static int verify_read_uint(void *ptr, int n)
475 {
476 int i;
477 cl_uint *outptr = (cl_uint *)ptr;
478
479 for (i=0; i<n; i++){
480 if( outptr[i] != TEST_PRIME_UINT )
481 return -1;
482 }
483
484 return 0;
485 }
486
487
verify_read_long(void * ptr,int n)488 static int verify_read_long(void *ptr, int n)
489 {
490 int i;
491 cl_long *outptr = (cl_long *)ptr;
492
493 for (i=0; i<n; i++){
494 if( outptr[i] != TEST_PRIME_LONG )
495 return -1;
496 }
497
498 return 0;
499 }
500
501
verify_read_ulong(void * ptr,int n)502 static int verify_read_ulong(void *ptr, int n)
503 {
504 int i;
505 cl_ulong *outptr = (cl_ulong *)ptr;
506
507 for (i=0; i<n; i++){
508 if( outptr[i] != TEST_PRIME_ULONG )
509 return -1;
510 }
511
512 return 0;
513 }
514
515
verify_read_short(void * ptr,int n)516 static int verify_read_short(void *ptr, int n)
517 {
518 int i;
519 short *outptr = (short *)ptr;
520
521 for (i=0; i<n; i++){
522 if( outptr[i] != (short)((1<<8)+1) )
523 return -1;
524 }
525
526 return 0;
527 }
528
529
verify_read_ushort(void * ptr,int n)530 static int verify_read_ushort(void *ptr, int n)
531 {
532 int i;
533 cl_ushort *outptr = (cl_ushort *)ptr;
534
535 for (i=0; i<n; i++){
536 if( outptr[i] != (cl_ushort)((1<<8)+1) )
537 return -1;
538 }
539
540 return 0;
541 }
542
543
verify_read_float(void * ptr,int n)544 static int verify_read_float( void *ptr, int n )
545 {
546 int i;
547 float *outptr = (float *)ptr;
548
549 for (i=0; i<n; i++){
550 if( outptr[i] != TEST_PRIME_FLOAT )
551 return -1;
552 }
553
554 return 0;
555 }
556
557
verify_read_half(void * ptr,int n)558 static int verify_read_half( void *ptr, int n )
559 {
560 int i;
561 float *outptr = (float *)ptr;
562
563 for( i = 0; i < n / 2; i++ ){
564 if( outptr[i] != TEST_PRIME_HALF )
565 return -1;
566 }
567
568 return 0;
569 }
570
571
verify_read_char(void * ptr,int n)572 static int verify_read_char(void *ptr, int n)
573 {
574 int i;
575 char *outptr = (char *)ptr;
576
577 for (i=0; i<n; i++){
578 if( outptr[i] != TEST_PRIME_CHAR )
579 return -1;
580 }
581
582 return 0;
583 }
584
585
verify_read_uchar(void * ptr,int n)586 static int verify_read_uchar( void *ptr, int n )
587 {
588 int i;
589 uchar *outptr = (uchar *)ptr;
590
591 for (i=0; i<n; i++){
592 if( outptr[i] != TEST_PRIME_CHAR )
593 return -1;
594 }
595
596 return 0;
597 }
598
599
verify_read_struct(void * ptr,int n)600 static int verify_read_struct( void *ptr, int n )
601 {
602 int i;
603 TestStruct *outptr = (TestStruct *)ptr;
604
605 for ( i = 0; i < n; i++ ){
606 if( ( outptr[i].a != TEST_PRIME_INT ) ||
607 ( outptr[i].b != TEST_PRIME_FLOAT ) )
608 return -1;
609 }
610
611 return 0;
612 }
613
614 //----- the test functions
test_stream_read(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements,size_t size,const char * type,int loops,const char * kernelCode[],const char * kernelName[],int (* fn)(void *,int))615 int test_stream_read( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements, size_t size, const char *type, int loops,
616 const char *kernelCode[], const char *kernelName[], int (*fn)(void *,int) )
617 {
618 cl_mem streams[5];
619 void *outptr[5];
620 cl_program program[5];
621 cl_kernel kernel[5];
622 cl_event readEvent;
623 cl_ulong queueStart, submitStart, readStart, readEnd;
624 size_t threads[1];
625 #ifdef USE_LOCAL_THREADS
626 size_t localThreads[1];
627 #endif
628 int err, err_count = 0;
629 int i;
630 size_t ptrSizes[5];
631
632 threads[0] = (size_t)num_elements;
633
634 #ifdef USE_LOCAL_THREADS
635 err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
636 if( err != CL_SUCCESS ){
637 log_error( "Unable to get thread group max size: %d", err );
638 return -1;
639 }
640 if( localThreads[0] > threads[0] )
641 localThreads[0] = threads[0];
642 #endif
643
644 ptrSizes[0] = size;
645 ptrSizes[1] = ptrSizes[0] << 1;
646 ptrSizes[2] = ptrSizes[1] << 1;
647 ptrSizes[3] = ptrSizes[2] << 1;
648 ptrSizes[4] = ptrSizes[3] << 1;
649 for( i = 0; i < loops; i++ ){
650 outptr[i] = malloc( ptrSizes[i] * num_elements );
651 if( ! outptr[i] ){
652 log_error( " unable to allocate %d bytes for outptr\n", (int)( ptrSizes[i] * num_elements ) );
653 return -1;
654 }
655 streams[i] = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), ptrSizes[i] * num_elements, NULL, &err );
656 if( !streams[i] ){
657 log_error( " clCreateBuffer failed\n" );
658 free( outptr[i] );
659 return -1;
660 }
661 err = create_single_kernel_helper( context, &program[i], &kernel[i], 1, &kernelCode[i], kernelName[i] );
662 if( err ){
663 log_error( " Error creating program for %s\n", type );
664 clReleaseMemObject(streams[i]);
665 free( outptr[i] );
666 return -1;
667 }
668
669 err = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), (void *)&streams[i] );
670 if( err != CL_SUCCESS ){
671 print_error( err, "clSetKernelArg failed" );
672 clReleaseProgram( program[i] );
673 clReleaseKernel( kernel[i] );
674 clReleaseMemObject( streams[i] );
675 free( outptr[i] );
676 return -1;
677 }
678
679 #ifdef USE_LOCAL_THREADS
680 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, localThreads, 0, NULL, NULL );
681 #else
682 err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
683 #endif
684 if( err != CL_SUCCESS ){
685 print_error( err, "clEnqueueNDRangeKernel failed" );
686 clReleaseKernel( kernel[i] );
687 clReleaseProgram( program[i] );
688 clReleaseMemObject( streams[i] );
689 free( outptr[i] );
690 return -1;
691 }
692
693 err = clEnqueueReadBuffer( queue, streams[i], false, 0, ptrSizes[i]*num_elements, outptr[i], 0, NULL, &readEvent );
694 if( err != CL_SUCCESS ){
695 print_error( err, "clEnqueueReadBuffer failed" );
696 clReleaseKernel( kernel[i] );
697 clReleaseProgram( program[i] );
698 clReleaseMemObject( streams[i] );
699 free( outptr[i] );
700 return -1;
701 }
702 err = clWaitForEvents( 1, &readEvent );
703 if( err != CL_SUCCESS )
704 {
705 print_error( err, "Unable to wait for event completion" );
706 clReleaseKernel( kernel[i] );
707 clReleaseProgram( program[i] );
708 clReleaseMemObject( streams[i] );
709 free( outptr[i] );
710 return -1;
711 }
712 err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL );
713 if( err != CL_SUCCESS ){
714 print_error( err, "clGetEventProfilingInfo failed" );
715 clReleaseKernel( kernel[i] );
716 clReleaseProgram( program[i] );
717 clReleaseMemObject( streams[i] );
718 free( outptr[i] );
719 return -1;
720 }
721
722 err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL );
723 if( err != CL_SUCCESS ){
724 print_error( err, "clGetEventProfilingInfo failed" );
725 clReleaseKernel( kernel[i] );
726 clReleaseProgram( program[i] );
727 clReleaseMemObject( streams[i] );
728 free( outptr[i] );
729 return -1;
730 }
731
732 err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &readStart, NULL );
733 if( err != CL_SUCCESS ){
734 print_error( err, "clGetEventProfilingInfo failed" );
735 clReleaseKernel( kernel[i] );
736 clReleaseProgram( program[i] );
737 clReleaseMemObject( streams[i] );
738 free( outptr[i] );
739 return -1;
740 }
741
742 err = clGetEventProfilingInfo( readEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &readEnd, NULL );
743 if( err != CL_SUCCESS ){
744 print_error( err, "clGetEventProfilingInfo failed" );
745 clReleaseKernel( kernel[i] );
746 clReleaseProgram( program[i] );
747 clReleaseMemObject( streams[i] );
748 free( outptr[i] );
749 return -1;
750 }
751
752 if (fn(outptr[i], num_elements*(1<<i))){
753 log_error( " %s%d data failed to verify\n", type, 1<<i );
754 err_count++;
755 }
756 else{
757 log_info( " %s%d data verified\n", type, 1<<i );
758 }
759
760 if (check_times(queueStart, submitStart, readStart, readEnd, device))
761 err_count++;
762
763 // cleanup
764 clReleaseEvent(readEvent);
765 clReleaseKernel( kernel[i] );
766 clReleaseProgram( program[i] );
767 clReleaseMemObject( streams[i] );
768 free( outptr[i] );
769 }
770
771 return err_count;
772
773 } // end test_stream_read()
774
775
test_read_array_int(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)776 int test_read_array_int( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
777 {
778 int (*foo)(void *,int);
779 foo = verify_read_int;
780
781 return test_stream_read( device, context, queue, num_elements, sizeof( cl_int ), "int", 5,
782 stream_read_int_kernel_code, int_kernel_name, foo );
783 }
784
785
test_read_array_uint(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)786 int test_read_array_uint( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
787 {
788 int (*foo)(void *,int);
789 foo = verify_read_uint;
790
791 return test_stream_read( device, context, queue, num_elements, sizeof( cl_uint ), "uint", 5,
792 stream_read_uint_kernel_code, uint_kernel_name, foo );
793 }
794
795
test_read_array_long(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)796 int test_read_array_long( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
797 {
798 int (*foo)(void *,int);
799 foo = verify_read_long;
800
801 if (!gHasLong)
802 {
803 log_info("read_long_array: Long types unsupported, skipping.");
804 return CL_SUCCESS;
805 }
806
807 return test_stream_read( device, context, queue, num_elements, sizeof( cl_long ), "long", 5,
808 stream_read_long_kernel_code, long_kernel_name, foo );
809 }
810
811
test_read_array_ulong(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)812 int test_read_array_ulong( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
813 {
814 int (*foo)(void *,int);
815 foo = verify_read_ulong;
816
817 if (!gHasLong)
818 {
819 log_info("read_long_array: Long types unsupported, skipping.");
820 return CL_SUCCESS;
821 }
822
823 return test_stream_read( device, context, queue, num_elements, sizeof( cl_ulong ), "ulong", 5,
824 stream_read_ulong_kernel_code, ulong_kernel_name, foo );
825 }
826
827
test_read_array_short(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)828 int test_read_array_short( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
829 {
830 int (*foo)(void *,int);
831 foo = verify_read_short;
832
833 return test_stream_read( device, context, queue, num_elements, sizeof( cl_short ), "short", 5,
834 stream_read_short_kernel_code, short_kernel_name, foo );
835 }
836
837
test_read_array_ushort(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)838 int test_read_array_ushort( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
839 {
840 int (*foo)(void *,int);
841 foo = verify_read_ushort;
842
843 return test_stream_read( device, context, queue, num_elements, sizeof( cl_ushort ), "ushort", 5,
844 stream_read_ushort_kernel_code, ushort_kernel_name, foo );
845 }
846
847
test_read_array_float(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)848 int test_read_array_float( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
849 {
850 int (*foo)(void *,int);
851 foo = verify_read_float;
852
853 return test_stream_read( device, context, queue, num_elements, sizeof( cl_float ), "float", 5,
854 stream_read_float_kernel_code, float_kernel_name, foo );
855 }
856
857
test_read_array_half(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)858 int test_read_array_half( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
859 {
860 int (*foo)(void *,int);
861 foo = verify_read_half;
862
863 return test_stream_read( device, context, queue, num_elements, sizeof( cl_half ), "half", 5,
864 stream_read_half_kernel_code, half_kernel_name, foo );
865 }
866
867
test_read_array_char(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)868 int test_read_array_char( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
869 {
870 int (*foo)(void *,int);
871 foo = verify_read_char;
872
873 return test_stream_read( device, context, queue, num_elements, sizeof( cl_char ), "char", 5,
874 stream_read_char_kernel_code, char_kernel_name, foo );
875 }
876
877
test_read_array_uchar(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)878 int test_read_array_uchar( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
879 {
880 int (*foo)(void *,int);
881 foo = verify_read_uchar;
882
883 return test_stream_read( device, context, queue, num_elements, sizeof( cl_uchar ), "uchar", 5,
884 stream_read_uchar_kernel_code, uchar_kernel_name, foo );
885 }
886
887
test_read_array_struct(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)888 int test_read_array_struct( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
889 {
890 int (*foo)(void *,int);
891 foo = verify_read_struct;
892
893 return test_stream_read( device, context, queue, num_elements, sizeof( TestStruct ), "struct", 1,
894 stream_read_struct_kernel_code, struct_kernel_name, foo );
895 }
896
897 /*
898 int read_struct_array(cl_device_group device, cl_device id, cl_context context, int num_elements)
899 {
900 cl_mem streams[1];
901 TestStruct *output_ptr;
902 cl_program program[1];
903 cl_kernel kernel[1];
904 void *values[1];
905 size_t sizes[1] = { sizeof(cl_stream) };
906 size_t threads[1];
907 #ifdef USE_LOCAL_THREADS
908 size_t localThreads[1];
909 #endif
910 int err;
911 size_t objSize = sizeof(TestStruct);
912
913 threads[0] = (size_t)num_elements;
914
915 #ifdef USE_LOCAL_THREADS
916 err = clGetDeviceConfigInfo( id, CL_DEVICE_MAX_THREAD_GROUP_SIZE, localThreads, sizeof( cl_uint ), NULL );
917 if( err != CL_SUCCESS ){
918 log_error( "Unable to get thread group max size: %d", err );
919 return -1;
920 }
921 if( localThreads[0] > threads[0] )
922 localThreads[0] = threads[0];
923 #endif
924
925 output_ptr = malloc(objSize * num_elements);
926 if( ! output_ptr ){
927 log_error( " unable to allocate %d bytes for output_ptr\n", (int)(objSize * num_elements) );
928 return -1;
929 }
930 streams[0] = clCreateBuffer( device, (cl_mem_flags)(CL_MEM_READ_WRITE), objSize * num_elements, NULL );
931 if( !streams[0] ){
932 log_error( " clCreateBuffer failed\n" );
933 free( output_ptr );
934 return -1;
935 }
936
937 err = create_program_and_kernel( device, stream_read_struct_kernel_code, "test_stream_read_struct", &program[0], &kernel[0]);
938 if( err ){
939 clReleaseProgram( program[0] );
940 free( output_ptr );
941 return -1;
942 }
943
944 err = clSetKernelArg( kernel[0], 0, sizeof( cl_mem ), (void *)&streams[0] );
945 if( err != CL_SUCCESS){
946 print_error( err, "clSetKernelArg failed" );
947 clReleaseProgram( program[0] );
948 clReleaseKernel( kernel[0] );
949 clReleaseMemObject( streams[0] );
950 free( output_ptr );
951 return -1;
952 }
953
954 #ifdef USE_LOCAL_THREADS
955 err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, localThreads, 0, NULL, NULL );
956 #else
957 err = clEnqueueNDRangeKernel( queue, kernel[0], 1, NULL, threads, NULL, 0, NULL, NULL );
958 #endif
959 if( err != CL_SUCCESS ){
960 print_error( err, "clEnqueueNDRangeKernel failed" );
961 clReleaseProgram( program[0] );
962 clReleaseKernel( kernel[0] );
963 clReleaseMemObject( streams[0] );
964 free( output_ptr );
965 return -1;
966 }
967
968 err = clEnqueueReadBuffer( queue, streams[0], true, 0, objSize*num_elements, (void *)output_ptr, 0, NULL, NULL );
969 if( err != CL_SUCCESS){
970 print_error( err, "clEnqueueReadBuffer failed" );
971 clReleaseProgram( program[0] );
972 clReleaseKernel( kernel[0] );
973 clReleaseMemObject( streams[0] );
974 free( output_ptr );
975 return -1;
976 }
977
978 if (verify_read_struct(output_ptr, num_elements)){
979 log_error(" struct test failed\n");
980 err = -1;
981 }
982 else{
983 log_info(" struct test passed\n");
984 err = 0;
985 }
986
987 // cleanup
988 clReleaseProgram( program[0] );
989 clReleaseKernel( kernel[0] );
990 clReleaseMemObject( streams[0] );
991 free( output_ptr );
992
993 return err;
994 }
995 */
996
997
998