• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 
// Lower bounds of the unsigned integer types. <limits.h> only provides the
// corresponding *_MAX values, so the zero minima are spelled out here.
#define UCHAR_MIN   0
#define USHRT_MIN   0
#define UINT_MIN    0

// Two-argument maximum / minimum. Defined only when no earlier header already
// supplies them. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#if !defined( MAX )
#define MAX( lhs, rhs )   ( (lhs) > (rhs) ? (lhs) : (rhs) )
#endif
#if !defined( MIN )
#define MIN( lhs, rhs )   ( (lhs) < (rhs) ? (lhs) : (rhs) )
#endif
36 
verify_addsat_char(const cl_char * inA,const cl_char * inB,const cl_char * outptr,int n,const char * sizeName,int vecSize)37 static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
38 {
39     int i;
40     for( i = 0; i < n; i++ )
41     {
42         cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
43         r = MAX( r, CL_CHAR_MIN );
44         r = MIN( r, CL_CHAR_MAX );
45 
46         if( r != outptr[i] )
47         { log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
48     }
49     return 0;
50 }
51 
verify_addsat_uchar(const cl_uchar * inA,const cl_uchar * inB,const cl_uchar * outptr,int n,const char * sizeName,int vecSize)52 static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
53 {
54     int i;
55     for( i = 0; i < n; i++ )
56     {
57         cl_int r = (int) inA[i] + (int) inB[i];
58         r = MAX( r, 0 );
59         r = MIN( r, CL_UCHAR_MAX );
60         if( r != outptr[i] )
61         { log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
62     }
63     return 0;
64 }
65 
verify_addsat_short(const cl_short * inA,const cl_short * inB,const cl_short * outptr,int n,const char * sizeName,int vecSize)66 static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName , int vecSize)
67 {
68     int i;
69     for( i = 0; i < n; i++ )
70     {
71         cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
72         r = MAX( r, CL_SHRT_MIN );
73         r = MIN( r, CL_SHRT_MAX );
74 
75         if( r != outptr[i] )
76         { log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
77     }
78     return 0;
79 }
80 
verify_addsat_ushort(const cl_ushort * inA,const cl_ushort * inB,const cl_ushort * outptr,int n,const char * sizeName,int vecSize)81 static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
82 {
83     int i;
84     for( i = 0; i < n; i++ )
85     {
86         cl_int r = (cl_int) inA[i] + (cl_int) inB[i];
87         r = MAX( r, 0 );
88         r = MIN( r, CL_USHRT_MAX );
89 
90         if( r != outptr[i] )
91         { log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
92     }
93     return 0;
94 }
95 
verify_addsat_int(const cl_int * inA,const cl_int * inB,const cl_int * outptr,int n,const char * sizeName,int vecSize)96 static int verify_addsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
97 {
98     int i;
99     for( i = 0; i < n; i++ )
100     {
101         cl_int r = (cl_int) ((cl_uint) inA[i] + (cl_uint)inB[i]);
102         if( inB[i] > 0 )
103         {
104             if( r < inA[i] )
105                 r = CL_INT_MAX;
106         }
107         else
108         {
109             if( r > inA[i] )
110                 r = CL_INT_MIN;
111         }
112 
113 
114         if( r != outptr[i] )
115         { log_info( "\n%d) Failure for add_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
116     }
117     return 0;
118 }
119 
verify_addsat_uint(const cl_uint * inA,const cl_uint * inB,const cl_uint * outptr,int n,const char * sizeName,int vecSize)120 static int verify_addsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName , int vecSize)
121 {
122     int i;
123     for( i = 0; i < n; i++ )
124     {
125         cl_uint r = inA[i] + inB[i];
126         if( r < inA[i] )
127             r = CL_UINT_MAX;
128 
129         if( r != outptr[i] )
130         { log_info( "\n%d) Failure for add_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
131     }
132     return 0;
133 }
134 
verify_addsat_long(const cl_long * inA,const cl_long * inB,const cl_long * outptr,int n,const char * sizeName,int vecSize)135 static int verify_addsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
136 {
137     int i;
138     for( i = 0; i < n; i++ )
139     {
140         cl_long r = (cl_long)((cl_ulong)inA[i] + (cl_ulong)inB[i]);
141         if( inB[i] > 0 )
142         {
143             if( r < inA[i] )
144                 r = CL_LONG_MAX;
145         }
146         else
147         {
148             if( r > inA[i] )
149                 r = CL_LONG_MIN;
150         }
151         if( r != outptr[i] )
152         { log_info( "%d) Failure for add_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
153     }
154     return 0;
155 }
156 
verify_addsat_ulong(const cl_ulong * inA,const cl_ulong * inB,const cl_ulong * outptr,int n,const char * sizeName,int vecSize)157 static int verify_addsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName , int vecSize)
158 {
159     int i;
160     for( i = 0; i < n; i++ )
161     {
162         cl_ulong r = inA[i] + inB[i];
163         if( r < inA[i] )
164             r = CL_ULONG_MAX;
165         if( r != outptr[i] )
166         { log_info( "%d) Failure for add_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
167     }
168     return 0;
169 }
170 
171 typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
172 static const verifyFunc verify[] = {   (verifyFunc) verify_addsat_char, (verifyFunc) verify_addsat_uchar,
173     (verifyFunc) verify_addsat_short, (verifyFunc) verify_addsat_ushort,
174     (verifyFunc) verify_addsat_int, (verifyFunc) verify_addsat_uint,
175     (verifyFunc) verify_addsat_long, (verifyFunc) verify_addsat_ulong };
176 //FIXME:  enable long and ulong when GPU path is working
177 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
178 
179 //FIXME:  enable "16" when support for > 64 byte vectors go into LLVM
180 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
181 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
182 static const size_t  kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
183 
test_integer_add_sat(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)184 int test_integer_add_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
185 {
186     cl_int *input_ptr[2], *output_ptr, *p;
187     int err;
188     int i;
189     cl_uint vectorSize;
190     cl_uint type;
191     MTdata d;
192     int fail_count = 0;
193 
194     size_t length = sizeof(cl_int) * 4 * n_elems;
195 
196     input_ptr[0] = (cl_int*)malloc(length);
197     input_ptr[1] = (cl_int*)malloc(length);
198     output_ptr   = (cl_int*)malloc(length);
199 
200     d = init_genrand( gRandomSeed );
201     p = input_ptr[0];
202     for (i=0; i<4 * n_elems; i++)
203         p[i] = genrand_int32(d);
204     p = input_ptr[1];
205     for (i=0; i<4 * n_elems; i++)
206         p[i] = genrand_int32(d);
207     free_mtdata(d); d = NULL;
208 
209     for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
210     {
211 
212         //embedded devices don't support long/ulong so skip over
213         if (! gHasLong && strstr(test_str_names[type],"long"))
214         {
215             log_info( "WARNING: 64 bit integers are not supported on this device. Skipping %s\n", test_str_names[type] );
216             continue;
217         }
218 
219         verifyFunc f = verify[ type ];
220         // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
221         size_t elementCount = length / kSizes[type];
222         cl_mem streams[3];
223 
224         log_info( "%s", test_str_names[type] );
225         fflush( stdout );
226 
227         // Set up data streams for the type
228         streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
229         if (!streams[0])
230         {
231             log_error("clCreateBuffer failed\n");
232             return -1;
233         }
234         streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
235         if (!streams[1])
236         {
237             log_error("clCreateBuffer failed\n");
238             return -1;
239         }
240         streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
241         if (!streams[2])
242         {
243             log_error("clCreateBuffer failed\n");
244             return -1;
245         }
246 
247         err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
248         if (err != CL_SUCCESS)
249         {
250             log_error("clEnqueueWriteBuffer failed\n");
251             return -1;
252         }
253         err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
254         if (err != CL_SUCCESS)
255         {
256             log_error("clEnqueueWriteBuffer failed\n");
257             return -1;
258         }
259 
260         for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
261         {
262             cl_program program = NULL;
263             cl_kernel kernel = NULL;
264 
265             const char *source[] = {
266                 "__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
267                 "(__global ", test_str_names[type], vector_size_names[vectorSize],
268                 " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
269                 " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
270                 " *dst)\n"
271                 "{\n"
272                 "    int  tid = get_global_id(0);\n"
273                 "\n"
274                 "    ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(srcA[tid], srcB[tid]);\n"
275                 "    dst[tid] = tmp;\n"
276                 "}\n" };
277 
278 
279             const char *sourceV3[] = {
280                 "__kernel void test_add_sat_", test_str_names[type], vector_size_names[vectorSize],
281                 "(__global ", test_str_names[type],
282                 " *srcA, __global ", test_str_names[type],
283                 " *srcB, __global ", test_str_names[type],
284                 " *dst)\n"
285                 "{\n"
286                 "    int  tid = get_global_id(0);\n"
287                 "\n"
288                 "    ", test_str_names[type], vector_size_names[vectorSize], " tmp = add_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
289                 "    vstore3(tmp, tid, dst);\n"
290                 "}\n" };
291 
292             char kernelName[128];
293             snprintf( kernelName, sizeof( kernelName ), "test_add_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
294             if(vector_sizes[vectorSize] != 3)
295             {
296                 err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
297             }
298             else
299             {
300                 err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
301             }
302             if (err)
303                 return -1;
304 
305             err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
306             err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
307             err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
308             if (err != CL_SUCCESS)
309             {
310                 log_error("clSetKernelArgs failed\n");
311                 return -1;
312             }
313 
314             //Wipe the output buffer clean
315             uint32_t pattern = 0xdeadbeef;
316             memset_pattern4( output_ptr, &pattern, length );
317             err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
318             if (err != CL_SUCCESS)
319             {
320                 log_error("clWriteArray failed\n");
321                 return -1;
322             }
323 
324             size_t size = elementCount / (vector_sizes[vectorSize]);
325             err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
326             if (err != CL_SUCCESS)
327             {
328                 log_error("clExecuteKernel failed\n");
329                 return -1;
330             }
331 
332             err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
333             if (err != CL_SUCCESS)
334             {
335                 log_error("clReadArray failed\n");
336                 return -1;
337             }
338 
339             char *inP = (char *)input_ptr[0];
340             char *inP2 = (char *)input_ptr[1];
341             char *outP = (char *)output_ptr;
342 
343             for( size_t e = 0; e < size; e++ )
344             {
345                 if( f( inP, inP2, outP, (vector_sizes[vectorSize]), vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
346                     ++fail_count; break; // return -1;
347                 }
348                 inP += kSizes[type] * vector_sizes[vectorSize];
349                 inP2 += kSizes[type] * vector_sizes[vectorSize];
350                 outP += kSizes[type] * vector_sizes[vectorSize];
351             }
352 
353             clReleaseKernel( kernel );
354             clReleaseProgram( program );
355             log_info( "." );
356             fflush( stdout );
357         }
358 
359         clReleaseMemObject( streams[0] );
360         clReleaseMemObject( streams[1] );
361         clReleaseMemObject( streams[2] );
362         log_info( "done\n" );
363     }
364     if(fail_count) {
365         log_info("Failed on %d types\n", fail_count);
366         return -1;
367     }
368 
369     free(input_ptr[0]);
370     free(input_ptr[1]);
371     free(output_ptr);
372 
373     return err;
374 }
375 
376 
377