1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include "procs.h"
25
// Minimum values of the unsigned integer types (always zero).
#define UCHAR_MIN 0
#define USHRT_MIN 0
#define UINT_MIN 0

// Two-operand maximum / minimum. Defined only when no earlier header has
// already supplied them. Each operand may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MAX
#define MAX( lhs, rhs ) ( (lhs) > (rhs) ? (lhs) : (rhs) )
#endif
#ifndef MIN
#define MIN( lhs, rhs ) ( (lhs) < (rhs) ? (lhs) : (rhs) )
#endif
36
37
verify_subsat_char(const cl_char * inA,const cl_char * inB,const cl_char * outptr,int n,const char * sizeName,int vecSize)38 static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize )
39 {
40 int i;
41 for( i = 0; i < n; i++ )
42 {
43 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
44 r = MAX( r, CL_CHAR_MIN );
45 r = MIN( r, CL_CHAR_MAX );
46
47 if( r != outptr[i] )
48 { log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
49 }
50 return 0;
51 }
52
verify_subsat_uchar(const cl_uchar * inA,const cl_uchar * inB,const cl_uchar * outptr,int n,const char * sizeName,int vecSize)53 static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const cl_uchar *outptr, int n, const char *sizeName, int vecSize )
54 {
55 int i;
56 for( i = 0; i < n; i++ )
57 {
58 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
59 r = MAX( r, 0 );
60 r = MIN( r, CL_UCHAR_MAX );
61 if( r != outptr[i] )
62 { log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
63 }
64 return 0;
65 }
66
verify_subsat_short(const cl_short * inA,const cl_short * inB,const cl_short * outptr,int n,const char * sizeName,int vecSize)67 static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const cl_short *outptr, int n, const char *sizeName, int vecSize )
68 {
69 int i;
70 for( i = 0; i < n; i++ )
71 {
72 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
73 r = MAX( r, CL_SHRT_MIN );
74 r = MIN( r, CL_SHRT_MAX );
75
76 if( r != outptr[i] )
77 { log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
78 }
79 return 0;
80 }
81
verify_subsat_ushort(const cl_ushort * inA,const cl_ushort * inB,const cl_ushort * outptr,int n,const char * sizeName,int vecSize)82 static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, const cl_ushort *outptr, int n, const char *sizeName , int vecSize)
83 {
84 int i;
85 for( i = 0; i < n; i++ )
86 {
87 cl_int r = (cl_int) inA[i] - (cl_int) inB[i];
88 r = MAX( r, 0 );
89 r = MIN( r, CL_USHRT_MAX );
90
91 if( r != outptr[i] )
92 { log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
93 }
94 return 0;
95 }
96
verify_subsat_int(const cl_int * inA,const cl_int * inB,const cl_int * outptr,int n,const char * sizeName,int vecSize)97 static int verify_subsat_int( const cl_int *inA, const cl_int *inB, const cl_int *outptr, int n, const char *sizeName , int vecSize)
98 {
99 int i;
100 for( i = 0; i < n; i++ )
101 {
102 cl_int r = (cl_int) ((cl_uint)inA[i] - (cl_uint)inB[i]);
103 if( inB[i] < 0 )
104 {
105 if( r < inA[i] )
106 r = CL_INT_MAX;
107 }
108 else
109 {
110 if( r > inA[i] )
111 r = CL_INT_MIN;
112 }
113
114
115 if( r != outptr[i] )
116 { log_info( "\n%d) Failure for sub_sat( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
117 }
118 return 0;
119 }
120
// Host-side reference check for sub_sat() on uint data.
//
// The expected value is inA[i] - inB[i], or 0 when inB[i] is the larger
// operand. sizeName is only used in the failure message and vecSize is
// unused.
//
// Returns 0 when the first n elements of outptr match; otherwise logs the
// first mismatch and returns -1.
static int verify_subsat_uint( const cl_uint *inA, const cl_uint *inB, const cl_uint *outptr, int n, const char *sizeName, int vecSize )
{
    for( int idx = 0; idx < n; ++idx )
    {
        cl_uint expected = ( inA[idx] < inB[idx] ) ? 0 : inA[idx] - inB[idx];

        if( expected == outptr[idx] )
            continue;

        log_info( "\n%d) Failure for sub_sat( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n",
                  idx, sizeName, inA[idx], sizeName, inB[idx], expected, outptr[idx] );
        return -1;
    }
    return 0;
}
135
verify_subsat_long(const cl_long * inA,const cl_long * inB,const cl_long * outptr,int n,const char * sizeName,int vecSize)136 static int verify_subsat_long( const cl_long *inA, const cl_long *inB, const cl_long *outptr, int n, const char *sizeName , int vecSize)
137 {
138 int i;
139 for( i = 0; i < n; i++ )
140 {
141 cl_long r = (cl_long)((cl_ulong)inA[i] - (cl_ulong)inB[i]);
142 if( inB[i] < 0 )
143 {
144 if( r < inA[i] )
145 r = CL_LONG_MAX;
146 }
147 else
148 {
149 if( r > inA[i] )
150 r = CL_LONG_MIN;
151 }
152 if( r != outptr[i] )
153 { log_info( "%d) Failure for sub_sat( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
154 }
155 return 0;
156 }
157
// Host-side reference check for sub_sat() on ulong data.
//
// The expected value is inA[i] - inB[i], or 0 when inB[i] is the larger
// operand.
//
//   inA, inB   operands of the subtraction
//   outptr     results to be checked
//   n          number of elements to check
//   sizeName   vector-width suffix, used only in the failure message
//   vecSize    unused
//
// Returns 0 when every element matches; otherwise logs the first mismatch
// and returns -1.
static int verify_subsat_ulong( const cl_ulong *inA, const cl_ulong *inB, const cl_ulong *outptr, int n, const char *sizeName, int vecSize )
{
    int i;
    for( i = 0; i < n; i++ )
    {
        cl_ulong r = inA[i] - inB[i];
        if( inA[i] < inB[i] )
            r = 0;

        if( r != outptr[i] )
        {
            // %llx requires unsigned long long; cl_ulong need not be that
            // exact type, so convert explicitly.
            log_info( "%d) Failure for sub_sat( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n",
                      i, sizeName, (unsigned long long) inA[i], sizeName, (unsigned long long) inB[i],
                      (unsigned long long) r, (unsigned long long) outptr[i] );
            return -1;
        }
    }
    return 0;
}
171
172 typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName, int );
173 static const verifyFunc verify[] = { (verifyFunc) verify_subsat_char, (verifyFunc) verify_subsat_uchar,
174 (verifyFunc) verify_subsat_short, (verifyFunc) verify_subsat_ushort,
175 (verifyFunc) verify_subsat_int, (verifyFunc) verify_subsat_uint,
176 (verifyFunc) verify_subsat_long, (verifyFunc) verify_subsat_ulong };
177
178 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong" };
179 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
180 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16" };
181
182 static const size_t kSizes[8] = { 1, 1, 2, 2, 4, 4, 8, 8 };
183
test_integer_sub_sat(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)184 int test_integer_sub_sat(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
185 {
186 int *input_ptr[2], *output_ptr, *p;
187 int err;
188 cl_uint i;
189 cl_uint vectorSize;
190 cl_uint type;
191 MTdata d;
192 int fail_count = 0;
193
194 size_t length = sizeof(int) * 4 * n_elems;
195
196 input_ptr[0] = (int*)malloc(length);
197 input_ptr[1] = (int*)malloc(length);
198 output_ptr = (int*)malloc(length);
199
200 d = init_genrand( gRandomSeed );
201 p = input_ptr[0];
202 for (i=0; i<4 * (cl_uint) n_elems; i++)
203 p[i] = genrand_int32(d);
204 p = input_ptr[1];
205 for (i=0; i<4 * (cl_uint) n_elems; i++)
206 p[i] = genrand_int32(d);
207 free_mtdata(d); d = NULL;
208
209 for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
210 {
211
212 //embedded devices don't support long/ulong so skip over
213 if (! gHasLong && strstr(test_str_names[type],"long"))
214 {
215 log_info( "WARNING: device does not support 64-bit integers. Skipping %s\n", test_str_names[type] );
216 continue;
217 }
218
219 verifyFunc f = verify[ type ];
220 // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
221 size_t elementCount = length / kSizes[type];
222 cl_mem streams[3];
223
224 log_info( "%s", test_str_names[type] );
225 fflush( stdout );
226
227 // Set up data streams for the type
228 streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
229 if (!streams[0])
230 {
231 log_error("clCreateBuffer failed\n");
232 return -1;
233 }
234 streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
235 if (!streams[1])
236 {
237 log_error("clCreateBuffer failed\n");
238 return -1;
239 }
240 streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
241 if (!streams[2])
242 {
243 log_error("clCreateBuffer failed\n");
244 return -1;
245 }
246
247 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
248 if (err != CL_SUCCESS)
249 {
250 log_error("clEnqueueWriteBuffer failed\n");
251 return -1;
252 }
253 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
254 if (err != CL_SUCCESS)
255 {
256 log_error("clEnqueueWriteBuffer failed\n");
257 return -1;
258 }
259
260 for( vectorSize = 0; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ )
261 {
262 cl_program program = NULL;
263 cl_kernel kernel = NULL;
264
265 const char *source[] = {
266 "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
267 "(__global ", test_str_names[type], vector_size_names[vectorSize],
268 " *srcA, __global ", test_str_names[type], vector_size_names[vectorSize],
269 " *srcB, __global ", test_str_names[type], vector_size_names[vectorSize],
270 " *dst)\n"
271 "{\n"
272 " int tid = get_global_id(0);\n"
273 "\n"
274 " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(srcA[tid], srcB[tid]);\n"
275 " dst[tid] = tmp;\n"
276 "}\n"
277 };
278
279 const char *sourceV3[] = {
280 "__kernel void test_sub_sat_", test_str_names[type], vector_size_names[vectorSize],
281 "(__global ", test_str_names[type],
282 " *srcA, __global ", test_str_names[type],
283 " *srcB, __global ", test_str_names[type],
284 " *dst)\n"
285 "{\n"
286 " int tid = get_global_id(0);\n"
287 "\n"
288 " ", test_str_names[type], vector_size_names[vectorSize], " tmp = sub_sat(vload3(tid, srcA), vload3(tid, srcB));\n"
289 " vstore3(tmp, tid, dst);\n"
290 "}\n"
291 };
292
293 char kernelName[128];
294 snprintf( kernelName, sizeof( kernelName ), "test_sub_sat_%s%s", test_str_names[type], vector_size_names[vectorSize] );
295 if(vector_sizes[vectorSize] != 3)
296 {
297 err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
298 } else {
299 err = create_single_kernel_helper(context, &program, &kernel, sizeof( sourceV3 ) / sizeof( sourceV3[0] ), sourceV3, kernelName );
300 }
301 if (err)
302 return -1;
303
304 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
305 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
306 err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
307 if (err != CL_SUCCESS)
308 {
309 log_error("clSetKernelArgs failed\n");
310 return -1;
311 }
312
313 //Wipe the output buffer clean
314 uint32_t pattern = 0xdeadbeef;
315 memset_pattern4( output_ptr, &pattern, length );
316 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
317 if (err != CL_SUCCESS)
318 {
319 log_error("clEnqueueWriteBuffer failed\n");
320 return -1;
321 }
322
323 size_t size = elementCount / vector_sizes[vectorSize];
324 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
325 if (err != CL_SUCCESS)
326 {
327 log_error("clEnqueueNDRangeKernel failed\n");
328 return -1;
329 }
330
331 err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
332 if (err != CL_SUCCESS)
333 {
334 log_error("clEnqueueReadBuffer failed\n");
335 return -1;
336 }
337
338 char *inP = (char *)input_ptr[0];
339 char *inP2 = (char *)input_ptr[1];
340 char *outP = (char *)output_ptr;
341
342 for( size_t e = 0; e < size; e++ )
343 {
344 if( f( inP, inP2, outP, vector_sizes[vectorSize], vector_size_names[vectorSize], vector_sizes[vectorSize] ) ) {
345 ++fail_count; break; // return -1;
346 }
347 inP += kSizes[type] * vector_sizes[vectorSize];
348 inP2 += kSizes[type] * vector_sizes[vectorSize];
349 outP += kSizes[type] * vector_sizes[vectorSize];
350 }
351
352 clReleaseKernel( kernel );
353 clReleaseProgram( program );
354 log_info( "." );
355 fflush( stdout );
356 }
357
358 clReleaseMemObject( streams[0] );
359 clReleaseMemObject( streams[1] );
360 clReleaseMemObject( streams[2] );
361 log_info( "done\n" );
362 }
363 if(fail_count) {
364 log_info("Failed on %d types\n", fail_count);
365 return -1;
366 }
367
368 free(input_ptr[0]);
369 free(input_ptr[1]);
370 free(output_ptr);
371
372 return err;
373 }
374
375
376