• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18 
19 #include <algorithm>
20 
21 #if defined( __APPLE__ )
22     #include <signal.h>
23     #include <sys/signal.h>
24     #include <setjmp.h>
25 #endif
26 
27 extern bool gTestImage2DFromBuffer;
28 
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)31 static size_t reduceImageSizeRange(size_t maxDimSize) {
32   size_t DimSize = maxDimSize/32;
33   if (DimSize < (size_t) 16)
34     return 16;
35   else if (DimSize > (size_t) 256)
36     return 256;
37   else
38     return DimSize;
39 }
40 
41 const char *read2DKernelSourcePattern =
42     "%s\n"
43     "__kernel void sample_kernel( read_only %s input,%s __global float "
44     "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
45     "{\n"
46     "%s"
47     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
48     "%s"
49     "%s"
50     "   results[offset] = read_image%s( input, imageSampler, coords %s);\n"
51     "}";
52 
53 const char *read_write2DKernelSourcePattern =
54     "%s\n"
55     "__kernel void sample_kernel( read_write %s input,%s __global float "
56     "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
57     "{\n"
58     "%s"
59     "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
60     "%s"
61     "%s"
62     "   results[offset] = read_image%s( input, coords %s);\n"
63     "}";
64 
65 const char *intCoordKernelSource =
66 "   int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
67 
68 const char *floatKernelSource =
69 "   float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
70 
71 static const char *samplerKernelArg = " sampler_t imageSampler,";
72 
73 static const char *lodOffsetSource =
74 "   unsigned int lod_int = (unsigned int) lod;\n"
75 "   int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
76 "   int offset = tidY*width_lod + tidX;\n";
77 
78 static const char *offsetSource =
79 "   int offset = tidY*get_image_width(input) + tidX;\n";
80 
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)81 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
82                                                 T *resultPtr, T * expected, float error,
83                                 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
84 {
85     int actualX, actualY;
86     int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
87     bool clampingErr = false, clamped = false, otherClampingBug = false;
88     int clampedX, clampedY, ignoreMe;
89 
90     clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
91 
92     if( found )
93     {
94         // Is it a clamping bug?
95         if( clamped && clampedX == actualX && clampedY == actualY )
96         {
97             if( (--numClamped) == 0 )
98             {
99                 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
100                 if (imageInfo->format->image_channel_order == CL_DEPTH)
101                 {
102                     if( printAsFloat )
103                     {
104                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
105                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
106                     }
107                     else
108                     {
109                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
110                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
111                     }
112                 }
113                 else
114                 {
115                     if( printAsFloat )
116                     {
117                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
118                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
119                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
120                     }
121                     else
122                     {
123                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
124                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
125                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
126                     }
127                 }
128                 return 1;
129             }
130             clampingErr = true;
131             otherClampingBug = true;
132         }
133     }
134     if( clamped && !otherClampingBug )
135     {
136         // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
137         if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
138         {
139             if( (--numClamped) == 0 )
140             {
141                 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
142                 if (imageInfo->format->image_channel_order == CL_DEPTH)
143                 {
144                     if( printAsFloat )
145                     {
146                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
147                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
148                     }
149                     else
150                     {
151                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
152                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
153                     }
154                 }
155                 else
156                 {
157                     if( printAsFloat )
158                     {
159                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
160                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
161                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
162                     }
163                     else
164                     {
165                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
166                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
167                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
168                     }
169                 }
170                 return 1;
171             }
172             clampingErr = true;
173         }
174     }
175     if( !clampingErr )
176     {
177         if (imageInfo->format->image_channel_order == CL_DEPTH)
178         {
179             if( printAsFloat )
180             {
181               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
182                         (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
183             }
184             else
185             {
186               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
187                         (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
188             }
189         }
190         else
191         {
192             if( printAsFloat )
193             {
194                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
195                           (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
196                           (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
197             }
198             else
199             {
200                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
201                           (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
202                                     (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
203             }
204         }
205         log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
206         if( clamped )
207         {
208             log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
209         }
210         if( printAsFloat && gExtraValidateInfo)
211         {
212             log_error( "Nearby values:\n" );
213             log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
214             for( int yOff = -2; yOff <= 1; yOff++ )
215             {
216                 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
217                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
218                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
219                 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
220                 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
221                 if (imageInfo->format->image_channel_order == CL_DEPTH)
222                 {
223                     log_error( "%d\t(%g)",clampedY + yOff, top[0] );
224                     log_error( " (%g)", real[0] );
225                     log_error( " (%g)",bot[0] );
226                     log_error( " (%g)\n",bot2[0] );
227                 }
228                 else
229                 {
230                     log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
231                     log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
232                     log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
233                     log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
234                 }
235             }
236 
237             if( clampedY < 1 )
238             {
239                 log_error( "Nearby values:\n" );
240                 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
241                 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
242                 {
243                     float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
244                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
245                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
246                     read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
247                     read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
248                     if (imageInfo->format->image_channel_order == CL_DEPTH)
249                     {
250                         log_error( "%d\t(%g)",clampedY + yOff, top[0] );
251                         log_error( " (%g)", real[0] );
252                         log_error( " (%g)",bot[0] );
253                         log_error( " (%g)\n",bot2[0] );
254                     }
255                     else
256                     {
257                         log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
258                         log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
259                         log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
260                         log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
261                     }
262                 }
263             }
264         }
265 
266         if( imageSampler->filter_mode != CL_FILTER_LINEAR )
267         {
268             if( found )
269                 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
270             else
271                 log_error( "\tValue not actually found in image\n" );
272         }
273         log_error( "\n" );
274 
275         numClamped = -1; // We force the clamped counter to never work
276         if( ( --numTries ) == 0 )
277         {
278             return 1;
279         }
280     }
281     return 0;
282 }
283 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d)284 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
285 {
286     size_t i = 0;
287     if( gDisableOffsets )
288     {
289         for( size_t y = 0; y < imageInfo->height; y++ )
290         {
291             for( size_t x = 0; x < imageInfo->width; x++, i++ )
292             {
293                 xOffsets[ i ] = (float) (xfract + (double) x);
294                 yOffsets[ i ] = (float) (yfract + (double) y);
295             }
296         }
297     }
298     else
299     {
300         for( size_t y = 0; y < imageInfo->height; y++ )
301         {
302             for( size_t x = 0; x < imageInfo->width; x++, i++ )
303             {
304                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
305                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
306             }
307         }
308     }
309 
310     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
311     {
312         i = 0;
313         for( size_t y = 0; y < imageInfo->height; y++ )
314         {
315             for( size_t x = 0; x < imageInfo->width; x++, i++ )
316             {
317                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
318                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
319             }
320         }
321     }
322 
323     if( normalized_coords )
324     {
325         i = 0;
326         for( size_t y = 0; y < imageInfo->height; y++ )
327         {
328             for( size_t x = 0; x < imageInfo->width; x++, i++ )
329             {
330                 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
331                 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
332             }
333         }
334     }
335 }
336 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)337 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
338 {
339     size_t i = 0;
340     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
341 
342     if( gTestMipmaps )
343     {
344         width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
345         height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
346     }
347     if( gDisableOffsets )
348     {
349         for( size_t y = 0; y < height_lod; y++ )
350         {
351             for( size_t x = 0; x < width_lod; x++, i++ )
352             {
353                 xOffsets[ i ] = (float) (xfract + (float) x);
354                 yOffsets[ i ] = (float) (yfract + (float) y);
355             }
356         }
357     }
358     else
359     {
360         for( size_t y = 0; y < height_lod; y++ )
361         {
362             for( size_t x = 0; x < width_lod; x++, i++ )
363             {
364                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
365                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
366             }
367         }
368     }
369 
370     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
371     {
372         i = 0;
373         for( size_t y = 0; y < height_lod; y++ )
374         {
375             for( size_t x = 0; x < width_lod; x++, i++ )
376             {
377                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
378                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
379             }
380         }
381     }
382 
383     if( normalized_coords )
384     {
385         i = 0;
386         for( size_t y = 0; y < height_lod; y++ )
387         {
388             for( size_t x = 0; x < width_lod; x++, i++ )
389             {
390                 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
391                 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
392             }
393         }
394     }
395 }
396 
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)397 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
398                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
399 {
400     // Validate results element by element
401     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
402     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
403     /*
404      * FLOAT output type
405      */
406     if( outputType == kFloat )
407     {
408         // Validate float results
409         float *resultPtr = (float *)(char *)resultValues;
410         float expected[4], error=0.0f;
411         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
412         for( size_t y = 0, j = 0; y < height_lod; y++ )
413         {
414             for( size_t x = 0; x < width_lod; x++, j++ )
415             {
416                 // Step 1: go through and see if the results verify for the pixel
417                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
418                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
419                 int checkOnlyOnePixel = 0;
420                 int found_pixel = 0;
421                 float offset = NORM_OFFSET;
422                 if (!imageSampler->normalized_coords
423                     || imageSampler->filter_mode != CL_FILTER_NEAREST
424                     || NORM_OFFSET == 0
425 #if defined( __APPLE__ )
426                     // Apple requires its CPU implementation to do correctly
427                     // rounded address arithmetic in all modes
428                     || !(gDeviceType & CL_DEVICE_TYPE_GPU)
429 #endif
430                 )
431                     offset = 0.0f;          // Loop only once
432 
433                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
434                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
435 
436                         // Try sampling the pixel, without flushing denormals.
437                         int containsDenormals = 0;
438                         FloatPixel maxPixel;
439                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
440                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
441                                                                     imageSampler, expected, 0, &containsDenormals );
442 
443                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
444                         // Clamp to the minimum absolute error for the format
445                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
446                         float maxErr1 =
447                             std::max(maxErr * maxPixel.p[0], FLT_MIN);
448 
449                         // Check if the result matches.
450                         if( ! (err1 <= maxErr1) )
451                         {
452                             //try flushing the denormals, if there is a failure.
453                             if( containsDenormals )
454                             {
455                                 // If implementation decide to flush subnormals to zero,
456                                 // max error needs to be adjusted
457                                 maxErr1 += 4 * FLT_MIN;
458 
459                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
460                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
461                                                                              imageSampler, expected, 0, NULL );
462 
463                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
464                             }
465                         }
466 
467                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
468                         found_pixel = (err1 <= maxErr1);
469                     }//norm_offset_x
470                 }//norm_offset_y
471 
472 
473                 // Step 2: If we did not find a match, then print out debugging info.
474                 if (!found_pixel) {
475                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
476                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
477                     checkOnlyOnePixel = 0;
478                     int shouldReturn = 0;
479                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
480                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
481 
482                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
483                             // E.g., test one pixel.
484                             if (!imageSampler->normalized_coords
485                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
486                                 || NORM_OFFSET == 0)
487                             {
488                                 norm_offset_x = 0.0f;
489                                 norm_offset_y = 0.0f;
490                                 checkOnlyOnePixel = 1;
491                             }
492 
493                             int containsDenormals = 0;
494                             FloatPixel maxPixel;
495                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
496                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
497                                                                                     imageSampler, expected, 0, &containsDenormals );
498 
499                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
500                             float maxErr1 =
501                                 std::max(maxErr * maxPixel.p[0], FLT_MIN);
502 
503 
504                             if( ! (err1 <= maxErr1) )
505                             {
506                                 //try flushing the denormals, if there is a failure.
507                                 if( containsDenormals )
508                                 {
509                                     maxErr1 += 4 * FLT_MIN;
510 
511                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
512                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
513                                                                                  imageSampler, expected, 0, NULL );
514 
515                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
516                                 }
517                             }
518                             if( ! (err1 <= maxErr1) )
519                             {
520                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
521 
522                                 float tempOut[4];
523                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
524                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
525 
526                                 log_error( "Step by step:\n" );
527                                 FloatPixel temp;
528                                 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
529                                                                                xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
530                                                                                imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
531                                 log_error( "\tulps: %2.2f  (max allowed: %2.2f)\n\n",
532                                                     Ulp_Error( resultPtr[0], expected[0] ),
533                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
534 
535                             } else {
536                                 log_error("Test error: we should have detected this passing above.\n");
537                             }
538 
539                         }//norm_offset_x
540                     }//norm_offset_y
541                     if( shouldReturn )
542                         return 1;
543                 } // if (!found_pixel)
544 
545                 resultPtr += 1;
546             }
547         }
548     }
549     else
550     {
551         log_error("Test error: Not supported format.\n");
552         return 1;
553     }
554     return 0;
555 }
556 
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)557 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
558                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
559 {
560     // Validate results element by element
561     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
562     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
563     /*
564      * FLOAT output type
565      */
566     if( outputType == kFloat )
567     {
568         // Validate float results
569         float *resultPtr = (float *)(char *)resultValues;
570         float expected[4], error=0.0f;
571         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
572         for( size_t y = 0, j = 0; y < height_lod; y++ )
573         {
574             for( size_t x = 0; x < width_lod; x++, j++ )
575             {
576                 // Step 1: go through and see if the results verify for the pixel
577                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
578                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
579                 int checkOnlyOnePixel = 0;
580                 int found_pixel = 0;
581                 float offset = NORM_OFFSET;
582                 if (!imageSampler->normalized_coords
583                     || imageSampler->filter_mode != CL_FILTER_NEAREST
584                     || NORM_OFFSET == 0
585 #if defined( __APPLE__ )
586                     // Apple requires its CPU implementation to do correctly
587                     // rounded address arithmetic in all modes
588                     || !(gDeviceType & CL_DEVICE_TYPE_GPU)
589 #endif
590                 )
591                     offset = 0.0f;          // Loop only once
592 
593                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
594                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
595 
596 
597                         // Try sampling the pixel, without flushing denormals.
598                         int containsDenormals = 0;
599                         FloatPixel maxPixel;
600                         if ( gTestMipmaps )
601                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
602                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
603                                                                         imageSampler, expected, 0, &containsDenormals, lod );
604                         else
605                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
606                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
607                                                                         imageSampler, expected, 0, &containsDenormals );
608 
609                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
610                         float err2 = ABS_ERROR(resultPtr[1], expected[1]);
611                         float err3 = ABS_ERROR(resultPtr[2], expected[2]);
612                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
613                         // Clamp to the minimum absolute error for the format
614                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
615                         if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
616                         if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
617                         if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
618                         float maxErr1 =
619                             std::max(maxErr * maxPixel.p[0], FLT_MIN);
620                         float maxErr2 =
621                             std::max(maxErr * maxPixel.p[1], FLT_MIN);
622                         float maxErr3 =
623                             std::max(maxErr * maxPixel.p[2], FLT_MIN);
624                         float maxErr4 =
625                             std::max(maxErr * maxPixel.p[3], FLT_MIN);
626 
627                         // Check if the result matches.
628                         if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
629                            ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
630                         {
631                             //try flushing the denormals, if there is a failure.
632                             if( containsDenormals )
633                             {
634                                // If implementation decide to flush subnormals to zero,
635                                // max error needs to be adjusted
636                                 maxErr1 += 4 * FLT_MIN;
637                                 maxErr2 += 4 * FLT_MIN;
638                                 maxErr3 += 4 * FLT_MIN;
639                                 maxErr4 += 4 * FLT_MIN;
640 
641                                 if(gTestMipmaps)
642                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
643                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
644                                                                                  imageSampler, expected, 0, NULL,lod );
645                                 else
646                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
647                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
648                                                                                  imageSampler, expected, 0, NULL );
649 
650                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
651                                 err2 = ABS_ERROR(resultPtr[1], expected[1]);
652                                 err3 = ABS_ERROR(resultPtr[2], expected[2]);
653                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
654                             }
655                         }
656 
657                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
658                         found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
659                     }//norm_offset_x
660                 }//norm_offset_y
661 
662 
663                 // Step 2: If we did not find a match, then print out debugging info.
664                 if (!found_pixel) {
665                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
666                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
667                     checkOnlyOnePixel = 0;
668                     int shouldReturn = 0;
669                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
670                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
671 
672                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
673                             // E.g., test one pixel.
674                             if (!imageSampler->normalized_coords
675                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
676                                 || NORM_OFFSET == 0)
677                             {
678                                 norm_offset_x = 0.0f;
679                                 norm_offset_y = 0.0f;
680                                 checkOnlyOnePixel = 1;
681                             }
682 
683                             int containsDenormals = 0;
684                             FloatPixel maxPixel;
685                             if(gTestMipmaps)
686                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
687                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
688                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
689                             else
690                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
691                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
692                                                                                         imageSampler, expected, 0, &containsDenormals );
693 
694                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
695                             float err2 = ABS_ERROR(resultPtr[1], expected[1]);
696                             float err3 = ABS_ERROR(resultPtr[2], expected[2]);
697                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
698                             float maxErr1 =
699                                 std::max(maxErr * maxPixel.p[0], FLT_MIN);
700                             float maxErr2 =
701                                 std::max(maxErr * maxPixel.p[1], FLT_MIN);
702                             float maxErr3 =
703                                 std::max(maxErr * maxPixel.p[2], FLT_MIN);
704                             float maxErr4 =
705                                 std::max(maxErr * maxPixel.p[3], FLT_MIN);
706 
707 
708                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
709                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
710                             {
711                                 //try flushing the denormals, if there is a failure.
712                                 if( containsDenormals )
713                                 {
714                                     maxErr1 += 4 * FLT_MIN;
715                                     maxErr2 += 4 * FLT_MIN;
716                                     maxErr3 += 4 * FLT_MIN;
717                                     maxErr4 += 4 * FLT_MIN;
718 
719                                     if(gTestMipmaps)
720                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
721                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
722                                                                                      imageSampler, expected, 0, NULL, lod );
723                                     else
724                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
725                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
726                                                                                      imageSampler, expected, 0, NULL );
727 
728                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
729                                     err2 = ABS_ERROR(resultPtr[1], expected[1]);
730                                     err3 = ABS_ERROR(resultPtr[2], expected[2]);
731                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
732                                 }
733                             }
734                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
735                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
736                             {
737                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
738 
739                                 float tempOut[4];
740                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
741                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
742 
743                                 log_error( "Step by step:\n" );
744                                 FloatPixel temp;
745                                 if( gTestMipmaps )
746                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
747                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
748                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
749                                  else
750                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
751                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
752                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
753                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
754                                                     Ulp_Error( resultPtr[0], expected[0] ),
755                                                     Ulp_Error( resultPtr[1], expected[1] ),
756                                                     Ulp_Error( resultPtr[2], expected[2] ),
757                                                     Ulp_Error( resultPtr[3], expected[3] ),
758                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
759 
760                             } else {
761                                 log_error("Test error: we should have detected this passing above.\n");
762                             }
763 
764                         }//norm_offset_x
765                     }//norm_offset_y
766                     if( shouldReturn )
767                         return 1;
768                 } // if (!found_pixel)
769 
770                 resultPtr += 4;
771             }
772         }
773     }
774     /*
775      * UINT output type
776      */
777     else if( outputType == kUInt )
778     {
779         // Validate unsigned integer results
780         unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
781         unsigned int expected[4];
782         float error;
783         for( size_t y = 0, j = 0; y < height_lod ; y++ )
784         {
785             for( size_t x = 0; x < width_lod ; x++, j++ )
786             {
787                 // Step 1: go through and see if the results verify for the pixel
788                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
789                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
790                 int checkOnlyOnePixel = 0;
791                 int found_pixel = 0;
792                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
793                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
794 
795                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
796                         // E.g., test one pixel.
797                         if (!imageSampler->normalized_coords
798                             || !(gDeviceType & CL_DEVICE_TYPE_GPU)
799                             || NORM_OFFSET == 0)
800                         {
801                             norm_offset_x = 0.0f;
802                             norm_offset_y = 0.0f;
803                             checkOnlyOnePixel = 1;
804                         }
805 
806                         if ( gTestMipmaps )
807                             sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
808                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
809                                                                                              imageSampler, expected, lod );
810                         else
811                             sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
812                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
813                                                                                              imageSampler, expected);
814 
815 
816                         error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
817                                        errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
818 
819                         if (error <= MAX_ERR)
820                             found_pixel = 1;
821                     }//norm_offset_x
822                 }//norm_offset_y
823 
824                 // Step 2: If we did not find a match, then print out debugging info.
825                 if (!found_pixel) {
826                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
827                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
828                     checkOnlyOnePixel = 0;
829                     int shouldReturn = 0;
830                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
831                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
832 
833                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
834                             // E.g., test one pixel.
835                             if (!imageSampler->normalized_coords
836                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
837                                 || NORM_OFFSET == 0)
838                             {
839                                 norm_offset_x = 0.0f;
840                                 norm_offset_y = 0.0f;
841                                 checkOnlyOnePixel = 1;
842                             }
843 
844                             if( gTestMipmaps )
845                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
846                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
847                                                                                                  imageSampler, expected, lod );
848                             else
849                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
850                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
851                                                                                                  imageSampler, expected);
852 
853 
854                             error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
855                                            errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
856 
857                             if( error > MAX_ERR )
858                             {
859                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
860 
861                                 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
862                                                                                          expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
863                             } else {
864                                 log_error("Test error: we should have detected this passing above.\n");
865                             }
866                         }//norm_offset_x
867                     }//norm_offset_y
868                     if( shouldReturn )
869                         return 1;
870                 } // if (!found_pixel)
871 
872                 resultPtr += 4;
873             }
874         }
875     }
876     /*
877      * INT output type
878      */
879     else
880     {
881         // Validate integer results
882         int *resultPtr = (int *)(char *)resultValues;
883         int expected[4];
884         float error;
885         for( size_t y = 0, j = 0; y < height_lod ; y++ )
886         {
887             for( size_t x = 0; x < width_lod; x++, j++ )
888             {
889                 // Step 1: go through and see if the results verify for the pixel
890                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
891                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
892                 int checkOnlyOnePixel = 0;
893                 int found_pixel = 0;
894                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
895                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
896 
897                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
898                         // E.g., test one pixel.
899                         if (!imageSampler->normalized_coords
900                             || !(gDeviceType & CL_DEVICE_TYPE_GPU)
901                             || NORM_OFFSET == 0)
902                         {
903                             norm_offset_x = 0.0f;
904                             norm_offset_y = 0.0f;
905                             checkOnlyOnePixel = 1;
906                         }
907 
908                         if ( gTestMipmaps )
909                             sample_image_pixel_offset<int>( imagePtr, imageInfo,
910                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
911                                                             imageSampler, expected , lod);
912                         else
913                             sample_image_pixel_offset<int>( imageValues, imageInfo,
914                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
915                                                             imageSampler, expected );
916 
917 
918                         error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
919                                        errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
920 
921                         if (error <= MAX_ERR)
922                             found_pixel = 1;
923                     }//norm_offset_x
924                 }//norm_offset_y
925 
926                 // Step 2: If we did not find a match, then print out debugging info.
927                 if (!found_pixel) {
928                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
929                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
930                     checkOnlyOnePixel = 0;
931                     int shouldReturn = 0;
932                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
933                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
934 
935                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
936                             // E.g., test one pixel.
937                             if (!imageSampler->normalized_coords
938                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
939                                 || NORM_OFFSET == 0)
940                             {
941                                 norm_offset_x = 0.0f;
942                                 norm_offset_y = 0.0f;
943                                 checkOnlyOnePixel = 1;
944                             }
945 
946                             if ( gTestMipmaps )
947                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
948                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
949                                                                 imageSampler, expected, lod );
950                             else
951                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
952                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
953                                                                 imageSampler, expected );
954 
955 
956                             error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
957                                            errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
958 
959                             if( error > MAX_ERR )
960                             {
961                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
962 
963                                 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
964                                                                                 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
965                             } else {
966                                 log_error("Test error: we should have detected this passing above.\n");
967                             }
968                         }//norm_offset_x
969                     }//norm_offset_y
970                     if( shouldReturn )
971                         return 1;
972                 } // if (!found_pixel)
973 
974                 resultPtr += 4;
975             }
976         }
977     }
978     return 0;
979 }
980 
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)981 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
982                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
983 {
984     // Validate results element by element
985     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
986     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
987     /*
988      * FLOAT output type
989      */
990     if( outputType == kFloat )
991     {
992         // Validate float results
993         float *resultPtr = (float *)(char *)resultValues;
994         float expected[4], error=0.0f;
995         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
996         for( size_t y = 0, j = 0; y < height_lod; y++ )
997         {
998             for( size_t x = 0; x < width_lod; x++, j++ )
999             {
1000                 // Step 1: go through and see if the results verify for the pixel
1001                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1002                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1003                 int checkOnlyOnePixel = 0;
1004                 int found_pixel = 0;
1005                 float offset = NORM_OFFSET;
1006                 if (!imageSampler->normalized_coords
1007                     || imageSampler->filter_mode != CL_FILTER_NEAREST
1008                     || NORM_OFFSET == 0
1009 #if defined( __APPLE__ )
1010                     // Apple requires its CPU implementation to do correctly
1011                     // rounded address arithmetic in all modes
1012                     || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1013 #endif
1014                 )
1015                     offset = 0.0f;          // Loop only once
1016 
1017                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
1018                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
1019 
1020 
1021                         // Try sampling the pixel, without flushing denormals.
1022                         int containsDenormals = 0;
1023                         FloatPixel maxPixel;
1024                         if ( gTestMipmaps )
1025                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1026                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1027                                                                         imageSampler, expected, 0, &containsDenormals, lod );
1028                         else
1029                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1030                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1031                                                                         imageSampler, expected, 0, &containsDenormals );
1032                         float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1033                                                sRGBmap(expected[0]));
1034                         float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1035                                                sRGBmap(expected[1]));
1036                         float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1037                                                sRGBmap(expected[2]));
1038                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1039                         float maxErr = 0.5;
1040 
1041                         // Check if the result matches.
1042                         if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1043                            ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1044                         {
1045                             //try flushing the denormals, if there is a failure.
1046                             if( containsDenormals )
1047                             {
1048                                 // If implementation decide to flush subnormals to zero,
1049                                 // max error needs to be adjusted
1050                                 maxErr += 4 * FLT_MIN;
1051 
1052                                 if(gTestMipmaps)
1053                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1054                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1055                                                                                  imageSampler, expected, 0, NULL,lod );
1056                                 else
1057                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1058                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1059                                                                                  imageSampler, expected, 0, NULL );
1060 
1061                                 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1062                                                  sRGBmap(expected[0]));
1063                                 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1064                                                  sRGBmap(expected[1]));
1065                                 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1066                                                  sRGBmap(expected[2]));
1067                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1068                             }
1069                         }
1070 
1071                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1072                         found_pixel = (err1 <= maxErr) && (err2 <= maxErr)  && (err3 <= maxErr) && (err4 <= maxErr);
1073                     }//norm_offset_x
1074                 }//norm_offset_y
1075 
1076 
1077                 // Step 2: If we did not find a match, then print out debugging info.
1078                 if (!found_pixel) {
1079                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1080                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1081                     checkOnlyOnePixel = 0;
1082                     int shouldReturn = 0;
1083                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1084                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1085 
1086                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1087                             // E.g., test one pixel.
1088                             if (!imageSampler->normalized_coords
1089                                 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1090                                 || NORM_OFFSET == 0)
1091                             {
1092                                 norm_offset_x = 0.0f;
1093                                 norm_offset_y = 0.0f;
1094                                 checkOnlyOnePixel = 1;
1095                             }
1096 
1097                             int containsDenormals = 0;
1098                             FloatPixel maxPixel;
1099                             if(gTestMipmaps)
1100                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1101                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1102                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
1103                             else
1104                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1105                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1106                                                                                         imageSampler, expected, 0, &containsDenormals );
1107 
1108                             float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1109                                                    sRGBmap(expected[0]));
1110                             float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1111                                                    sRGBmap(expected[1]));
1112                             float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1113                                                    sRGBmap(expected[2]));
1114                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1115                             float maxErr = 0.6;
1116 
1117                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1118                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1119                             {
1120                                 //try flushing the denormals, if there is a failure.
1121                                 if( containsDenormals )
1122                                 {
1123                                     // If implementation decide to flush subnormals to zero,
1124                                     // max error needs to be adjusted
1125                                     maxErr += 4 * FLT_MIN;
1126                                     if(gTestMipmaps)
1127                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1128                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1129                                                                                      imageSampler, expected, 0, NULL, lod );
1130                                     else
1131                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1132                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1133                                                                                      imageSampler, expected, 0, NULL );
1134 
1135                                     err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1136                                                      sRGBmap(expected[0]));
1137                                     err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1138                                                      sRGBmap(expected[1]));
1139                                     err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1140                                                      sRGBmap(expected[2]));
1141                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
1142                                 }
1143                             }
1144                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1145                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1146                             {
1147                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1148 
1149                                 float tempOut[4];
1150                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1151                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1152 
1153                                 log_error( "Step by step:\n" );
1154                                 FloatPixel temp;
1155                                 if( gTestMipmaps )
1156                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1157                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1158                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1159                                  else
1160                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1161                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1162                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1163                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
1164                                                     Ulp_Error( resultPtr[0], expected[0] ),
1165                                                     Ulp_Error( resultPtr[1], expected[1] ),
1166                                                     Ulp_Error( resultPtr[2], expected[2] ),
1167                                                     Ulp_Error( resultPtr[3], expected[3] ),
1168                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1169 
1170                             } else {
1171                                 log_error("Test error: we should have detected this passing above.\n");
1172                             }
1173 
1174                         }//norm_offset_x
1175                     }//norm_offset_y
1176                     if( shouldReturn )
1177                         return 1;
1178                 } // if (!found_pixel)
1179 
1180                 resultPtr += 4;
1181             }
1182         }
1183     }
1184     else {
1185         log_error("Test error: NOT SUPPORTED.\n");
1186     }
1187     return 0;
1188 }
1189 
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1190 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1191 {
1192     bool pass = true;
1193     // Compare floats
1194     if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1195     {
1196         // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1197         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1198         {
1199             if ( isnan( expected[j] ) && isnan( actual[j] ) )
1200                 continue;
1201             if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1202                 continue;
1203             if (expected[j] != actual[j])
1204             {
1205                 pass = false;
1206                 break;
1207             }
1208         }
1209     }
1210     return pass;
1211 }
1212 
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1213 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1214 {
1215     bool pass = true;
1216     // Compare half floats
1217     if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1218 
1219         // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1220         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1221             if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1222                 continue;
1223             if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1224                 continue;
1225             if (expected[j] != actual[j])
1226             {
1227                 pass = false;
1228                 break;
1229             }
1230         }
1231     }
1232     return pass;
1233 }
1234 
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1235 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1236                         image_descriptor *imageInfo, image_sampler_data *imageSampler,
1237                        bool useFloatCoords, ExplicitType outputType, MTdata d )
1238 {
1239     int error;
1240     static int initHalf = 0;
1241     cl_mem imageBuffer;
1242     cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
1243     size_t threads[2];
1244 
1245     clMemWrapper xOffsets, yOffsets, results;
1246     clSamplerWrapper actualSampler;
1247     BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1248 
1249     // The DataBuffer template class really does use delete[], not free -- IRO
1250     BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1251     BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1252 
1253     if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1254         if( DetectFloatToHalfRoundingMode(queue) )
1255             return 1;
1256 
1257     // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1258     BufferOwningPtr<char> imageValues;
1259     generate_random_image_data( imageInfo, imageValues, d );
1260 
1261     if( gDebugTrace )
1262     {
1263         log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1264         if( gTestMipmaps )
1265         {
1266             log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1267         }
1268     }
1269 
1270     // Construct testing sources
1271     clProtectedImage protImage;
1272     clMemWrapper unprotImage;
1273     cl_mem image;
1274 
1275     if(gtestTypesToRun & kReadTests)
1276     {
1277         image_read_write_flags = CL_MEM_READ_ONLY;
1278     }
1279     else
1280     {
1281         image_read_write_flags = CL_MEM_READ_WRITE;
1282     }
1283 
1284     if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1285     {
1286         if (gTestImage2DFromBuffer)
1287         {
1288             generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1289             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1290                                           imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1291             test_error( error, "Unable to create buffer" );
1292             unprotImage = create_image_2d_buffer( context,
1293                                           image_read_write_flags,
1294                                           imageInfo->format,
1295                                           imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1296                                           imageBuffer, &error );
1297 
1298         }
1299         else
1300         {
1301             // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1302             // Do not use protected images for max image size test since it rounds the row size to a page size
1303             if (gTestMaxImages) {
1304                 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1305                 unprotImage = create_image_2d( context,
1306                                         image_read_write_flags | CL_MEM_USE_HOST_PTR,
1307                                         imageInfo->format,
1308                                         imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1309                                         maxImageUseHostPtrBackingStore, &error );
1310             }
1311             else
1312             {
1313                 error = protImage.Create( context,
1314                                         image_read_write_flags,
1315                                         imageInfo->format, imageInfo->width, imageInfo->height );
1316             }
1317         }
1318 
1319         if( error != CL_SUCCESS )
1320         {
1321             if (gTestImage2DFromBuffer) {
1322                 clReleaseMemObject(imageBuffer);
1323                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1324                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1325                     return 0;
1326                 }
1327             }
1328 
1329             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1330             return error;
1331         }
1332 
1333         if (gTestMaxImages || gTestImage2DFromBuffer)
1334             image = (cl_mem)unprotImage;
1335         else
1336             image = (cl_mem)protImage;
1337     }
1338     else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1339     {
1340         if (gTestImage2DFromBuffer)
1341         {
1342             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1343                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1344             test_error( error, "Unable to create buffer" );
1345             unprotImage = create_image_2d_buffer( context,
1346                                                  image_read_write_flags,
1347                                                  imageInfo->format,
1348                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1349                                                  imageBuffer, &error );
1350 
1351         }
1352         else
1353         {
1354             // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1355             unprotImage = create_image_2d( context,
1356                                       image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1357                                       imageInfo->format,
1358                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1359                                       imageValues, &error );
1360         }
1361         if( error != CL_SUCCESS )
1362         {
1363             if (gTestImage2DFromBuffer) {
1364                 clReleaseMemObject(imageBuffer);
1365                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1366                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1367                     return 0;
1368                 }
1369             }
1370 
1371             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1372             return error;
1373         }
1374         image = unprotImage;
1375     }
1376     else // Either CL_MEM_ALLOC_HOST_PTR or none
1377     {
1378         if( gTestMipmaps )
1379         {
1380             cl_image_desc image_desc = {0};
1381             image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1382             image_desc.image_width = imageInfo->width;
1383             image_desc.image_height = imageInfo->height;
1384             image_desc.num_mip_levels = imageInfo->num_mip_levels;
1385             unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1386         }
1387         else if (gTestImage2DFromBuffer)
1388         {
1389             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1390                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1391             test_error( error, "Unable to create buffer" );
1392             unprotImage = create_image_2d_buffer( context,
1393                                                  image_read_write_flags,
1394                                                  imageInfo->format,
1395                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1396                                                  imageBuffer, &error );
1397 
1398         }
1399         else
1400         {
1401             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1402             // it works just as if no flag is specified, so we just do the same thing either way
1403             unprotImage = create_image_2d( context,
1404                                       image_read_write_flags | gMemFlagsToUse,
1405                                       imageInfo->format,
1406                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1407                                       imageValues, &error );
1408         }
1409         if( error != CL_SUCCESS )
1410         {
1411             if (gTestImage2DFromBuffer) {
1412                 clReleaseMemObject(imageBuffer);
1413                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1414                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1415                     return 0;
1416                 }
1417             }
1418 
1419             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1420             return error;
1421         }
1422         image = unprotImage;
1423     }
1424 
1425     if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1426     {
1427         if( gDebugTrace )
1428             log_info( " - Writing image...\n" );
1429 
1430         size_t origin[ 3 ] = { 0, 0, 0 };
1431         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1432 
1433         if(!gTestMipmaps)
1434         {
1435             error = clEnqueueWriteImage(queue, image, CL_TRUE,
1436                                         origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1437                                        imageValues, 0, NULL, NULL);
1438             if (error != CL_SUCCESS)
1439             {
1440                 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1441                 return error;
1442             }
1443         }
1444         else
1445         {
1446             size_t tmpNextLevelOffset = 0;
1447             for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1448             {
1449                 origin[2] = level;
1450                 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1451                                             origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1452                                             (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1453                 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1454                 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1455                 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1456             }
1457         }
1458     }
1459 
1460     if( gDebugTrace )
1461         log_info( " - Creating kernel arguments...\n" );
1462 
1463     xOffsets =
1464         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1465                        sizeof(cl_float) * imageInfo->width * imageInfo->height,
1466                        xOffsetValues, &error);
1467     test_error( error, "Unable to create x offset buffer" );
1468     yOffsets =
1469         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1470                        sizeof(cl_float) * imageInfo->width * imageInfo->height,
1471                        yOffsetValues, &error);
1472     test_error( error, "Unable to create y offset buffer" );
1473     results = clCreateBuffer(context, CL_MEM_READ_WRITE,
1474                              get_explicit_type_size(outputType) * 4
1475                                  * imageInfo->width * imageInfo->height,
1476                              NULL, &error);
1477     test_error( error, "Unable to create result buffer" );
1478 
1479     // Create sampler to use
1480     actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1481     test_error(error, "Unable to create image sampler");
1482 
1483     // Set arguments
1484     int idx = 0;
1485     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1486     test_error( error, "Unable to set kernel arguments" );
1487     if( !gUseKernelSamplers )
1488     {
1489         error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1490         test_error( error, "Unable to set kernel arguments" );
1491     }
1492     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1493     test_error( error, "Unable to set kernel arguments" );
1494     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1495     test_error( error, "Unable to set kernel arguments" );
1496     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1497     test_error( error, "Unable to set kernel arguments" );
1498 
1499     // A cast of troublesome offsets. The first one has to be zero.
1500     const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1501     int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1502     int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1503     int loopCount = 2 * float_offset_count;
1504     if( ! useFloatCoords )
1505         loopCount = 1;
1506     if (gTestMaxImages) {
1507         loopCount = 1;
1508       log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1509     }
1510 
1511     if(gtestTypesToRun & kReadWriteTests)
1512     {
1513         loopCount = 1;
1514     }
1515 
1516     // Get the maximum absolute error for this format
1517     double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1518     if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1519 
1520     if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1521         initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1522         if (initHalf) {
1523             log_info("Half rounding mode successfully detected.\n");
1524         }
1525     }
1526 
1527     size_t nextLevelOffset = 0;
1528     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1529     for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1530     {
1531         size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1532         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1533         float lod_float = (float)lod;
1534         char *imagePtr = (char *)imageValues + nextLevelOffset;
1535         if( gTestMipmaps )
1536         {
1537             if(gDebugTrace)
1538                 log_info("\t- Working at mip level %d\n", lod);
1539             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1540         }
1541 
1542         // Validate results element by element
1543         for( int q = 0; q < loopCount; q++ )
1544         {
1545             float offset = float_offsets[ q % float_offset_count ];
1546 
1547             // Init the coordinates
1548             InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1549                                 q>=float_offset_count ? -offset: offset,
1550                                 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1551 
1552             error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1553             test_error( error, "Unable to write x offsets" );
1554             error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1555             test_error( error, "Unable to write y offsets" );
1556 
1557             // Get results
1558             memset( resultValues, 0xff, resultValuesSize );
1559             clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1560 
1561             // Run the kernel
1562             threads[0] = (size_t)width_lod;
1563             threads[1] = (size_t)height_lod;
1564             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1565             test_error( error, "Unable to run kernel" );
1566 
1567             if( gDebugTrace )
1568                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1569 
1570             error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1571             test_error( error, "Unable to read results from kernel" );
1572             if( gDebugTrace )
1573                 log_info( "    results read\n" );
1574 
1575             int retCode;
1576             switch (imageInfo->format->image_channel_order) {
1577             case CL_DEPTH:
1578                 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1579                 break;
1580             case CL_sRGB:
1581             case CL_sRGBx:
1582             case CL_sRGBA:
1583             case CL_sBGRA:
1584                 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1585                 break;
1586             default:
1587                 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1588             }
1589             if (retCode)
1590                 return retCode;
1591         }
1592         end:
1593         if ( gTestMipmaps )
1594         {
1595             nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1596             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1597             height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1598         }
1599     }
1600 
1601     if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1602 
1603     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1604 }
1605 
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1606 int test_read_image_set_2D(cl_device_id device, cl_context context,
1607                            cl_command_queue queue,
1608                            const cl_image_format *format,
1609                            image_sampler_data *imageSampler, bool floatCoords,
1610                            ExplicitType outputType)
1611 {
1612     char programSrc[10240];
1613     const char *ptr;
1614     const char *readFormat;
1615     clProgramWrapper program;
1616     clKernelWrapper kernel;
1617     const char *KernelSourcePattern = NULL;
1618 
1619     if (gTestImage2DFromBuffer)
1620     {
1621         if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1622         {
1623             switch (format->image_channel_data_type)
1624             {
1625                 case CL_UNORM_INT8:
1626                 case CL_UNORM_INT16:
1627                 case CL_SNORM_INT8:
1628                 case CL_SNORM_INT16:
1629                 case CL_HALF_FLOAT:
1630                 case CL_FLOAT:
1631                 case CL_SIGNED_INT8:
1632                 case CL_SIGNED_INT16:
1633                 case CL_SIGNED_INT32:
1634                 case CL_UNSIGNED_INT8:
1635                 case CL_UNSIGNED_INT16:
1636                 case CL_UNSIGNED_INT32:
1637                     log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1638                              GetChannelTypeName( format->image_channel_data_type ));
1639                     return 0;
1640                 default:
1641                     break;
1642             }
1643         }
1644     }
1645 
1646 
1647     RandomSeed seed( gRandomSeed );
1648     int error;
1649 
1650     // Get our operating params
1651     size_t maxWidth, maxHeight;
1652     cl_ulong maxAllocSize, memSize;
1653     image_descriptor imageInfo = { 0x0 };
1654     size_t pixelSize;
1655 
1656     imageInfo.format = format;
1657     imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1658     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1659     pixelSize = get_pixel_size( imageInfo.format );
1660 
1661     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1662     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1663     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1664     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1665     test_error( error, "Unable to get max image 2D size from device" );
1666 
1667     if (memSize > (cl_ulong)SIZE_MAX) {
1668       memSize = (cl_ulong)SIZE_MAX;
1669     }
1670 
1671     // Determine types
1672     if( outputType == kInt )
1673         readFormat = "i";
1674     else if( outputType == kUInt )
1675         readFormat = "ui";
1676     else // kFloat
1677         readFormat = "f";
1678 
1679     // Construct the source
1680     const char *samplerArg = samplerKernelArg;
1681     char samplerVar[ 1024 ] = "";
1682     if( gUseKernelSamplers )
1683     {
1684         get_sampler_kernel_code( imageSampler, samplerVar );
1685         samplerArg = "";
1686     }
1687 
1688     if(gtestTypesToRun & kReadTests)
1689     {
1690         KernelSourcePattern = read2DKernelSourcePattern;
1691     }
1692     else
1693     {
1694         KernelSourcePattern = read_write2DKernelSourcePattern;
1695     }
1696 
1697 
1698     sprintf(programSrc, KernelSourcePattern,
1699             gTestMipmaps
1700                 ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
1701                 : "",
1702             (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
1703                                                       : "image2d_t",
1704             samplerArg, get_explicit_type_name(outputType),
1705             (format->image_channel_order == CL_DEPTH) ? "" : "4",
1706             gTestMipmaps ? ", float lod" : " ", samplerVar,
1707             gTestMipmaps ? lodOffsetSource : offsetSource,
1708             floatCoords ? floatKernelSource : intCoordKernelSource, readFormat,
1709             gTestMipmaps ? ", lod" : " ");
1710 
1711     ptr = programSrc;
1712     error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1713                                         "sample_kernel");
1714     test_error( error, "Unable to create testing kernel" );
1715 
1716     if( gTestSmallImages )
1717     {
1718         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1719         {
1720             imageInfo.rowPitch = imageInfo.width * pixelSize;
1721             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1722             {
1723                 if( gTestMipmaps )
1724                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1725 
1726                 if( gDebugTrace )
1727                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1728 
1729                 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1730                 if( retCode )
1731                     return retCode;
1732             }
1733         }
1734     }
1735     else if( gTestMaxImages )
1736     {
1737         // Try a specific set of maximum sizes
1738         size_t numbeOfSizes;
1739         size_t sizes[100][3];
1740 
1741         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1742 
1743         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1744         {
1745             imageInfo.width = sizes[ idx ][ 0 ];
1746             imageInfo.height = sizes[ idx ][ 1 ];
1747             imageInfo.rowPitch = imageInfo.width * pixelSize;
1748             log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1749 
1750             if( gTestMipmaps )
1751                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1752 
1753             if( gDebugTrace )
1754                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1755             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1756             if( retCode )
1757                 return retCode;
1758         }
1759     }
1760     else if( gTestRounding )
1761     {
1762         uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1763         typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1764         imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1765         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1766         while( imageInfo.height >= maxHeight / 2 )
1767         {
1768             imageInfo.width <<= 1;
1769             imageInfo.height >>= 1;
1770         }
1771 
1772         while( imageInfo.width >= maxWidth / 2 )
1773             imageInfo.width >>= 1;
1774         imageInfo.rowPitch = imageInfo.width * pixelSize;
1775 
1776         gRoundingStartValue = 0;
1777         do
1778         {
1779             if( gDebugTrace )
1780                 log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1781             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1782             if( retCode )
1783                 return retCode;
1784 
1785             gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1786 
1787         } while( gRoundingStartValue < typeRange );
1788     }
1789     else
1790     {
1791         cl_uint imagePitchAlign = 0;
1792         if (gTestImage2DFromBuffer)
1793         {
1794 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1795             error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1796 #endif
1797             if (!imagePitchAlign || error) {
1798               test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1799               imagePitchAlign = 1;
1800             }
1801         }
1802 
1803         int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1804         int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1805 
1806         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1807         {
1808             cl_ulong size;
1809             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1810             // image, the result array, plus offset arrays, will fit in the global ram space
1811             do
1812             {
1813                 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1814                 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1815 
1816                 imageInfo.rowPitch = imageInfo.width * pixelSize;
1817                 if( gTestMipmaps )
1818                 {
1819                     imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1820                     size = 4 * compute_mipmapped_image_size(imageInfo);
1821                 }
1822                 else
1823                 {
1824                     if( gEnablePitch )
1825                     {
1826                         size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1827                         imageInfo.rowPitch += extraWidth * pixelSize;
1828                     }
1829 
1830                 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1831                     if (gTestImage2DFromBuffer)
1832                     {
1833                         size_t pitch = imagePitchAlign * pixelSize;
1834                         imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1835                     }
1836 
1837                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1838                 }
1839             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
1840 
1841             if( gDebugTrace )
1842                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1843             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1844             if( retCode )
1845                 return retCode;
1846         }
1847     }
1848 
1849     return 0;
1850 }
1851