• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18 
19 #if defined( __APPLE__ )
20     #include <signal.h>
21     #include <sys/signal.h>
22     #include <setjmp.h>
23 #endif
24 
25 #define MAX_ERR 0.005f
26 #define MAX_HALF_LINEAR_ERR 0.3f
27 
28 extern bool            gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestImage2DFromBuffer, gTestMipmaps;
29 extern bool            gUseKernelSamplers;
30 extern cl_filter_mode    gFilterModeToUse;
31 extern cl_addressing_mode    gAddressModeToUse;
32 extern uint64_t gRoundingStartValue;
33 extern cl_mem_flags gMemFlagsToUse;
34 extern int gtestTypesToRun;
35 extern bool gDeviceLt20;
36 
37 #define MAX_TRIES               1
38 #define MAX_CLAMPED             1
39 
40 // Utility function to clamp down image sizes for certain tests to avoid
41 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)42 static size_t reduceImageSizeRange(size_t maxDimSize) {
43   size_t DimSize = maxDimSize/32;
44   if (DimSize < (size_t) 16)
45     return 16;
46   else if (DimSize > (size_t) 256)
47     return 256;
48   else
49     return DimSize;
50 }
51 
52 const char *read2DKernelSourcePattern =
53 "__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
54 "{\n"
55 "%s"
56 "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
57 "%s"
58 "%s"
59 "   results[offset] = read_image%s( input, imageSampler, coords %s);\n"
60 "}";
61 
62 const char *read_write2DKernelSourcePattern =
63 "__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
64 "{\n"
65 "%s"
66 "   int tidX = get_global_id(0), tidY = get_global_id(1);\n"
67 "%s"
68 "%s"
69 "   results[offset] = read_image%s( input, coords %s);\n"
70 "}";
71 
72 const char *intCoordKernelSource =
73 "   int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
74 
75 const char *floatKernelSource =
76 "   float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
77 
78 static const char *samplerKernelArg = " sampler_t imageSampler,";
79 
80 static const char *lodOffsetSource =
81 "   unsigned int lod_int = (unsigned int) lod;\n"
82 "   int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
83 "   int offset = tidY*width_lod + tidX;\n";
84 
85 static const char *offsetSource =
86 "   int offset = tidY*get_image_width(input) + tidX;\n";
87 
88 extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
89                             int x, int y, int z, float *outData );
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)90 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
91                                                 T *resultPtr, T * expected, float error,
92                                 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
93 {
94     int actualX, actualY;
95     int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
96     bool clampingErr = false, clamped = false, otherClampingBug = false;
97     int clampedX, clampedY, ignoreMe;
98 
99     clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
100 
101     if( found )
102     {
103         // Is it a clamping bug?
104         if( clamped && clampedX == actualX && clampedY == actualY )
105         {
106             if( (--numClamped) == 0 )
107             {
108                 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
109                 if (imageInfo->format->image_channel_order == CL_DEPTH)
110                 {
111                     if( printAsFloat )
112                     {
113                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
114                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
115                     }
116                     else
117                     {
118                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
119                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
120                     }
121                 }
122                 else
123                 {
124                     if( printAsFloat )
125                     {
126                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
127                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
128                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
129                     }
130                     else
131                     {
132                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
133                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
134                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
135                     }
136                 }
137                 return 1;
138             }
139             clampingErr = true;
140             otherClampingBug = true;
141         }
142     }
143     if( clamped && !otherClampingBug )
144     {
145         // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
146         if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
147         {
148             if( (--numClamped) == 0 )
149             {
150                 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
151                 if (imageInfo->format->image_channel_order == CL_DEPTH)
152                 {
153                     if( printAsFloat )
154                     {
155                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
156                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
157                     }
158                     else
159                     {
160                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
161                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
162                     }
163                 }
164                 else
165                 {
166                     if( printAsFloat )
167                     {
168                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g),\n\terror of %g\n",
169                                 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
170                                 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
171                     }
172                     else
173                     {
174                       log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
175                                 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
176                                 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
177                     }
178                 }
179                 return 1;
180             }
181             clampingErr = true;
182         }
183     }
184     if( !clampingErr )
185     {
186         if (imageInfo->format->image_channel_order == CL_DEPTH)
187         {
188             if( printAsFloat )
189             {
190               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot      (%g),\n\terror of %g\n",
191                         (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
192             }
193             else
194             {
195               log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot      (%x)\n",
196                         (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
197             }
198         }
199         else
200         {
201             if( printAsFloat )
202             {
203                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot      (%g,%g,%g,%g), error of %g\n",
204                           (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
205                           (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
206             }
207             else
208             {
209                 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot      (%x,%x,%x,%x)\n",
210                           (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
211                                     (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
212             }
213         }
214         log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
215         if( clamped )
216         {
217             log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
218         }
219         if( printAsFloat && gExtraValidateInfo)
220         {
221             log_error( "Nearby values:\n" );
222             log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
223             for( int yOff = -2; yOff <= 1; yOff++ )
224             {
225                 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
226                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
227                 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
228                 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
229                 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
230                 if (imageInfo->format->image_channel_order == CL_DEPTH)
231                 {
232                     log_error( "%d\t(%g)",clampedY + yOff, top[0] );
233                     log_error( " (%g)", real[0] );
234                     log_error( " (%g)",bot[0] );
235                     log_error( " (%g)\n",bot2[0] );
236                 }
237                 else
238                 {
239                     log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
240                     log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
241                     log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
242                     log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
243                 }
244             }
245 
246             if( clampedY < 1 )
247             {
248                 log_error( "Nearby values:\n" );
249                 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
250                 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
251                 {
252                     float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
253                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
254                     read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
255                     read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
256                     read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
257                     if (imageInfo->format->image_channel_order == CL_DEPTH)
258                     {
259                         log_error( "%d\t(%g)",clampedY + yOff, top[0] );
260                         log_error( " (%g)", real[0] );
261                         log_error( " (%g)",bot[0] );
262                         log_error( " (%g)\n",bot2[0] );
263                     }
264                     else
265                     {
266                         log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
267                         log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
268                         log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
269                         log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
270                     }
271                 }
272             }
273         }
274 
275         if( imageSampler->filter_mode != CL_FILTER_LINEAR )
276         {
277             if( found )
278                 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
279             else
280                 log_error( "\tValue not actually found in image\n" );
281         }
282         log_error( "\n" );
283 
284         numClamped = -1; // We force the clamped counter to never work
285         if( ( --numTries ) == 0 )
286         {
287             return 1;
288         }
289     }
290     return 0;
291 }
292 
293 #define CLAMP( _val, _min, _max )           ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
294 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d)295 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
296 {
297     size_t i = 0;
298     if( gDisableOffsets )
299     {
300         for( size_t y = 0; y < imageInfo->height; y++ )
301         {
302             for( size_t x = 0; x < imageInfo->width; x++, i++ )
303             {
304                 xOffsets[ i ] = (float) (xfract + (double) x);
305                 yOffsets[ i ] = (float) (yfract + (double) y);
306             }
307         }
308     }
309     else
310     {
311         for( size_t y = 0; y < imageInfo->height; y++ )
312         {
313             for( size_t x = 0; x < imageInfo->width; x++, i++ )
314             {
315                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
316                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
317             }
318         }
319     }
320 
321     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
322     {
323         i = 0;
324         for( size_t y = 0; y < imageInfo->height; y++ )
325         {
326             for( size_t x = 0; x < imageInfo->width; x++, i++ )
327             {
328                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
329                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
330             }
331         }
332     }
333 
334     if( normalized_coords )
335     {
336         i = 0;
337         for( size_t y = 0; y < imageInfo->height; y++ )
338         {
339             for( size_t x = 0; x < imageInfo->width; x++, i++ )
340             {
341                 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
342                 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
343             }
344         }
345     }
346 }
347 
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)348 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
349 {
350     size_t i = 0;
351     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
352 
353     if( gTestMipmaps )
354     {
355         width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
356         height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
357     }
358     if( gDisableOffsets )
359     {
360         for( size_t y = 0; y < height_lod; y++ )
361         {
362             for( size_t x = 0; x < width_lod; x++, i++ )
363             {
364                 xOffsets[ i ] = (float) (xfract + (float) x);
365                 yOffsets[ i ] = (float) (yfract + (float) y);
366             }
367         }
368     }
369     else
370     {
371         for( size_t y = 0; y < height_lod; y++ )
372         {
373             for( size_t x = 0; x < width_lod; x++, i++ )
374             {
375                 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
376                 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
377             }
378         }
379     }
380 
381     if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
382     {
383         i = 0;
384         for( size_t y = 0; y < height_lod; y++ )
385         {
386             for( size_t x = 0; x < width_lod; x++, i++ )
387             {
388                 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
389                 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
390             }
391         }
392     }
393 
394     if( normalized_coords )
395     {
396         i = 0;
397         for( size_t y = 0; y < height_lod; y++ )
398         {
399             for( size_t x = 0; x < width_lod; x++, i++ )
400             {
401                 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
402                 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
403             }
404         }
405     }
406 }
407 #ifndef MAX
408     #define MAX( _a, _b )           ((_a) > (_b) ? (_a) : (_b))
409 #endif
410 
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)411 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
412                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
413 {
414     // Validate results element by element
415     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
416     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
417     /*
418      * FLOAT output type
419      */
420     if( outputType == kFloat )
421     {
422         // Validate float results
423         float *resultPtr = (float *)(char *)resultValues;
424         float expected[4], error=0.0f;
425         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
426         for( size_t y = 0, j = 0; y < height_lod; y++ )
427         {
428             for( size_t x = 0; x < width_lod; x++, j++ )
429             {
430                 // Step 1: go through and see if the results verify for the pixel
431                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
432                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
433                 int checkOnlyOnePixel = 0;
434                 int found_pixel = 0;
435                 float offset = NORM_OFFSET;
436                 if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
437 #if defined( __APPLE__ )
438                     // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
439                     || gDeviceType != CL_DEVICE_TYPE_GPU
440 #endif
441                     )
442                     offset = 0.0f;          // Loop only once
443 
444                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
445                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
446 
447                         // Try sampling the pixel, without flushing denormals.
448                         int containsDenormals = 0;
449                         FloatPixel maxPixel;
450                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
451                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
452                                                                     imageSampler, expected, 0, &containsDenormals );
453 
454                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
455                         // Clamp to the minimum absolute error for the format
456                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
457                         float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
458 
459                         // Check if the result matches.
460                         if( ! (err1 <= maxErr1) )
461                         {
462                             //try flushing the denormals, if there is a failure.
463                             if( containsDenormals )
464                             {
465                                 // If implementation decide to flush subnormals to zero,
466                                 // max error needs to be adjusted
467                                 maxErr1 += 4 * FLT_MIN;
468 
469                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
470                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
471                                                                              imageSampler, expected, 0, NULL );
472 
473                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
474                             }
475                         }
476 
477                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
478                         found_pixel = (err1 <= maxErr1);
479                     }//norm_offset_x
480                 }//norm_offset_y
481 
482 
483                 // Step 2: If we did not find a match, then print out debugging info.
484                 if (!found_pixel) {
485                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
486                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
487                     checkOnlyOnePixel = 0;
488                     int shouldReturn = 0;
489                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
490                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
491 
492                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
493                             // E.g., test one pixel.
494                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
495                                 norm_offset_x = 0.0f;
496                                 norm_offset_y = 0.0f;
497                                 checkOnlyOnePixel = 1;
498                             }
499 
500                             int containsDenormals = 0;
501                             FloatPixel maxPixel;
502                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
503                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
504                                                                                     imageSampler, expected, 0, &containsDenormals );
505 
506                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
507                             float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
508 
509 
510                             if( ! (err1 <= maxErr1) )
511                             {
512                                 //try flushing the denormals, if there is a failure.
513                                 if( containsDenormals )
514                                 {
515                                     maxErr1 += 4 * FLT_MIN;
516 
517                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
518                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
519                                                                                  imageSampler, expected, 0, NULL );
520 
521                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
522                                 }
523                             }
524                             if( ! (err1 <= maxErr1) )
525                             {
526                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
527 
528                                 float tempOut[4];
529                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
530                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
531 
532                                 log_error( "Step by step:\n" );
533                                 FloatPixel temp;
534                                 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
535                                                                                xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
536                                                                                imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
537                                 log_error( "\tulps: %2.2f  (max allowed: %2.2f)\n\n",
538                                                     Ulp_Error( resultPtr[0], expected[0] ),
539                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
540 
541                             } else {
542                                 log_error("Test error: we should have detected this passing above.\n");
543                             }
544 
545                         }//norm_offset_x
546                     }//norm_offset_y
547                     if( shouldReturn )
548                         return 1;
549                 } // if (!found_pixel)
550 
551                 resultPtr += 1;
552             }
553         }
554     }
555     else
556     {
557         log_error("Test error: Not supported format.\n");
558         return 1;
559     }
560     return 0;
561 }
562 
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)563 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
564                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
565 {
566     // Validate results element by element
567     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
568     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
569     /*
570      * FLOAT output type
571      */
572     if( outputType == kFloat )
573     {
574         // Validate float results
575         float *resultPtr = (float *)(char *)resultValues;
576         float expected[4], error=0.0f;
577         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
578         for( size_t y = 0, j = 0; y < height_lod; y++ )
579         {
580             for( size_t x = 0; x < width_lod; x++, j++ )
581             {
582                 // Step 1: go through and see if the results verify for the pixel
583                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
584                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
585                 int checkOnlyOnePixel = 0;
586                 int found_pixel = 0;
587                 float offset = NORM_OFFSET;
588                 if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
589 #if defined( __APPLE__ )
590                     // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
591                     || gDeviceType != CL_DEVICE_TYPE_GPU
592 #endif
593                     )
594                     offset = 0.0f;          // Loop only once
595 
596                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
597                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
598 
599 
600                         // Try sampling the pixel, without flushing denormals.
601                         int containsDenormals = 0;
602                         FloatPixel maxPixel;
603                         if ( gTestMipmaps )
604                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
605                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
606                                                                         imageSampler, expected, 0, &containsDenormals, lod );
607                         else
608                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
609                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
610                                                                         imageSampler, expected, 0, &containsDenormals );
611 
612                         float err1 = ABS_ERROR(resultPtr[0], expected[0]);
613                         float err2 = ABS_ERROR(resultPtr[1], expected[1]);
614                         float err3 = ABS_ERROR(resultPtr[2], expected[2]);
615                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
616                         // Clamp to the minimum absolute error for the format
617                         if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
618                         if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
619                         if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
620                         if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
621                         float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
622                         float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
623                         float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
624                         float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
625 
626                         // Check if the result matches.
627                         if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
628                            ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
629                         {
630                             //try flushing the denormals, if there is a failure.
631                             if( containsDenormals )
632                             {
633                                // If implementation decide to flush subnormals to zero,
634                                // max error needs to be adjusted
635                                 maxErr1 += 4 * FLT_MIN;
636                                 maxErr2 += 4 * FLT_MIN;
637                                 maxErr3 += 4 * FLT_MIN;
638                                 maxErr4 += 4 * FLT_MIN;
639 
640                                 if(gTestMipmaps)
641                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
642                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
643                                                                                  imageSampler, expected, 0, NULL,lod );
644                                 else
645                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
646                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
647                                                                                  imageSampler, expected, 0, NULL );
648 
649                                 err1 = ABS_ERROR(resultPtr[0], expected[0]);
650                                 err2 = ABS_ERROR(resultPtr[1], expected[1]);
651                                 err3 = ABS_ERROR(resultPtr[2], expected[2]);
652                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
653                             }
654                         }
655 
656                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
657                         found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2)  && (err3 <= maxErr3) && (err4 <= maxErr4);
658                     }//norm_offset_x
659                 }//norm_offset_y
660 
661 
662                 // Step 2: If we did not find a match, then print out debugging info.
663                 if (!found_pixel) {
664                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
665                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
666                     checkOnlyOnePixel = 0;
667                     int shouldReturn = 0;
668                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
669                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
670 
671                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
672                             // E.g., test one pixel.
673                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
674                                 norm_offset_x = 0.0f;
675                                 norm_offset_y = 0.0f;
676                                 checkOnlyOnePixel = 1;
677                             }
678 
679                             int containsDenormals = 0;
680                             FloatPixel maxPixel;
681                             if(gTestMipmaps)
682                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
683                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
684                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
685                             else
686                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
687                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
688                                                                                         imageSampler, expected, 0, &containsDenormals );
689 
690                             float err1 = ABS_ERROR(resultPtr[0], expected[0]);
691                             float err2 = ABS_ERROR(resultPtr[1], expected[1]);
692                             float err3 = ABS_ERROR(resultPtr[2], expected[2]);
693                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
694                             float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
695                             float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
696                             float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
697                             float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
698 
699 
700                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
701                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
702                             {
703                                 //try flushing the denormals, if there is a failure.
704                                 if( containsDenormals )
705                                 {
706                                     maxErr1 += 4 * FLT_MIN;
707                                     maxErr2 += 4 * FLT_MIN;
708                                     maxErr3 += 4 * FLT_MIN;
709                                     maxErr4 += 4 * FLT_MIN;
710 
711                                     if(gTestMipmaps)
712                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
713                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
714                                                                                      imageSampler, expected, 0, NULL, lod );
715                                     else
716                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
717                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
718                                                                                      imageSampler, expected, 0, NULL );
719 
720                                     err1 = ABS_ERROR(resultPtr[0], expected[0]);
721                                     err2 = ABS_ERROR(resultPtr[1], expected[1]);
722                                     err3 = ABS_ERROR(resultPtr[2], expected[2]);
723                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
724                                 }
725                             }
726                             if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2)    ||
727                                ! (err3 <= maxErr3) || ! (err4 <= maxErr4)    )
728                             {
729                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
730 
731                                 float tempOut[4];
732                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
733                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
734 
735                                 log_error( "Step by step:\n" );
736                                 FloatPixel temp;
737                                 if( gTestMipmaps )
738                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
739                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
740                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
741                                  else
742                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
743                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
744                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
745                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
746                                                     Ulp_Error( resultPtr[0], expected[0] ),
747                                                     Ulp_Error( resultPtr[1], expected[1] ),
748                                                     Ulp_Error( resultPtr[2], expected[2] ),
749                                                     Ulp_Error( resultPtr[3], expected[3] ),
750                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
751 
752                             } else {
753                                 log_error("Test error: we should have detected this passing above.\n");
754                             }
755 
756                         }//norm_offset_x
757                     }//norm_offset_y
758                     if( shouldReturn )
759                         return 1;
760                 } // if (!found_pixel)
761 
762                 resultPtr += 4;
763             }
764         }
765     }
766     /*
767      * UINT output type
768      */
769     else if( outputType == kUInt )
770     {
771         // Validate unsigned integer results
772         unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
773         unsigned int expected[4];
774         float error;
775         for( size_t y = 0, j = 0; y < height_lod ; y++ )
776         {
777             for( size_t x = 0; x < width_lod ; x++, j++ )
778             {
779                 // Step 1: go through and see if the results verify for the pixel
780                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
781                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
782                 int checkOnlyOnePixel = 0;
783                 int found_pixel = 0;
784                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
785                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
786 
787                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
788                         // E.g., test one pixel.
789                         if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
790                             norm_offset_x = 0.0f;
791                             norm_offset_y = 0.0f;
792                             checkOnlyOnePixel = 1;
793                         }
794 
795                         if ( gTestMipmaps )
796                             sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
797                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
798                                                                                              imageSampler, expected, lod );
799                         else
800                             sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
801                                                                                              xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
802                                                                                              imageSampler, expected);
803 
804 
805                         error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
806                                        errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
807 
808                         if (error <= MAX_ERR)
809                             found_pixel = 1;
810                     }//norm_offset_x
811                 }//norm_offset_y
812 
813                 // Step 2: If we did not find a match, then print out debugging info.
814                 if (!found_pixel) {
815                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
816                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
817                     checkOnlyOnePixel = 0;
818                     int shouldReturn = 0;
819                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
820                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
821 
822                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
823                             // E.g., test one pixel.
824                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
825                                 norm_offset_x = 0.0f;
826                                 norm_offset_y = 0.0f;
827                                 checkOnlyOnePixel = 1;
828                             }
829 
830                             if( gTestMipmaps )
831                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
832                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
833                                                                                                  imageSampler, expected, lod );
834                             else
835                                 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
836                                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
837                                                                                                  imageSampler, expected);
838 
839 
840                             error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
841                                            errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
842 
843                             if( error > MAX_ERR )
844                             {
845                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
846 
847                                 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
848                                                                                          expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
849                             } else {
850                                 log_error("Test error: we should have detected this passing above.\n");
851                             }
852                         }//norm_offset_x
853                     }//norm_offset_y
854                     if( shouldReturn )
855                         return 1;
856                 } // if (!found_pixel)
857 
858                 resultPtr += 4;
859             }
860         }
861     }
862     /*
863      * INT output type
864      */
865     else
866     {
867         // Validate integer results
868         int *resultPtr = (int *)(char *)resultValues;
869         int expected[4];
870         float error;
871         for( size_t y = 0, j = 0; y < height_lod ; y++ )
872         {
873             for( size_t x = 0; x < width_lod; x++, j++ )
874             {
875                 // Step 1: go through and see if the results verify for the pixel
876                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
877                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
878                 int checkOnlyOnePixel = 0;
879                 int found_pixel = 0;
880                 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
881                     for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
882 
883                         // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
884                         // E.g., test one pixel.
885                         if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
886                             norm_offset_x = 0.0f;
887                             norm_offset_y = 0.0f;
888                             checkOnlyOnePixel = 1;
889                         }
890 
891                         if ( gTestMipmaps )
892                             sample_image_pixel_offset<int>( imagePtr, imageInfo,
893                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
894                                                             imageSampler, expected , lod);
895                         else
896                             sample_image_pixel_offset<int>( imageValues, imageInfo,
897                                                             xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
898                                                             imageSampler, expected );
899 
900 
901                         error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
902                                        errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
903 
904                         if (error <= MAX_ERR)
905                             found_pixel = 1;
906                     }//norm_offset_x
907                 }//norm_offset_y
908 
909                 // Step 2: If we did not find a match, then print out debugging info.
910                 if (!found_pixel) {
911                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
912                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
913                     checkOnlyOnePixel = 0;
914                     int shouldReturn = 0;
915                     for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
916                         for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
917 
918                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
919                             // E.g., test one pixel.
920                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
921                                 norm_offset_x = 0.0f;
922                                 norm_offset_y = 0.0f;
923                                 checkOnlyOnePixel = 1;
924                             }
925 
926                             if ( gTestMipmaps )
927                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
928                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
929                                                                 imageSampler, expected, lod );
930                             else
931                                 sample_image_pixel_offset<int>( imageValues, imageInfo,
932                                                                 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
933                                                                 imageSampler, expected );
934 
935 
936                             error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
937                                            errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
938 
939                             if( error > MAX_ERR )
940                             {
941                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
942 
943                                 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
944                                                                                 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
945                             } else {
946                                 log_error("Test error: we should have detected this passing above.\n");
947                             }
948                         }//norm_offset_x
949                     }//norm_offset_y
950                     if( shouldReturn )
951                         return 1;
952                 } // if (!found_pixel)
953 
954                 resultPtr += 4;
955             }
956         }
957     }
958     return 0;
959 }
960 
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)961 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
962                                                         ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
963 {
964     // Validate results element by element
965     size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
966     size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
967     /*
968      * FLOAT output type
969      */
970     if( outputType == kFloat )
971     {
972         // Validate float results
973         float *resultPtr = (float *)(char *)resultValues;
974         float expected[4], error=0.0f;
975         float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
976         for( size_t y = 0, j = 0; y < height_lod; y++ )
977         {
978             for( size_t x = 0; x < width_lod; x++, j++ )
979             {
980                 // Step 1: go through and see if the results verify for the pixel
981                 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
982                 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
983                 int checkOnlyOnePixel = 0;
984                 int found_pixel = 0;
985                 float offset = NORM_OFFSET;
986                 if (!imageSampler->normalized_coords ||  imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
987 #if defined( __APPLE__ )
988                     // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
989                     || gDeviceType != CL_DEVICE_TYPE_GPU
990 #endif
991                     )
992                     offset = 0.0f;          // Loop only once
993 
994                 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
995                     for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
996 
997 
998                         // Try sampling the pixel, without flushing denormals.
999                         int containsDenormals = 0;
1000                         FloatPixel maxPixel;
1001                         if ( gTestMipmaps )
1002                             maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1003                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1004                                                                         imageSampler, expected, 0, &containsDenormals, lod );
1005                         else
1006                             maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1007                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1008                                                                         imageSampler, expected, 0, &containsDenormals );
1009                         float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1010                                                sRGBmap(expected[0]));
1011                         float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1012                                                sRGBmap(expected[1]));
1013                         float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1014                                                sRGBmap(expected[2]));
1015                         float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1016                         float maxErr = 0.5;
1017 
1018                         // Check if the result matches.
1019                         if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1020                            ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1021                         {
1022                             //try flushing the denormals, if there is a failure.
1023                             if( containsDenormals )
1024                             {
1025                                 // If implementation decide to flush subnormals to zero,
1026                                 // max error needs to be adjusted
1027                                 maxErr += 4 * FLT_MIN;
1028 
1029                                 if(gTestMipmaps)
1030                                     maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1031                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1032                                                                                  imageSampler, expected, 0, NULL,lod );
1033                                 else
1034                                     maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1035                                                                                  xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1036                                                                                  imageSampler, expected, 0, NULL );
1037 
1038                                 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1039                                                  sRGBmap(expected[0]));
1040                                 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1041                                                  sRGBmap(expected[1]));
1042                                 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1043                                                  sRGBmap(expected[2]));
1044                                 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1045                             }
1046                         }
1047 
1048                         // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1049                         found_pixel = (err1 <= maxErr) && (err2 <= maxErr)  && (err3 <= maxErr) && (err4 <= maxErr);
1050                     }//norm_offset_x
1051                 }//norm_offset_y
1052 
1053 
1054                 // Step 2: If we did not find a match, then print out debugging info.
1055                 if (!found_pixel) {
1056                     // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1057                     // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1058                     checkOnlyOnePixel = 0;
1059                     int shouldReturn = 0;
1060                     for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1061                         for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1062 
1063                             // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1064                             // E.g., test one pixel.
1065                             if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
1066                                 norm_offset_x = 0.0f;
1067                                 norm_offset_y = 0.0f;
1068                                 checkOnlyOnePixel = 1;
1069                             }
1070 
1071                             int containsDenormals = 0;
1072                             FloatPixel maxPixel;
1073                             if(gTestMipmaps)
1074                                 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1075                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1076                                                                                         imageSampler, expected, 0, &containsDenormals, lod );
1077                             else
1078                                 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1079                                                                                         xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1080                                                                                         imageSampler, expected, 0, &containsDenormals );
1081 
1082                             float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1083                                                    sRGBmap(expected[0]));
1084                             float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1085                                                    sRGBmap(expected[1]));
1086                             float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1087                                                    sRGBmap(expected[2]));
1088                             float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1089                             float maxErr = 0.6;
1090 
1091                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1092                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1093                             {
1094                                 //try flushing the denormals, if there is a failure.
1095                                 if( containsDenormals )
1096                                 {
1097                                     // If implementation decide to flush subnormals to zero,
1098                                     // max error needs to be adjusted
1099                                     maxErr += 4 * FLT_MIN;
1100                                     if(gTestMipmaps)
1101                                         maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1102                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1103                                                                                      imageSampler, expected, 0, NULL, lod );
1104                                     else
1105                                         maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1106                                                                                      xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1107                                                                                      imageSampler, expected, 0, NULL );
1108 
1109                                     err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1110                                                      sRGBmap(expected[0]));
1111                                     err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1112                                                      sRGBmap(expected[1]));
1113                                     err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1114                                                      sRGBmap(expected[2]));
1115                                     err4 = ABS_ERROR(resultPtr[3], expected[3]);
1116                                 }
1117                             }
1118                             if( ! (err1 <= maxErr) || ! (err2 <= maxErr)    ||
1119                                ! (err3 <= maxErr) || ! (err4 <= maxErr)    )
1120                             {
1121                                 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1122 
1123                                 float tempOut[4];
1124                                 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1125                                                                                   expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1126 
1127                                 log_error( "Step by step:\n" );
1128                                 FloatPixel temp;
1129                                 if( gTestMipmaps )
1130                                      temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1131                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1132                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1133                                  else
1134                                      temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1135                                                                                     xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1136                                                                                     imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1137                                 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f  (max allowed: %2.2f)\n\n",
1138                                                     Ulp_Error( resultPtr[0], expected[0] ),
1139                                                     Ulp_Error( resultPtr[1], expected[1] ),
1140                                                     Ulp_Error( resultPtr[2], expected[2] ),
1141                                                     Ulp_Error( resultPtr[3], expected[3] ),
1142                                                     Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1143 
1144                             } else {
1145                                 log_error("Test error: we should have detected this passing above.\n");
1146                             }
1147 
1148                         }//norm_offset_x
1149                     }//norm_offset_y
1150                     if( shouldReturn )
1151                         return 1;
1152                 } // if (!found_pixel)
1153 
1154                 resultPtr += 4;
1155             }
1156         }
1157     }
1158     else {
1159         log_error("Test error: NOT SUPPORTED.\n");
1160     }
1161     return 0;
1162 }
1163 
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1164 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1165 {
1166     bool pass = true;
1167     // Compare floats
1168     if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1169     {
1170         // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1171         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1172         {
1173             if ( isnan( expected[j] ) && isnan( actual[j] ) )
1174                 continue;
1175             if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1176                 continue;
1177             pass = false;
1178             break;
1179         }
1180     }
1181     return pass;
1182 }
1183 
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1184 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1185 {
1186     bool pass = true;
1187     // Compare half floats
1188     if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1189 
1190         // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1191         for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1192             if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1193                 continue;
1194             if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1195                 continue;
1196             pass = false;
1197             break;
1198         }
1199     }
1200     return pass;
1201 }
1202 
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1203 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1204                         image_descriptor *imageInfo, image_sampler_data *imageSampler,
1205                        bool useFloatCoords, ExplicitType outputType, MTdata d )
1206 {
1207     int error;
1208     static int initHalf = 0;
1209     cl_mem imageBuffer;
1210     cl_mem_flags    image_read_write_flags = CL_MEM_READ_ONLY;
1211     size_t threads[2];
1212 
1213     clMemWrapper xOffsets, yOffsets, results;
1214     clSamplerWrapper actualSampler;
1215     BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1216 
1217     // The DataBuffer template class really does use delete[], not free -- IRO
1218     BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1219     BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1220 
1221     if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1222         if( DetectFloatToHalfRoundingMode(queue) )
1223             return 1;
1224 
1225     // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1226     BufferOwningPtr<char> imageValues;
1227     generate_random_image_data( imageInfo, imageValues, d );
1228 
1229     if( gDebugTrace )
1230     {
1231         log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1232         if( gTestMipmaps )
1233         {
1234             log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1235         }
1236     }
1237 
1238     // Construct testing sources
1239     clProtectedImage protImage;
1240     clMemWrapper unprotImage;
1241     cl_mem image;
1242 
1243     if(gtestTypesToRun & kReadTests)
1244     {
1245         image_read_write_flags = CL_MEM_READ_ONLY;
1246     }
1247     else
1248     {
1249         image_read_write_flags = CL_MEM_READ_WRITE;
1250     }
1251 
1252     if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1253     {
1254         if (gTestImage2DFromBuffer)
1255         {
1256             generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1257             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1258                                           imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1259             test_error( error, "Unable to create buffer" );
1260             unprotImage = create_image_2d_buffer( context,
1261                                           image_read_write_flags,
1262                                           imageInfo->format,
1263                                           imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1264                                           imageBuffer, &error );
1265 
1266         }
1267         else
1268         {
1269             // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1270             // Do not use protected images for max image size test since it rounds the row size to a page size
1271             if (gTestMaxImages) {
1272                 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1273                 unprotImage = create_image_2d( context,
1274                                         image_read_write_flags | CL_MEM_USE_HOST_PTR,
1275                                         imageInfo->format,
1276                                         imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1277                                         maxImageUseHostPtrBackingStore, &error );
1278             }
1279             else
1280             {
1281                 error = protImage.Create( context,
1282                                         image_read_write_flags,
1283                                         imageInfo->format, imageInfo->width, imageInfo->height );
1284             }
1285         }
1286 
1287         if( error != CL_SUCCESS )
1288         {
1289             if (gTestImage2DFromBuffer) {
1290                 clReleaseMemObject(imageBuffer);
1291                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1292                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1293                     return 0;
1294                 }
1295             }
1296 
1297             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1298             return error;
1299         }
1300 
1301         if (gTestMaxImages || gTestImage2DFromBuffer)
1302             image = (cl_mem)unprotImage;
1303         else
1304             image = (cl_mem)protImage;
1305     }
1306     else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1307     {
1308         if (gTestImage2DFromBuffer)
1309         {
1310             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1311                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1312             test_error( error, "Unable to create buffer" );
1313             unprotImage = create_image_2d_buffer( context,
1314                                                  image_read_write_flags,
1315                                                  imageInfo->format,
1316                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1317                                                  imageBuffer, &error );
1318 
1319         }
1320         else
1321         {
1322             // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1323             unprotImage = create_image_2d( context,
1324                                       image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1325                                       imageInfo->format,
1326                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1327                                       imageValues, &error );
1328         }
1329         if( error != CL_SUCCESS )
1330         {
1331             if (gTestImage2DFromBuffer) {
1332                 clReleaseMemObject(imageBuffer);
1333                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1334                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1335                     return 0;
1336                 }
1337             }
1338 
1339             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1340             return error;
1341         }
1342         image = unprotImage;
1343     }
1344     else // Either CL_MEM_ALLOC_HOST_PTR or none
1345     {
1346         if( gTestMipmaps )
1347         {
1348             cl_image_desc image_desc = {0};
1349             image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1350             image_desc.image_width = imageInfo->width;
1351             image_desc.image_height = imageInfo->height;
1352             image_desc.num_mip_levels = imageInfo->num_mip_levels;
1353             unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1354         }
1355         else if (gTestImage2DFromBuffer)
1356         {
1357             imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1358                                          imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1359             test_error( error, "Unable to create buffer" );
1360             unprotImage = create_image_2d_buffer( context,
1361                                                  image_read_write_flags,
1362                                                  imageInfo->format,
1363                                                  imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1364                                                  imageBuffer, &error );
1365 
1366         }
1367         else
1368         {
1369             // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1370             // it works just as if no flag is specified, so we just do the same thing either way
1371             unprotImage = create_image_2d( context,
1372                                       image_read_write_flags | gMemFlagsToUse,
1373                                       imageInfo->format,
1374                                       imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1375                                       imageValues, &error );
1376         }
1377         if( error != CL_SUCCESS )
1378         {
1379             if (gTestImage2DFromBuffer) {
1380                 clReleaseMemObject(imageBuffer);
1381                 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1382                     log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1383                     return 0;
1384                 }
1385             }
1386 
1387             log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1388             return error;
1389         }
1390         image = unprotImage;
1391     }
1392 
1393     if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1394     {
1395         if( gDebugTrace )
1396             log_info( " - Writing image...\n" );
1397 
1398         size_t origin[ 3 ] = { 0, 0, 0 };
1399         size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1400 
1401         if(!gTestMipmaps)
1402         {
1403             error = clEnqueueWriteImage(queue, image, CL_TRUE,
1404                                         origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1405                                        imageValues, 0, NULL, NULL);
1406             if (error != CL_SUCCESS)
1407             {
1408                 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1409                 return error;
1410             }
1411         }
1412         else
1413         {
1414             size_t tmpNextLevelOffset = 0;
1415             for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1416             {
1417                 origin[2] = level;
1418                 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1419                                             origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1420                                             (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1421                 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1422                 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1423                 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1424             }
1425         }
1426     }
1427 
1428     if( gDebugTrace )
1429         log_info( " - Creating kernel arguments...\n" );
1430 
1431     xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, xOffsetValues, &error );
1432     test_error( error, "Unable to create x offset buffer" );
1433     yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, yOffsetValues, &error );
1434     test_error( error, "Unable to create y offset buffer" );
1435     results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE),  get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height, NULL, &error );
1436     test_error( error, "Unable to create result buffer" );
1437 
1438     // Create sampler to use
1439     actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1440     test_error(error, "Unable to create image sampler");
1441 
1442     // Set arguments
1443     int idx = 0;
1444     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1445     test_error( error, "Unable to set kernel arguments" );
1446     if( !gUseKernelSamplers )
1447     {
1448         error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1449         test_error( error, "Unable to set kernel arguments" );
1450     }
1451     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1452     test_error( error, "Unable to set kernel arguments" );
1453     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1454     test_error( error, "Unable to set kernel arguments" );
1455     error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1456     test_error( error, "Unable to set kernel arguments" );
1457 
1458     // A cast of troublesome offsets. The first one has to be zero.
1459     const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1460     int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1461     int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1462     int loopCount = 2 * float_offset_count;
1463     if( ! useFloatCoords )
1464         loopCount = 1;
1465     if (gTestMaxImages) {
1466         loopCount = 1;
1467       log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1468     }
1469 
1470     if(gtestTypesToRun & kReadWriteTests)
1471     {
1472         loopCount = 1;
1473     }
1474 
1475     // Get the maximum absolute error for this format
1476     double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1477     if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1478 
1479     if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1480         initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1481         if (initHalf) {
1482             log_info("Half rounding mode successfully detected.\n");
1483         }
1484     }
1485 
1486     size_t nextLevelOffset = 0;
1487     size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1488     for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1489     {
1490         size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1491         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1492         float lod_float = (float)lod;
1493         char *imagePtr = (char *)imageValues + nextLevelOffset;
1494         if( gTestMipmaps )
1495         {
1496             if(gDebugTrace)
1497                 log_info("\t- Working at mip level %d\n", lod);
1498             error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1499         }
1500 
1501         // Validate results element by element
1502         for( int q = 0; q < loopCount; q++ )
1503         {
1504             float offset = float_offsets[ q % float_offset_count ];
1505 
1506             // Init the coordinates
1507             InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1508                                 q>=float_offset_count ? -offset: offset,
1509                                 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1510 
1511             error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1512             test_error( error, "Unable to write x offsets" );
1513             error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1514             test_error( error, "Unable to write y offsets" );
1515 
1516             // Get results
1517             memset( resultValues, 0xff, resultValuesSize );
1518             clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1519 
1520             // Run the kernel
1521             threads[0] = (size_t)width_lod;
1522             threads[1] = (size_t)height_lod;
1523             error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1524             test_error( error, "Unable to run kernel" );
1525 
1526             if( gDebugTrace )
1527                 log_info( "    reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1528 
1529             error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1530             test_error( error, "Unable to read results from kernel" );
1531             if( gDebugTrace )
1532                 log_info( "    results read\n" );
1533 
1534             int retCode;
1535             switch (imageInfo->format->image_channel_order) {
1536             case CL_DEPTH:
1537                 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1538                 break;
1539             case CL_sRGB:
1540             case CL_sRGBx:
1541             case CL_sRGBA:
1542             case CL_sBGRA:
1543                 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1544                 break;
1545             default:
1546                 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1547             }
1548             if (retCode)
1549                 return retCode;
1550         }
1551         end:
1552         if ( gTestMipmaps )
1553         {
1554             nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1555             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1556             height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1557         }
1558     }
1559 
1560     if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1561 
1562     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1563 }
1564 
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1565 int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
1566                         bool floatCoords, ExplicitType outputType )
1567 {
1568     char programSrc[10240];
1569     const char *ptr;
1570     const char *readFormat;
1571     clProgramWrapper program;
1572     clKernelWrapper kernel;
1573     const char *KernelSourcePattern = NULL;
1574 
1575     if (gTestImage2DFromBuffer)
1576     {
1577         if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1578         {
1579             switch (format->image_channel_data_type)
1580             {
1581                 case CL_UNORM_INT8:
1582                 case CL_UNORM_INT16:
1583                 case CL_SNORM_INT8:
1584                 case CL_SNORM_INT16:
1585                 case CL_HALF_FLOAT:
1586                 case CL_FLOAT:
1587                 case CL_SIGNED_INT8:
1588                 case CL_SIGNED_INT16:
1589                 case CL_SIGNED_INT32:
1590                 case CL_UNSIGNED_INT8:
1591                 case CL_UNSIGNED_INT16:
1592                 case CL_UNSIGNED_INT32:
1593                     log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1594                              GetChannelTypeName( format->image_channel_data_type ));
1595                     return 0;
1596                 default:
1597                     break;
1598             }
1599         }
1600     }
1601 
1602 
1603     RandomSeed seed( gRandomSeed );
1604     int error;
1605 
1606     // Get our operating params
1607     size_t maxWidth, maxHeight;
1608     cl_ulong maxAllocSize, memSize;
1609     image_descriptor imageInfo = { 0x0 };
1610     size_t pixelSize;
1611 
1612     imageInfo.format = format;
1613     imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1614     imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1615     pixelSize = get_pixel_size( imageInfo.format );
1616 
1617     error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1618     error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1619     error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1620     error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1621     test_error( error, "Unable to get max image 2D size from device" );
1622 
1623     if (memSize > (cl_ulong)SIZE_MAX) {
1624       memSize = (cl_ulong)SIZE_MAX;
1625     }
1626 
1627     // Determine types
1628     if( outputType == kInt )
1629         readFormat = "i";
1630     else if( outputType == kUInt )
1631         readFormat = "ui";
1632     else // kFloat
1633         readFormat = "f";
1634 
1635     // Construct the source
1636     const char *samplerArg = samplerKernelArg;
1637     char samplerVar[ 1024 ] = "";
1638     if( gUseKernelSamplers )
1639     {
1640         get_sampler_kernel_code( imageSampler, samplerVar );
1641         samplerArg = "";
1642     }
1643 
1644     if(gtestTypesToRun & kReadTests)
1645     {
1646         KernelSourcePattern = read2DKernelSourcePattern;
1647     }
1648     else
1649     {
1650         KernelSourcePattern = read_write2DKernelSourcePattern;
1651     }
1652 
1653 
1654     sprintf( programSrc, KernelSourcePattern,
1655             (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
1656             samplerArg, get_explicit_type_name( outputType ),
1657             (format->image_channel_order == CL_DEPTH) ? "" : "4",
1658             gTestMipmaps?", float lod":" ",
1659             samplerVar,
1660             gTestMipmaps? lodOffsetSource : offsetSource,
1661             floatCoords ? floatKernelSource : intCoordKernelSource,
1662             readFormat,
1663             gTestMipmaps?", lod":" ");
1664 
1665     ptr = programSrc;
1666     error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
1667     test_error( error, "Unable to create testing kernel" );
1668 
1669     if( gTestSmallImages )
1670     {
1671         for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1672         {
1673             imageInfo.rowPitch = imageInfo.width * pixelSize;
1674             for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1675             {
1676                 if( gTestMipmaps )
1677                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1678 
1679                 if( gDebugTrace )
1680                     log_info( "   at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1681 
1682                 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1683                 if( retCode )
1684                     return retCode;
1685             }
1686         }
1687     }
1688     else if( gTestMaxImages )
1689     {
1690         // Try a specific set of maximum sizes
1691         size_t numbeOfSizes;
1692         size_t sizes[100][3];
1693 
1694         get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1695 
1696         for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1697         {
1698             imageInfo.width = sizes[ idx ][ 0 ];
1699             imageInfo.height = sizes[ idx ][ 1 ];
1700             imageInfo.rowPitch = imageInfo.width * pixelSize;
1701             log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1702 
1703             if( gTestMipmaps )
1704                 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1705 
1706             if( gDebugTrace )
1707                 log_info( "   at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1708             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1709             if( retCode )
1710                 return retCode;
1711         }
1712     }
1713     else if( gTestRounding )
1714     {
1715         uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1716         typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1717         imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1718         imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1719         while( imageInfo.height >= maxHeight / 2 )
1720         {
1721             imageInfo.width <<= 1;
1722             imageInfo.height >>= 1;
1723         }
1724 
1725         while( imageInfo.width >= maxWidth / 2 )
1726             imageInfo.width >>= 1;
1727         imageInfo.rowPitch = imageInfo.width * pixelSize;
1728 
1729         gRoundingStartValue = 0;
1730         do
1731         {
1732             if( gDebugTrace )
1733                 log_info( "   at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1734             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1735             if( retCode )
1736                 return retCode;
1737 
1738             gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1739 
1740         } while( gRoundingStartValue < typeRange );
1741     }
1742     else
1743     {
1744         cl_uint imagePitchAlign = 0;
1745         if (gTestImage2DFromBuffer)
1746         {
1747 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1748             error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1749 #endif
1750             if (!imagePitchAlign || error) {
1751               test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1752               imagePitchAlign = 1;
1753             }
1754         }
1755 
1756         int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1757         int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1758 
1759         for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1760         {
1761             cl_ulong size;
1762             // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1763             // image, the result array, plus offset arrays, will fit in the global ram space
1764             do
1765             {
1766                 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1767                 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1768 
1769                 imageInfo.rowPitch = imageInfo.width * pixelSize;
1770                 if( gTestMipmaps )
1771                 {
1772                     imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1773                     size = 4 * compute_mipmapped_image_size(imageInfo);
1774                 }
1775                 else
1776                 {
1777                     if( gEnablePitch )
1778                     {
1779                         size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1780                         imageInfo.rowPitch += extraWidth * pixelSize;
1781                     }
1782 
1783                 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1784                     if (gTestImage2DFromBuffer)
1785                     {
1786                         size_t pitch = imagePitchAlign * pixelSize;
1787                         imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1788                     }
1789 
1790                     size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1791                 }
1792             } while(  size > maxAllocSize || ( size * 3 ) > memSize );
1793 
1794             if( gDebugTrace )
1795                 log_info( "   at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1796             int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1797             if( retCode )
1798                 return retCode;
1799         }
1800     }
1801 
1802     return 0;
1803 }
1804