1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18
19 #if defined( __APPLE__ )
20 #include <signal.h>
21 #include <sys/signal.h>
22 #include <setjmp.h>
23 #endif
24
25 #define MAX_ERR 0.005f
26 #define MAX_HALF_LINEAR_ERR 0.3f
27
28 extern bool gDebugTrace, gExtraValidateInfo, gDisableOffsets, gTestSmallImages, gEnablePitch, gTestMaxImages, gTestImage2DFromBuffer, gTestMipmaps;
29 extern bool gUseKernelSamplers;
30 extern cl_filter_mode gFilterModeToUse;
31 extern cl_addressing_mode gAddressModeToUse;
32 extern uint64_t gRoundingStartValue;
33 extern cl_mem_flags gMemFlagsToUse;
34 extern int gtestTypesToRun;
35 extern bool gDeviceLt20;
36
37 #define MAX_TRIES 1
38 #define MAX_CLAMPED 1
39
40 // Utility function to clamp down image sizes for certain tests to avoid
41 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)42 static size_t reduceImageSizeRange(size_t maxDimSize) {
43 size_t DimSize = maxDimSize/32;
44 if (DimSize < (size_t) 16)
45 return 16;
46 else if (DimSize > (size_t) 256)
47 return 256;
48 else
49 return DimSize;
50 }
51
52 const char *read2DKernelSourcePattern =
53 "__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
54 "{\n"
55 "%s"
56 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
57 "%s"
58 "%s"
59 " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
60 "}";
61
62 const char *read_write2DKernelSourcePattern =
63 "__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
64 "{\n"
65 "%s"
66 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
67 "%s"
68 "%s"
69 " results[offset] = read_image%s( input, coords %s);\n"
70 "}";
71
72 const char *intCoordKernelSource =
73 " int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
74
75 const char *floatKernelSource =
76 " float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
77
78 static const char *samplerKernelArg = " sampler_t imageSampler,";
79
80 static const char *lodOffsetSource =
81 " unsigned int lod_int = (unsigned int) lod;\n"
82 " int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
83 " int offset = tidY*width_lod + tidX;\n";
84
85 static const char *offsetSource =
86 " int offset = tidY*get_image_width(input) + tidX;\n";
87
88 extern void read_image_pixel_float( void *imageData, image_descriptor *imageInfo,
89 int x, int y, int z, float *outData );
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)90 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
91 T *resultPtr, T * expected, float error,
92 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
93 {
94 int actualX, actualY;
95 int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
96 bool clampingErr = false, clamped = false, otherClampingBug = false;
97 int clampedX, clampedY, ignoreMe;
98
99 clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
100
101 if( found )
102 {
103 // Is it a clamping bug?
104 if( clamped && clampedX == actualX && clampedY == actualY )
105 {
106 if( (--numClamped) == 0 )
107 {
108 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
109 if (imageInfo->format->image_channel_order == CL_DEPTH)
110 {
111 if( printAsFloat )
112 {
113 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
114 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
115 }
116 else
117 {
118 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
119 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
120 }
121 }
122 else
123 {
124 if( printAsFloat )
125 {
126 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
127 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
128 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
129 }
130 else
131 {
132 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
133 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
134 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
135 }
136 }
137 return 1;
138 }
139 clampingErr = true;
140 otherClampingBug = true;
141 }
142 }
143 if( clamped && !otherClampingBug )
144 {
145 // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
146 if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
147 {
148 if( (--numClamped) == 0 )
149 {
150 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
151 if (imageInfo->format->image_channel_order == CL_DEPTH)
152 {
153 if( printAsFloat )
154 {
155 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
156 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
157 }
158 else
159 {
160 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
161 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
162 }
163 }
164 else
165 {
166 if( printAsFloat )
167 {
168 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
169 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
170 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
171 }
172 else
173 {
174 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
175 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
176 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
177 }
178 }
179 return 1;
180 }
181 clampingErr = true;
182 }
183 }
184 if( !clampingErr )
185 {
186 if (imageInfo->format->image_channel_order == CL_DEPTH)
187 {
188 if( printAsFloat )
189 {
190 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
191 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
192 }
193 else
194 {
195 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
196 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
197 }
198 }
199 else
200 {
201 if( printAsFloat )
202 {
203 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n",
204 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
205 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
206 }
207 else
208 {
209 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
210 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
211 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
212 }
213 }
214 log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
215 if( clamped )
216 {
217 log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
218 }
219 if( printAsFloat && gExtraValidateInfo)
220 {
221 log_error( "Nearby values:\n" );
222 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
223 for( int yOff = -2; yOff <= 1; yOff++ )
224 {
225 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
226 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
227 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
228 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
229 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
230 if (imageInfo->format->image_channel_order == CL_DEPTH)
231 {
232 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
233 log_error( " (%g)", real[0] );
234 log_error( " (%g)",bot[0] );
235 log_error( " (%g)\n",bot2[0] );
236 }
237 else
238 {
239 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
240 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
241 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
242 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
243 }
244 }
245
246 if( clampedY < 1 )
247 {
248 log_error( "Nearby values:\n" );
249 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
250 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
251 {
252 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
253 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
254 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
255 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
256 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
257 if (imageInfo->format->image_channel_order == CL_DEPTH)
258 {
259 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
260 log_error( " (%g)", real[0] );
261 log_error( " (%g)",bot[0] );
262 log_error( " (%g)\n",bot2[0] );
263 }
264 else
265 {
266 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
267 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
268 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
269 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
270 }
271 }
272 }
273 }
274
275 if( imageSampler->filter_mode != CL_FILTER_LINEAR )
276 {
277 if( found )
278 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
279 else
280 log_error( "\tValue not actually found in image\n" );
281 }
282 log_error( "\n" );
283
284 numClamped = -1; // We force the clamped counter to never work
285 if( ( --numTries ) == 0 )
286 {
287 return 1;
288 }
289 }
290 return 0;
291 }
292
293 #define CLAMP( _val, _min, _max ) ((_val) < (_min) ? (_min) : (_val) > (_max) ? (_max) : (_val))
294
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d)295 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
296 {
297 size_t i = 0;
298 if( gDisableOffsets )
299 {
300 for( size_t y = 0; y < imageInfo->height; y++ )
301 {
302 for( size_t x = 0; x < imageInfo->width; x++, i++ )
303 {
304 xOffsets[ i ] = (float) (xfract + (double) x);
305 yOffsets[ i ] = (float) (yfract + (double) y);
306 }
307 }
308 }
309 else
310 {
311 for( size_t y = 0; y < imageInfo->height; y++ )
312 {
313 for( size_t x = 0; x < imageInfo->width; x++, i++ )
314 {
315 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
316 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
317 }
318 }
319 }
320
321 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
322 {
323 i = 0;
324 for( size_t y = 0; y < imageInfo->height; y++ )
325 {
326 for( size_t x = 0; x < imageInfo->width; x++, i++ )
327 {
328 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
329 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
330 }
331 }
332 }
333
334 if( normalized_coords )
335 {
336 i = 0;
337 for( size_t y = 0; y < imageInfo->height; y++ )
338 {
339 for( size_t x = 0; x < imageInfo->width; x++, i++ )
340 {
341 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
342 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
343 }
344 }
345 }
346 }
347
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)348 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
349 {
350 size_t i = 0;
351 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
352
353 if( gTestMipmaps )
354 {
355 width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
356 height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
357 }
358 if( gDisableOffsets )
359 {
360 for( size_t y = 0; y < height_lod; y++ )
361 {
362 for( size_t x = 0; x < width_lod; x++, i++ )
363 {
364 xOffsets[ i ] = (float) (xfract + (float) x);
365 yOffsets[ i ] = (float) (yfract + (float) y);
366 }
367 }
368 }
369 else
370 {
371 for( size_t y = 0; y < height_lod; y++ )
372 {
373 for( size_t x = 0; x < width_lod; x++, i++ )
374 {
375 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
376 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
377 }
378 }
379 }
380
381 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
382 {
383 i = 0;
384 for( size_t y = 0; y < height_lod; y++ )
385 {
386 for( size_t x = 0; x < width_lod; x++, i++ )
387 {
388 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
389 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
390 }
391 }
392 }
393
394 if( normalized_coords )
395 {
396 i = 0;
397 for( size_t y = 0; y < height_lod; y++ )
398 {
399 for( size_t x = 0; x < width_lod; x++, i++ )
400 {
401 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
402 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
403 }
404 }
405 }
406 }
407 #ifndef MAX
408 #define MAX( _a, _b ) ((_a) > (_b) ? (_a) : (_b))
409 #endif
410
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)411 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
412 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
413 {
414 // Validate results element by element
415 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
416 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
417 /*
418 * FLOAT output type
419 */
420 if( outputType == kFloat )
421 {
422 // Validate float results
423 float *resultPtr = (float *)(char *)resultValues;
424 float expected[4], error=0.0f;
425 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
426 for( size_t y = 0, j = 0; y < height_lod; y++ )
427 {
428 for( size_t x = 0; x < width_lod; x++, j++ )
429 {
430 // Step 1: go through and see if the results verify for the pixel
431 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
432 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
433 int checkOnlyOnePixel = 0;
434 int found_pixel = 0;
435 float offset = NORM_OFFSET;
436 if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
437 #if defined( __APPLE__ )
438 // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
439 || gDeviceType != CL_DEVICE_TYPE_GPU
440 #endif
441 )
442 offset = 0.0f; // Loop only once
443
444 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
445 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
446
447 // Try sampling the pixel, without flushing denormals.
448 int containsDenormals = 0;
449 FloatPixel maxPixel;
450 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
451 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
452 imageSampler, expected, 0, &containsDenormals );
453
454 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
455 // Clamp to the minimum absolute error for the format
456 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
457 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
458
459 // Check if the result matches.
460 if( ! (err1 <= maxErr1) )
461 {
462 //try flushing the denormals, if there is a failure.
463 if( containsDenormals )
464 {
465 // If implementation decide to flush subnormals to zero,
466 // max error needs to be adjusted
467 maxErr1 += 4 * FLT_MIN;
468
469 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
470 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
471 imageSampler, expected, 0, NULL );
472
473 err1 = ABS_ERROR(resultPtr[0], expected[0]);
474 }
475 }
476
477 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
478 found_pixel = (err1 <= maxErr1);
479 }//norm_offset_x
480 }//norm_offset_y
481
482
483 // Step 2: If we did not find a match, then print out debugging info.
484 if (!found_pixel) {
485 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
486 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
487 checkOnlyOnePixel = 0;
488 int shouldReturn = 0;
489 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
490 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
491
492 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
493 // E.g., test one pixel.
494 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
495 norm_offset_x = 0.0f;
496 norm_offset_y = 0.0f;
497 checkOnlyOnePixel = 1;
498 }
499
500 int containsDenormals = 0;
501 FloatPixel maxPixel;
502 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
503 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
504 imageSampler, expected, 0, &containsDenormals );
505
506 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
507 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
508
509
510 if( ! (err1 <= maxErr1) )
511 {
512 //try flushing the denormals, if there is a failure.
513 if( containsDenormals )
514 {
515 maxErr1 += 4 * FLT_MIN;
516
517 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
518 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
519 imageSampler, expected, 0, NULL );
520
521 err1 = ABS_ERROR(resultPtr[0], expected[0]);
522 }
523 }
524 if( ! (err1 <= maxErr1) )
525 {
526 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
527
528 float tempOut[4];
529 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
530 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
531
532 log_error( "Step by step:\n" );
533 FloatPixel temp;
534 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
535 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
536 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
537 log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n",
538 Ulp_Error( resultPtr[0], expected[0] ),
539 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
540
541 } else {
542 log_error("Test error: we should have detected this passing above.\n");
543 }
544
545 }//norm_offset_x
546 }//norm_offset_y
547 if( shouldReturn )
548 return 1;
549 } // if (!found_pixel)
550
551 resultPtr += 1;
552 }
553 }
554 }
555 else
556 {
557 log_error("Test error: Not supported format.\n");
558 return 1;
559 }
560 return 0;
561 }
562
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)563 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
564 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
565 {
566 // Validate results element by element
567 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
568 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
569 /*
570 * FLOAT output type
571 */
572 if( outputType == kFloat )
573 {
574 // Validate float results
575 float *resultPtr = (float *)(char *)resultValues;
576 float expected[4], error=0.0f;
577 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
578 for( size_t y = 0, j = 0; y < height_lod; y++ )
579 {
580 for( size_t x = 0; x < width_lod; x++, j++ )
581 {
582 // Step 1: go through and see if the results verify for the pixel
583 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
584 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
585 int checkOnlyOnePixel = 0;
586 int found_pixel = 0;
587 float offset = NORM_OFFSET;
588 if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
589 #if defined( __APPLE__ )
590 // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
591 || gDeviceType != CL_DEVICE_TYPE_GPU
592 #endif
593 )
594 offset = 0.0f; // Loop only once
595
596 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
597 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
598
599
600 // Try sampling the pixel, without flushing denormals.
601 int containsDenormals = 0;
602 FloatPixel maxPixel;
603 if ( gTestMipmaps )
604 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
605 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
606 imageSampler, expected, 0, &containsDenormals, lod );
607 else
608 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
609 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
610 imageSampler, expected, 0, &containsDenormals );
611
612 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
613 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
614 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
615 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
616 // Clamp to the minimum absolute error for the format
617 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
618 if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
619 if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
620 if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
621 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
622 float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
623 float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
624 float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
625
626 // Check if the result matches.
627 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
628 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
629 {
630 //try flushing the denormals, if there is a failure.
631 if( containsDenormals )
632 {
633 // If implementation decide to flush subnormals to zero,
634 // max error needs to be adjusted
635 maxErr1 += 4 * FLT_MIN;
636 maxErr2 += 4 * FLT_MIN;
637 maxErr3 += 4 * FLT_MIN;
638 maxErr4 += 4 * FLT_MIN;
639
640 if(gTestMipmaps)
641 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
642 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
643 imageSampler, expected, 0, NULL,lod );
644 else
645 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
646 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
647 imageSampler, expected, 0, NULL );
648
649 err1 = ABS_ERROR(resultPtr[0], expected[0]);
650 err2 = ABS_ERROR(resultPtr[1], expected[1]);
651 err3 = ABS_ERROR(resultPtr[2], expected[2]);
652 err4 = ABS_ERROR(resultPtr[3], expected[3]);
653 }
654 }
655
656 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
657 found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4);
658 }//norm_offset_x
659 }//norm_offset_y
660
661
662 // Step 2: If we did not find a match, then print out debugging info.
663 if (!found_pixel) {
664 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
665 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
666 checkOnlyOnePixel = 0;
667 int shouldReturn = 0;
668 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
669 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
670
671 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
672 // E.g., test one pixel.
673 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
674 norm_offset_x = 0.0f;
675 norm_offset_y = 0.0f;
676 checkOnlyOnePixel = 1;
677 }
678
679 int containsDenormals = 0;
680 FloatPixel maxPixel;
681 if(gTestMipmaps)
682 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
683 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
684 imageSampler, expected, 0, &containsDenormals, lod );
685 else
686 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
687 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
688 imageSampler, expected, 0, &containsDenormals );
689
690 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
691 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
692 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
693 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
694 float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN );
695 float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN );
696 float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN );
697 float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN );
698
699
700 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
701 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
702 {
703 //try flushing the denormals, if there is a failure.
704 if( containsDenormals )
705 {
706 maxErr1 += 4 * FLT_MIN;
707 maxErr2 += 4 * FLT_MIN;
708 maxErr3 += 4 * FLT_MIN;
709 maxErr4 += 4 * FLT_MIN;
710
711 if(gTestMipmaps)
712 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
713 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
714 imageSampler, expected, 0, NULL, lod );
715 else
716 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
717 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
718 imageSampler, expected, 0, NULL );
719
720 err1 = ABS_ERROR(resultPtr[0], expected[0]);
721 err2 = ABS_ERROR(resultPtr[1], expected[1]);
722 err3 = ABS_ERROR(resultPtr[2], expected[2]);
723 err4 = ABS_ERROR(resultPtr[3], expected[3]);
724 }
725 }
726 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
727 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
728 {
729 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
730
731 float tempOut[4];
732 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
733 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
734
735 log_error( "Step by step:\n" );
736 FloatPixel temp;
737 if( gTestMipmaps )
738 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
739 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
740 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
741 else
742 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
743 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
744 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
745 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
746 Ulp_Error( resultPtr[0], expected[0] ),
747 Ulp_Error( resultPtr[1], expected[1] ),
748 Ulp_Error( resultPtr[2], expected[2] ),
749 Ulp_Error( resultPtr[3], expected[3] ),
750 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
751
752 } else {
753 log_error("Test error: we should have detected this passing above.\n");
754 }
755
756 }//norm_offset_x
757 }//norm_offset_y
758 if( shouldReturn )
759 return 1;
760 } // if (!found_pixel)
761
762 resultPtr += 4;
763 }
764 }
765 }
766 /*
767 * UINT output type
768 */
769 else if( outputType == kUInt )
770 {
771 // Validate unsigned integer results
772 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
773 unsigned int expected[4];
774 float error;
775 for( size_t y = 0, j = 0; y < height_lod ; y++ )
776 {
777 for( size_t x = 0; x < width_lod ; x++, j++ )
778 {
779 // Step 1: go through and see if the results verify for the pixel
780 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
781 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
782 int checkOnlyOnePixel = 0;
783 int found_pixel = 0;
784 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
785 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
786
787 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
788 // E.g., test one pixel.
789 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
790 norm_offset_x = 0.0f;
791 norm_offset_y = 0.0f;
792 checkOnlyOnePixel = 1;
793 }
794
795 if ( gTestMipmaps )
796 sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
797 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
798 imageSampler, expected, lod );
799 else
800 sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
801 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
802 imageSampler, expected);
803
804
805 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
806 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
807
808 if (error <= MAX_ERR)
809 found_pixel = 1;
810 }//norm_offset_x
811 }//norm_offset_y
812
813 // Step 2: If we did not find a match, then print out debugging info.
814 if (!found_pixel) {
815 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
816 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
817 checkOnlyOnePixel = 0;
818 int shouldReturn = 0;
819 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
820 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
821
822 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
823 // E.g., test one pixel.
824 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
825 norm_offset_x = 0.0f;
826 norm_offset_y = 0.0f;
827 checkOnlyOnePixel = 1;
828 }
829
830 if( gTestMipmaps )
831 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
832 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
833 imageSampler, expected, lod );
834 else
835 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
836 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
837 imageSampler, expected);
838
839
840 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
841 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
842
843 if( error > MAX_ERR )
844 {
845 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
846
847 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
848 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
849 } else {
850 log_error("Test error: we should have detected this passing above.\n");
851 }
852 }//norm_offset_x
853 }//norm_offset_y
854 if( shouldReturn )
855 return 1;
856 } // if (!found_pixel)
857
858 resultPtr += 4;
859 }
860 }
861 }
862 /*
863 * INT output type
864 */
865 else
866 {
867 // Validate integer results
868 int *resultPtr = (int *)(char *)resultValues;
869 int expected[4];
870 float error;
871 for( size_t y = 0, j = 0; y < height_lod ; y++ )
872 {
873 for( size_t x = 0; x < width_lod; x++, j++ )
874 {
875 // Step 1: go through and see if the results verify for the pixel
876 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
877 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
878 int checkOnlyOnePixel = 0;
879 int found_pixel = 0;
880 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
881 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
882
883 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
884 // E.g., test one pixel.
885 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
886 norm_offset_x = 0.0f;
887 norm_offset_y = 0.0f;
888 checkOnlyOnePixel = 1;
889 }
890
891 if ( gTestMipmaps )
892 sample_image_pixel_offset<int>( imagePtr, imageInfo,
893 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
894 imageSampler, expected , lod);
895 else
896 sample_image_pixel_offset<int>( imageValues, imageInfo,
897 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
898 imageSampler, expected );
899
900
901 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
902 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
903
904 if (error <= MAX_ERR)
905 found_pixel = 1;
906 }//norm_offset_x
907 }//norm_offset_y
908
909 // Step 2: If we did not find a match, then print out debugging info.
910 if (!found_pixel) {
911 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
912 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
913 checkOnlyOnePixel = 0;
914 int shouldReturn = 0;
915 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
916 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
917
918 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
919 // E.g., test one pixel.
920 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
921 norm_offset_x = 0.0f;
922 norm_offset_y = 0.0f;
923 checkOnlyOnePixel = 1;
924 }
925
926 if ( gTestMipmaps )
927 sample_image_pixel_offset<int>( imageValues, imageInfo,
928 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
929 imageSampler, expected, lod );
930 else
931 sample_image_pixel_offset<int>( imageValues, imageInfo,
932 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
933 imageSampler, expected );
934
935
936 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
937 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
938
939 if( error > MAX_ERR )
940 {
941 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
942
943 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
944 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
945 } else {
946 log_error("Test error: we should have detected this passing above.\n");
947 }
948 }//norm_offset_x
949 }//norm_offset_y
950 if( shouldReturn )
951 return 1;
952 } // if (!found_pixel)
953
954 resultPtr += 4;
955 }
956 }
957 }
958 return 0;
959 }
960
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)961 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
962 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
963 {
964 // Validate results element by element
965 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
966 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
967 /*
968 * FLOAT output type
969 */
970 if( outputType == kFloat )
971 {
972 // Validate float results
973 float *resultPtr = (float *)(char *)resultValues;
974 float expected[4], error=0.0f;
975 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
976 for( size_t y = 0, j = 0; y < height_lod; y++ )
977 {
978 for( size_t x = 0; x < width_lod; x++, j++ )
979 {
980 // Step 1: go through and see if the results verify for the pixel
981 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
982 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
983 int checkOnlyOnePixel = 0;
984 int found_pixel = 0;
985 float offset = NORM_OFFSET;
986 if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0
987 #if defined( __APPLE__ )
988 // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes
989 || gDeviceType != CL_DEVICE_TYPE_GPU
990 #endif
991 )
992 offset = 0.0f; // Loop only once
993
994 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
995 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
996
997
998 // Try sampling the pixel, without flushing denormals.
999 int containsDenormals = 0;
1000 FloatPixel maxPixel;
1001 if ( gTestMipmaps )
1002 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1003 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1004 imageSampler, expected, 0, &containsDenormals, lod );
1005 else
1006 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1007 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1008 imageSampler, expected, 0, &containsDenormals );
1009 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1010 sRGBmap(expected[0]));
1011 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1012 sRGBmap(expected[1]));
1013 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1014 sRGBmap(expected[2]));
1015 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1016 float maxErr = 0.5;
1017
1018 // Check if the result matches.
1019 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1020 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1021 {
1022 //try flushing the denormals, if there is a failure.
1023 if( containsDenormals )
1024 {
1025 // If implementation decide to flush subnormals to zero,
1026 // max error needs to be adjusted
1027 maxErr += 4 * FLT_MIN;
1028
1029 if(gTestMipmaps)
1030 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1031 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1032 imageSampler, expected, 0, NULL,lod );
1033 else
1034 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1035 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1036 imageSampler, expected, 0, NULL );
1037
1038 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1039 sRGBmap(expected[0]));
1040 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1041 sRGBmap(expected[1]));
1042 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1043 sRGBmap(expected[2]));
1044 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1045 }
1046 }
1047
1048 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1049 found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr);
1050 }//norm_offset_x
1051 }//norm_offset_y
1052
1053
1054 // Step 2: If we did not find a match, then print out debugging info.
1055 if (!found_pixel) {
1056 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1057 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1058 checkOnlyOnePixel = 0;
1059 int shouldReturn = 0;
1060 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1061 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1062
1063 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1064 // E.g., test one pixel.
1065 if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) {
1066 norm_offset_x = 0.0f;
1067 norm_offset_y = 0.0f;
1068 checkOnlyOnePixel = 1;
1069 }
1070
1071 int containsDenormals = 0;
1072 FloatPixel maxPixel;
1073 if(gTestMipmaps)
1074 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1075 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1076 imageSampler, expected, 0, &containsDenormals, lod );
1077 else
1078 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1079 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1080 imageSampler, expected, 0, &containsDenormals );
1081
1082 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1083 sRGBmap(expected[0]));
1084 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1085 sRGBmap(expected[1]));
1086 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1087 sRGBmap(expected[2]));
1088 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1089 float maxErr = 0.6;
1090
1091 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1092 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1093 {
1094 //try flushing the denormals, if there is a failure.
1095 if( containsDenormals )
1096 {
1097 // If implementation decide to flush subnormals to zero,
1098 // max error needs to be adjusted
1099 maxErr += 4 * FLT_MIN;
1100 if(gTestMipmaps)
1101 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1102 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1103 imageSampler, expected, 0, NULL, lod );
1104 else
1105 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1106 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1107 imageSampler, expected, 0, NULL );
1108
1109 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1110 sRGBmap(expected[0]));
1111 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1112 sRGBmap(expected[1]));
1113 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1114 sRGBmap(expected[2]));
1115 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1116 }
1117 }
1118 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1119 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1120 {
1121 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1122
1123 float tempOut[4];
1124 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1125 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1126
1127 log_error( "Step by step:\n" );
1128 FloatPixel temp;
1129 if( gTestMipmaps )
1130 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1131 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1132 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1133 else
1134 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1135 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1136 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1137 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
1138 Ulp_Error( resultPtr[0], expected[0] ),
1139 Ulp_Error( resultPtr[1], expected[1] ),
1140 Ulp_Error( resultPtr[2], expected[2] ),
1141 Ulp_Error( resultPtr[3], expected[3] ),
1142 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1143
1144 } else {
1145 log_error("Test error: we should have detected this passing above.\n");
1146 }
1147
1148 }//norm_offset_x
1149 }//norm_offset_y
1150 if( shouldReturn )
1151 return 1;
1152 } // if (!found_pixel)
1153
1154 resultPtr += 4;
1155 }
1156 }
1157 }
1158 else {
1159 log_error("Test error: NOT SUPPORTED.\n");
1160 }
1161 return 0;
1162 }
1163
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1164 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1165 {
1166 bool pass = true;
1167 // Compare floats
1168 if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1169 {
1170 // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1171 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1172 {
1173 if ( isnan( expected[j] ) && isnan( actual[j] ) )
1174 continue;
1175 if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1176 continue;
1177 pass = false;
1178 break;
1179 }
1180 }
1181 return pass;
1182 }
1183
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1184 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1185 {
1186 bool pass = true;
1187 // Compare half floats
1188 if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1189
1190 // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1191 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1192 if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1193 continue;
1194 if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1195 continue;
1196 pass = false;
1197 break;
1198 }
1199 }
1200 return pass;
1201 }
1202
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1203 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1204 image_descriptor *imageInfo, image_sampler_data *imageSampler,
1205 bool useFloatCoords, ExplicitType outputType, MTdata d )
1206 {
1207 int error;
1208 static int initHalf = 0;
1209 cl_mem imageBuffer;
1210 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
1211 size_t threads[2];
1212
1213 clMemWrapper xOffsets, yOffsets, results;
1214 clSamplerWrapper actualSampler;
1215 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1216
1217 // The DataBuffer template class really does use delete[], not free -- IRO
1218 BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1219 BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1220
1221 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1222 if( DetectFloatToHalfRoundingMode(queue) )
1223 return 1;
1224
1225 // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1226 BufferOwningPtr<char> imageValues;
1227 generate_random_image_data( imageInfo, imageValues, d );
1228
1229 if( gDebugTrace )
1230 {
1231 log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1232 if( gTestMipmaps )
1233 {
1234 log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1235 }
1236 }
1237
1238 // Construct testing sources
1239 clProtectedImage protImage;
1240 clMemWrapper unprotImage;
1241 cl_mem image;
1242
1243 if(gtestTypesToRun & kReadTests)
1244 {
1245 image_read_write_flags = CL_MEM_READ_ONLY;
1246 }
1247 else
1248 {
1249 image_read_write_flags = CL_MEM_READ_WRITE;
1250 }
1251
1252 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1253 {
1254 if (gTestImage2DFromBuffer)
1255 {
1256 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1257 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1258 imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1259 test_error( error, "Unable to create buffer" );
1260 unprotImage = create_image_2d_buffer( context,
1261 image_read_write_flags,
1262 imageInfo->format,
1263 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1264 imageBuffer, &error );
1265
1266 }
1267 else
1268 {
1269 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1270 // Do not use protected images for max image size test since it rounds the row size to a page size
1271 if (gTestMaxImages) {
1272 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1273 unprotImage = create_image_2d( context,
1274 image_read_write_flags | CL_MEM_USE_HOST_PTR,
1275 imageInfo->format,
1276 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1277 maxImageUseHostPtrBackingStore, &error );
1278 }
1279 else
1280 {
1281 error = protImage.Create( context,
1282 image_read_write_flags,
1283 imageInfo->format, imageInfo->width, imageInfo->height );
1284 }
1285 }
1286
1287 if( error != CL_SUCCESS )
1288 {
1289 if (gTestImage2DFromBuffer) {
1290 clReleaseMemObject(imageBuffer);
1291 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1292 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1293 return 0;
1294 }
1295 }
1296
1297 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1298 return error;
1299 }
1300
1301 if (gTestMaxImages || gTestImage2DFromBuffer)
1302 image = (cl_mem)unprotImage;
1303 else
1304 image = (cl_mem)protImage;
1305 }
1306 else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1307 {
1308 if (gTestImage2DFromBuffer)
1309 {
1310 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1311 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1312 test_error( error, "Unable to create buffer" );
1313 unprotImage = create_image_2d_buffer( context,
1314 image_read_write_flags,
1315 imageInfo->format,
1316 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1317 imageBuffer, &error );
1318
1319 }
1320 else
1321 {
1322 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1323 unprotImage = create_image_2d( context,
1324 image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1325 imageInfo->format,
1326 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1327 imageValues, &error );
1328 }
1329 if( error != CL_SUCCESS )
1330 {
1331 if (gTestImage2DFromBuffer) {
1332 clReleaseMemObject(imageBuffer);
1333 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1334 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1335 return 0;
1336 }
1337 }
1338
1339 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1340 return error;
1341 }
1342 image = unprotImage;
1343 }
1344 else // Either CL_MEM_ALLOC_HOST_PTR or none
1345 {
1346 if( gTestMipmaps )
1347 {
1348 cl_image_desc image_desc = {0};
1349 image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1350 image_desc.image_width = imageInfo->width;
1351 image_desc.image_height = imageInfo->height;
1352 image_desc.num_mip_levels = imageInfo->num_mip_levels;
1353 unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1354 }
1355 else if (gTestImage2DFromBuffer)
1356 {
1357 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1358 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1359 test_error( error, "Unable to create buffer" );
1360 unprotImage = create_image_2d_buffer( context,
1361 image_read_write_flags,
1362 imageInfo->format,
1363 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1364 imageBuffer, &error );
1365
1366 }
1367 else
1368 {
1369 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1370 // it works just as if no flag is specified, so we just do the same thing either way
1371 unprotImage = create_image_2d( context,
1372 image_read_write_flags | gMemFlagsToUse,
1373 imageInfo->format,
1374 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1375 imageValues, &error );
1376 }
1377 if( error != CL_SUCCESS )
1378 {
1379 if (gTestImage2DFromBuffer) {
1380 clReleaseMemObject(imageBuffer);
1381 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1382 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1383 return 0;
1384 }
1385 }
1386
1387 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1388 return error;
1389 }
1390 image = unprotImage;
1391 }
1392
1393 if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1394 {
1395 if( gDebugTrace )
1396 log_info( " - Writing image...\n" );
1397
1398 size_t origin[ 3 ] = { 0, 0, 0 };
1399 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1400
1401 if(!gTestMipmaps)
1402 {
1403 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1404 origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1405 imageValues, 0, NULL, NULL);
1406 if (error != CL_SUCCESS)
1407 {
1408 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1409 return error;
1410 }
1411 }
1412 else
1413 {
1414 size_t tmpNextLevelOffset = 0;
1415 for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1416 {
1417 origin[2] = level;
1418 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1419 origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1420 (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1421 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1422 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1423 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1424 }
1425 }
1426 }
1427
1428 if( gDebugTrace )
1429 log_info( " - Creating kernel arguments...\n" );
1430
1431 xOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, xOffsetValues, &error );
1432 test_error( error, "Unable to create x offset buffer" );
1433 yOffsets = clCreateBuffer( context, (cl_mem_flags)( CL_MEM_COPY_HOST_PTR ), sizeof( cl_float ) * imageInfo->width * imageInfo->height, yOffsetValues, &error );
1434 test_error( error, "Unable to create y offset buffer" );
1435 results = clCreateBuffer( context, (cl_mem_flags)(CL_MEM_READ_WRITE), get_explicit_type_size( outputType ) * 4 * imageInfo->width * imageInfo->height, NULL, &error );
1436 test_error( error, "Unable to create result buffer" );
1437
1438 // Create sampler to use
1439 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1440 test_error(error, "Unable to create image sampler");
1441
1442 // Set arguments
1443 int idx = 0;
1444 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1445 test_error( error, "Unable to set kernel arguments" );
1446 if( !gUseKernelSamplers )
1447 {
1448 error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1449 test_error( error, "Unable to set kernel arguments" );
1450 }
1451 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1452 test_error( error, "Unable to set kernel arguments" );
1453 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1454 test_error( error, "Unable to set kernel arguments" );
1455 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1456 test_error( error, "Unable to set kernel arguments" );
1457
1458 // A cast of troublesome offsets. The first one has to be zero.
1459 const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1460 int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1461 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1462 int loopCount = 2 * float_offset_count;
1463 if( ! useFloatCoords )
1464 loopCount = 1;
1465 if (gTestMaxImages) {
1466 loopCount = 1;
1467 log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1468 }
1469
1470 if(gtestTypesToRun & kReadWriteTests)
1471 {
1472 loopCount = 1;
1473 }
1474
1475 // Get the maximum absolute error for this format
1476 double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1477 if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1478
1479 if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1480 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1481 if (initHalf) {
1482 log_info("Half rounding mode successfully detected.\n");
1483 }
1484 }
1485
1486 size_t nextLevelOffset = 0;
1487 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1488 for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1489 {
1490 size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1491 BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1492 float lod_float = (float)lod;
1493 char *imagePtr = (char *)imageValues + nextLevelOffset;
1494 if( gTestMipmaps )
1495 {
1496 if(gDebugTrace)
1497 log_info("\t- Working at mip level %d\n", lod);
1498 error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1499 }
1500
1501 // Validate results element by element
1502 for( int q = 0; q < loopCount; q++ )
1503 {
1504 float offset = float_offsets[ q % float_offset_count ];
1505
1506 // Init the coordinates
1507 InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1508 q>=float_offset_count ? -offset: offset,
1509 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1510
1511 error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1512 test_error( error, "Unable to write x offsets" );
1513 error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1514 test_error( error, "Unable to write y offsets" );
1515
1516 // Get results
1517 memset( resultValues, 0xff, resultValuesSize );
1518 clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1519
1520 // Run the kernel
1521 threads[0] = (size_t)width_lod;
1522 threads[1] = (size_t)height_lod;
1523 error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1524 test_error( error, "Unable to run kernel" );
1525
1526 if( gDebugTrace )
1527 log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1528
1529 error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1530 test_error( error, "Unable to read results from kernel" );
1531 if( gDebugTrace )
1532 log_info( " results read\n" );
1533
1534 int retCode;
1535 switch (imageInfo->format->image_channel_order) {
1536 case CL_DEPTH:
1537 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1538 break;
1539 case CL_sRGB:
1540 case CL_sRGBx:
1541 case CL_sRGBA:
1542 case CL_sBGRA:
1543 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1544 break;
1545 default:
1546 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1547 }
1548 if (retCode)
1549 return retCode;
1550 }
1551 end:
1552 if ( gTestMipmaps )
1553 {
1554 nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1555 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1556 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1557 }
1558 }
1559
1560 if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1561
1562 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1563 }
1564
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1565 int test_read_image_set_2D( cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format, image_sampler_data *imageSampler,
1566 bool floatCoords, ExplicitType outputType )
1567 {
1568 char programSrc[10240];
1569 const char *ptr;
1570 const char *readFormat;
1571 clProgramWrapper program;
1572 clKernelWrapper kernel;
1573 const char *KernelSourcePattern = NULL;
1574
1575 if (gTestImage2DFromBuffer)
1576 {
1577 if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1578 {
1579 switch (format->image_channel_data_type)
1580 {
1581 case CL_UNORM_INT8:
1582 case CL_UNORM_INT16:
1583 case CL_SNORM_INT8:
1584 case CL_SNORM_INT16:
1585 case CL_HALF_FLOAT:
1586 case CL_FLOAT:
1587 case CL_SIGNED_INT8:
1588 case CL_SIGNED_INT16:
1589 case CL_SIGNED_INT32:
1590 case CL_UNSIGNED_INT8:
1591 case CL_UNSIGNED_INT16:
1592 case CL_UNSIGNED_INT32:
1593 log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1594 GetChannelTypeName( format->image_channel_data_type ));
1595 return 0;
1596 default:
1597 break;
1598 }
1599 }
1600 }
1601
1602
1603 RandomSeed seed( gRandomSeed );
1604 int error;
1605
1606 // Get our operating params
1607 size_t maxWidth, maxHeight;
1608 cl_ulong maxAllocSize, memSize;
1609 image_descriptor imageInfo = { 0x0 };
1610 size_t pixelSize;
1611
1612 imageInfo.format = format;
1613 imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1614 imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1615 pixelSize = get_pixel_size( imageInfo.format );
1616
1617 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1618 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1619 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1620 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1621 test_error( error, "Unable to get max image 2D size from device" );
1622
1623 if (memSize > (cl_ulong)SIZE_MAX) {
1624 memSize = (cl_ulong)SIZE_MAX;
1625 }
1626
1627 // Determine types
1628 if( outputType == kInt )
1629 readFormat = "i";
1630 else if( outputType == kUInt )
1631 readFormat = "ui";
1632 else // kFloat
1633 readFormat = "f";
1634
1635 // Construct the source
1636 const char *samplerArg = samplerKernelArg;
1637 char samplerVar[ 1024 ] = "";
1638 if( gUseKernelSamplers )
1639 {
1640 get_sampler_kernel_code( imageSampler, samplerVar );
1641 samplerArg = "";
1642 }
1643
1644 if(gtestTypesToRun & kReadTests)
1645 {
1646 KernelSourcePattern = read2DKernelSourcePattern;
1647 }
1648 else
1649 {
1650 KernelSourcePattern = read_write2DKernelSourcePattern;
1651 }
1652
1653
1654 sprintf( programSrc, KernelSourcePattern,
1655 (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t",
1656 samplerArg, get_explicit_type_name( outputType ),
1657 (format->image_channel_order == CL_DEPTH) ? "" : "4",
1658 gTestMipmaps?", float lod":" ",
1659 samplerVar,
1660 gTestMipmaps? lodOffsetSource : offsetSource,
1661 floatCoords ? floatKernelSource : intCoordKernelSource,
1662 readFormat,
1663 gTestMipmaps?", lod":" ");
1664
1665 ptr = programSrc;
1666 error = create_single_kernel_helper_with_build_options( context, &program, &kernel, 1, &ptr, "sample_kernel", gDeviceLt20 ? "" : "-cl-std=CL2.0");
1667 test_error( error, "Unable to create testing kernel" );
1668
1669 if( gTestSmallImages )
1670 {
1671 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1672 {
1673 imageInfo.rowPitch = imageInfo.width * pixelSize;
1674 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1675 {
1676 if( gTestMipmaps )
1677 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1678
1679 if( gDebugTrace )
1680 log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1681
1682 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1683 if( retCode )
1684 return retCode;
1685 }
1686 }
1687 }
1688 else if( gTestMaxImages )
1689 {
1690 // Try a specific set of maximum sizes
1691 size_t numbeOfSizes;
1692 size_t sizes[100][3];
1693
1694 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1695
1696 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1697 {
1698 imageInfo.width = sizes[ idx ][ 0 ];
1699 imageInfo.height = sizes[ idx ][ 1 ];
1700 imageInfo.rowPitch = imageInfo.width * pixelSize;
1701 log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1702
1703 if( gTestMipmaps )
1704 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1705
1706 if( gDebugTrace )
1707 log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1708 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1709 if( retCode )
1710 return retCode;
1711 }
1712 }
1713 else if( gTestRounding )
1714 {
1715 uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1716 typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1717 imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1718 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1719 while( imageInfo.height >= maxHeight / 2 )
1720 {
1721 imageInfo.width <<= 1;
1722 imageInfo.height >>= 1;
1723 }
1724
1725 while( imageInfo.width >= maxWidth / 2 )
1726 imageInfo.width >>= 1;
1727 imageInfo.rowPitch = imageInfo.width * pixelSize;
1728
1729 gRoundingStartValue = 0;
1730 do
1731 {
1732 if( gDebugTrace )
1733 log_info( " at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1734 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1735 if( retCode )
1736 return retCode;
1737
1738 gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1739
1740 } while( gRoundingStartValue < typeRange );
1741 }
1742 else
1743 {
1744 cl_uint imagePitchAlign = 0;
1745 if (gTestImage2DFromBuffer)
1746 {
1747 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1748 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1749 #endif
1750 if (!imagePitchAlign || error) {
1751 test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1752 imagePitchAlign = 1;
1753 }
1754 }
1755
1756 int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1757 int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1758
1759 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1760 {
1761 cl_ulong size;
1762 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1763 // image, the result array, plus offset arrays, will fit in the global ram space
1764 do
1765 {
1766 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1767 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1768
1769 imageInfo.rowPitch = imageInfo.width * pixelSize;
1770 if( gTestMipmaps )
1771 {
1772 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1773 size = 4 * compute_mipmapped_image_size(imageInfo);
1774 }
1775 else
1776 {
1777 if( gEnablePitch )
1778 {
1779 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1780 imageInfo.rowPitch += extraWidth * pixelSize;
1781 }
1782
1783 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1784 if (gTestImage2DFromBuffer)
1785 {
1786 size_t pitch = imagePitchAlign * pixelSize;
1787 imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1788 }
1789
1790 size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1791 }
1792 } while( size > maxAllocSize || ( size * 3 ) > memSize );
1793
1794 if( gDebugTrace )
1795 log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1796 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1797 if( retCode )
1798 return retCode;
1799 }
1800 }
1801
1802 return 0;
1803 }
1804