1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18
19 #include <algorithm>
20
21 #if defined( __APPLE__ )
22 #include <signal.h>
23 #include <sys/signal.h>
24 #include <setjmp.h>
25 #endif
26
27 extern bool gTestImage2DFromBuffer;
28
29 // Utility function to clamp down image sizes for certain tests to avoid
30 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)31 static size_t reduceImageSizeRange(size_t maxDimSize) {
32 size_t DimSize = maxDimSize/32;
33 if (DimSize < (size_t) 16)
34 return 16;
35 else if (DimSize > (size_t) 256)
36 return 256;
37 else
38 return DimSize;
39 }
40
41 const char *read2DKernelSourcePattern =
42 "%s\n"
43 "__kernel void sample_kernel( read_only %s input,%s __global float "
44 "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
45 "{\n"
46 "%s"
47 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
48 "%s"
49 "%s"
50 " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
51 "}";
52
53 const char *read_write2DKernelSourcePattern =
54 "%s\n"
55 "__kernel void sample_kernel( read_write %s input,%s __global float "
56 "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n"
57 "{\n"
58 "%s"
59 " int tidX = get_global_id(0), tidY = get_global_id(1);\n"
60 "%s"
61 "%s"
62 " results[offset] = read_image%s( input, coords %s);\n"
63 "}";
64
65 const char *intCoordKernelSource =
66 " int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n";
67
68 const char *floatKernelSource =
69 " float2 coords = (float2)( (float)( xOffsets[offset] ), (float)( yOffsets[offset] ) );\n";
70
71 static const char *samplerKernelArg = " sampler_t imageSampler,";
72
73 static const char *lodOffsetSource =
74 " unsigned int lod_int = (unsigned int) lod;\n"
75 " int width_lod = (get_image_width(input) >> lod_int) ?(get_image_width(input) >> lod_int):1 ;\n"
76 " int offset = tidY*width_lod + tidX;\n";
77
78 static const char *offsetSource =
79 " int offset = tidY*get_image_width(input) + tidX;\n";
80
determine_validation_error(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float xAddressOffset,float yAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod=0)81 template <class T> int determine_validation_error( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
82 T *resultPtr, T * expected, float error,
83 float x, float y, float xAddressOffset, float yAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod = 0 )
84 {
85 int actualX, actualY;
86 int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, NULL, lod );
87 bool clampingErr = false, clamped = false, otherClampingBug = false;
88 int clampedX, clampedY, ignoreMe;
89
90 clamped = get_integer_coords_offset( x, y, 0.f, xAddressOffset, yAddressOffset, 0.0f, imageInfo->width, imageInfo->height, 0, imageSampler, imageInfo, clampedX, clampedY, ignoreMe );
91
92 if( found )
93 {
94 // Is it a clamping bug?
95 if( clamped && clampedX == actualX && clampedY == actualY )
96 {
97 if( (--numClamped) == 0 )
98 {
99 log_error( "ERROR: TEST FAILED: Read is erroneously clamping coordinates for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
100 if (imageInfo->format->image_channel_order == CL_DEPTH)
101 {
102 if( printAsFloat )
103 {
104 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
105 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
106 }
107 else
108 {
109 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
110 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
111 }
112 }
113 else
114 {
115 if( printAsFloat )
116 {
117 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
118 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
119 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
120 }
121 else
122 {
123 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
124 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
125 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
126 }
127 }
128 return 1;
129 }
130 clampingErr = true;
131 otherClampingBug = true;
132 }
133 }
134 if( clamped && !otherClampingBug )
135 {
136 // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
137 if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
138 {
139 if( (--numClamped) == 0 )
140 {
141 log_error( "ERROR: TEST FAILED: Clamping is erroneously returning border color for image size %ld x %ld!\n", imageInfo->width, imageInfo->height );
142 if (imageInfo->format->image_channel_order == CL_DEPTH)
143 {
144 if( printAsFloat )
145 {
146 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
147 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
148 }
149 else
150 {
151 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
152 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
153 }
154 }
155 else
156 {
157 if( printAsFloat )
158 {
159 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g),\n\terror of %g\n",
160 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
161 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
162 }
163 else
164 {
165 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
166 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
167 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
168 }
169 }
170 return 1;
171 }
172 clampingErr = true;
173 }
174 }
175 if( !clampingErr )
176 {
177 if (imageInfo->format->image_channel_order == CL_DEPTH)
178 {
179 if( printAsFloat )
180 {
181 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g),\n\tgot (%g),\n\terror of %g\n",
182 (int)j, x, x, y, y, (float)expected[ 0 ], (float)resultPtr[ 0 ], error );
183 }
184 else
185 {
186 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x),\n\tgot (%x)\n",
187 (int)j, x, x, y, y, (int)expected[ 0 ], (int)resultPtr[ 0 ] );
188 }
189 }
190 else
191 {
192 if( printAsFloat )
193 {
194 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%g,%g,%g,%g),\n\tgot (%g,%g,%g,%g), error of %g\n",
195 (int)j, x, x, y, y, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
196 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
197 }
198 else
199 {
200 log_error( "Sample %d: coord {%f(%.6a), %f(%.6a)} did not validate!\n\tExpected (%x,%x,%x,%x),\n\tgot (%x,%x,%x,%x)\n",
201 (int)j, x, x, y, y, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
202 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
203 }
204 }
205 log_error( "img size %ld,%ld (pitch %ld)", imageInfo->width, imageInfo->height, imageInfo->rowPitch );
206 if( clamped )
207 {
208 log_error( " which would clamp to %d,%d\n", clampedX, clampedY );
209 }
210 if( printAsFloat && gExtraValidateInfo)
211 {
212 log_error( "Nearby values:\n" );
213 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
214 for( int yOff = -2; yOff <= 1; yOff++ )
215 {
216 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
217 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
218 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
219 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
220 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
221 if (imageInfo->format->image_channel_order == CL_DEPTH)
222 {
223 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
224 log_error( " (%g)", real[0] );
225 log_error( " (%g)",bot[0] );
226 log_error( " (%g)\n",bot2[0] );
227 }
228 else
229 {
230 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
231 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
232 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
233 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
234 }
235 }
236
237 if( clampedY < 1 )
238 {
239 log_error( "Nearby values:\n" );
240 log_error( "\t%d\t%d\t%d\t%d\n", clampedX - 2, clampedX - 1, clampedX, clampedX + 1 );
241 for( int yOff = (int)imageInfo->height - 2; yOff <= (int)imageInfo->height + 1; yOff++ )
242 {
243 float top[ 4 ], real[ 4 ], bot[ 4 ], bot2[ 4 ];
244 read_image_pixel_float( imagePtr, imageInfo, clampedX - 2 , clampedY + yOff, 0, top );
245 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 ,clampedY + yOff, 0, real );
246 read_image_pixel_float( imagePtr, imageInfo, clampedX, clampedY + yOff, 0, bot );
247 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, 0, bot2 );
248 if (imageInfo->format->image_channel_order == CL_DEPTH)
249 {
250 log_error( "%d\t(%g)",clampedY + yOff, top[0] );
251 log_error( " (%g)", real[0] );
252 log_error( " (%g)",bot[0] );
253 log_error( " (%g)\n",bot2[0] );
254 }
255 else
256 {
257 log_error( "%d\t(%g,%g,%g,%g)",clampedY + yOff, top[0], top[1], top[2], top[3] );
258 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
259 log_error( " (%g,%g,%g,%g)",bot[0], bot[1], bot[2], bot[3] );
260 log_error( " (%g,%g,%g,%g)\n",bot2[0], bot2[1], bot2[2], bot2[3] );
261 }
262 }
263 }
264 }
265
266 if( imageSampler->filter_mode != CL_FILTER_LINEAR )
267 {
268 if( found )
269 log_error( "\tValue really found in image at %d,%d (%s)\n", actualX, actualY, ( found > 1 ) ? "NOT unique!!" : "unique" );
270 else
271 log_error( "\tValue not actually found in image\n" );
272 }
273 log_error( "\n" );
274
275 numClamped = -1; // We force the clamped counter to never work
276 if( ( --numTries ) == 0 )
277 {
278 return 1;
279 }
280 }
281 return 0;
282 }
283
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d)284 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
285 {
286 size_t i = 0;
287 if( gDisableOffsets )
288 {
289 for( size_t y = 0; y < imageInfo->height; y++ )
290 {
291 for( size_t x = 0; x < imageInfo->width; x++, i++ )
292 {
293 xOffsets[ i ] = (float) (xfract + (double) x);
294 yOffsets[ i ] = (float) (yfract + (double) y);
295 }
296 }
297 }
298 else
299 {
300 for( size_t y = 0; y < imageInfo->height; y++ )
301 {
302 for( size_t x = 0; x < imageInfo->width; x++, i++ )
303 {
304 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
305 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
306 }
307 }
308 }
309
310 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
311 {
312 i = 0;
313 for( size_t y = 0; y < imageInfo->height; y++ )
314 {
315 for( size_t x = 0; x < imageInfo->width; x++, i++ )
316 {
317 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
318 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
319 }
320 }
321 }
322
323 if( normalized_coords )
324 {
325 i = 0;
326 for( size_t y = 0; y < imageInfo->height; y++ )
327 {
328 for( size_t x = 0; x < imageInfo->width; x++, i++ )
329 {
330 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
331 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
332 }
333 }
334 }
335 }
336
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float xfract,float yfract,int normalized_coords,MTdata d,size_t lod)337 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
338 {
339 size_t i = 0;
340 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
341
342 if( gTestMipmaps )
343 {
344 width_lod = (imageInfo->width >> lod)?(imageInfo->width >> lod):1;
345 height_lod = (imageInfo->height >> lod)?(imageInfo->height >> lod):1;
346 }
347 if( gDisableOffsets )
348 {
349 for( size_t y = 0; y < height_lod; y++ )
350 {
351 for( size_t x = 0; x < width_lod; x++, i++ )
352 {
353 xOffsets[ i ] = (float) (xfract + (float) x);
354 yOffsets[ i ] = (float) (yfract + (float) y);
355 }
356 }
357 }
358 else
359 {
360 for( size_t y = 0; y < height_lod; y++ )
361 {
362 for( size_t x = 0; x < width_lod; x++, i++ )
363 {
364 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
365 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
366 }
367 }
368 }
369
370 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
371 {
372 i = 0;
373 for( size_t y = 0; y < height_lod; y++ )
374 {
375 for( size_t x = 0; x < width_lod; x++, i++ )
376 {
377 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
378 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)height_lod - 1.0);
379 }
380 }
381 }
382
383 if( normalized_coords )
384 {
385 i = 0;
386 for( size_t y = 0; y < height_lod; y++ )
387 {
388 for( size_t x = 0; x < width_lod; x++, i++ )
389 {
390 xOffsets[ i ] = (float) ((float) xOffsets[ i ] / (float) width_lod);
391 yOffsets[ i ] = (float) ((float) yOffsets[ i ] / (float) height_lod);
392 }
393 }
394 }
395 }
396
validate_image_2D_depth_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)397 int validate_image_2D_depth_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
398 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
399 {
400 // Validate results element by element
401 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
402 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
403 /*
404 * FLOAT output type
405 */
406 if( outputType == kFloat )
407 {
408 // Validate float results
409 float *resultPtr = (float *)(char *)resultValues;
410 float expected[4], error=0.0f;
411 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
412 for( size_t y = 0, j = 0; y < height_lod; y++ )
413 {
414 for( size_t x = 0; x < width_lod; x++, j++ )
415 {
416 // Step 1: go through and see if the results verify for the pixel
417 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
418 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
419 int checkOnlyOnePixel = 0;
420 int found_pixel = 0;
421 float offset = NORM_OFFSET;
422 if (!imageSampler->normalized_coords
423 || imageSampler->filter_mode != CL_FILTER_NEAREST
424 || NORM_OFFSET == 0
425 #if defined( __APPLE__ )
426 // Apple requires its CPU implementation to do correctly
427 // rounded address arithmetic in all modes
428 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
429 #endif
430 )
431 offset = 0.0f; // Loop only once
432
433 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
434 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
435
436 // Try sampling the pixel, without flushing denormals.
437 int containsDenormals = 0;
438 FloatPixel maxPixel;
439 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
440 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
441 imageSampler, expected, 0, &containsDenormals );
442
443 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
444 // Clamp to the minimum absolute error for the format
445 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
446 float maxErr1 =
447 std::max(maxErr * maxPixel.p[0], FLT_MIN);
448
449 // Check if the result matches.
450 if( ! (err1 <= maxErr1) )
451 {
452 //try flushing the denormals, if there is a failure.
453 if( containsDenormals )
454 {
455 // If implementation decide to flush subnormals to zero,
456 // max error needs to be adjusted
457 maxErr1 += 4 * FLT_MIN;
458
459 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
460 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
461 imageSampler, expected, 0, NULL );
462
463 err1 = ABS_ERROR(resultPtr[0], expected[0]);
464 }
465 }
466
467 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
468 found_pixel = (err1 <= maxErr1);
469 }//norm_offset_x
470 }//norm_offset_y
471
472
473 // Step 2: If we did not find a match, then print out debugging info.
474 if (!found_pixel) {
475 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
476 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
477 checkOnlyOnePixel = 0;
478 int shouldReturn = 0;
479 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
480 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
481
482 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
483 // E.g., test one pixel.
484 if (!imageSampler->normalized_coords
485 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
486 || NORM_OFFSET == 0)
487 {
488 norm_offset_x = 0.0f;
489 norm_offset_y = 0.0f;
490 checkOnlyOnePixel = 1;
491 }
492
493 int containsDenormals = 0;
494 FloatPixel maxPixel;
495 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
496 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
497 imageSampler, expected, 0, &containsDenormals );
498
499 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
500 float maxErr1 =
501 std::max(maxErr * maxPixel.p[0], FLT_MIN);
502
503
504 if( ! (err1 <= maxErr1) )
505 {
506 //try flushing the denormals, if there is a failure.
507 if( containsDenormals )
508 {
509 maxErr1 += 4 * FLT_MIN;
510
511 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
512 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
513 imageSampler, expected, 0, NULL );
514
515 err1 = ABS_ERROR(resultPtr[0], expected[0]);
516 }
517 }
518 if( ! (err1 <= maxErr1) )
519 {
520 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
521
522 float tempOut[4];
523 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
524 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
525
526 log_error( "Step by step:\n" );
527 FloatPixel temp;
528 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
529 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
530 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
531 log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n",
532 Ulp_Error( resultPtr[0], expected[0] ),
533 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
534
535 } else {
536 log_error("Test error: we should have detected this passing above.\n");
537 }
538
539 }//norm_offset_x
540 }//norm_offset_y
541 if( shouldReturn )
542 return 1;
543 } // if (!found_pixel)
544
545 resultPtr += 1;
546 }
547 }
548 }
549 else
550 {
551 log_error("Test error: Not supported format.\n");
552 return 1;
553 }
554 return 0;
555 }
556
validate_image_2D_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)557 int validate_image_2D_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
558 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
559 {
560 // Validate results element by element
561 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
562 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
563 /*
564 * FLOAT output type
565 */
566 if( outputType == kFloat )
567 {
568 // Validate float results
569 float *resultPtr = (float *)(char *)resultValues;
570 float expected[4], error=0.0f;
571 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
572 for( size_t y = 0, j = 0; y < height_lod; y++ )
573 {
574 for( size_t x = 0; x < width_lod; x++, j++ )
575 {
576 // Step 1: go through and see if the results verify for the pixel
577 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
578 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
579 int checkOnlyOnePixel = 0;
580 int found_pixel = 0;
581 float offset = NORM_OFFSET;
582 if (!imageSampler->normalized_coords
583 || imageSampler->filter_mode != CL_FILTER_NEAREST
584 || NORM_OFFSET == 0
585 #if defined( __APPLE__ )
586 // Apple requires its CPU implementation to do correctly
587 // rounded address arithmetic in all modes
588 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
589 #endif
590 )
591 offset = 0.0f; // Loop only once
592
593 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
594 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
595
596
597 // Try sampling the pixel, without flushing denormals.
598 int containsDenormals = 0;
599 FloatPixel maxPixel;
600 if ( gTestMipmaps )
601 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
602 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
603 imageSampler, expected, 0, &containsDenormals, lod );
604 else
605 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
606 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
607 imageSampler, expected, 0, &containsDenormals );
608
609 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
610 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
611 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
612 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
613 // Clamp to the minimum absolute error for the format
614 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
615 if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
616 if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
617 if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
618 float maxErr1 =
619 std::max(maxErr * maxPixel.p[0], FLT_MIN);
620 float maxErr2 =
621 std::max(maxErr * maxPixel.p[1], FLT_MIN);
622 float maxErr3 =
623 std::max(maxErr * maxPixel.p[2], FLT_MIN);
624 float maxErr4 =
625 std::max(maxErr * maxPixel.p[3], FLT_MIN);
626
627 // Check if the result matches.
628 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
629 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
630 {
631 //try flushing the denormals, if there is a failure.
632 if( containsDenormals )
633 {
634 // If implementation decide to flush subnormals to zero,
635 // max error needs to be adjusted
636 maxErr1 += 4 * FLT_MIN;
637 maxErr2 += 4 * FLT_MIN;
638 maxErr3 += 4 * FLT_MIN;
639 maxErr4 += 4 * FLT_MIN;
640
641 if(gTestMipmaps)
642 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
643 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
644 imageSampler, expected, 0, NULL,lod );
645 else
646 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
647 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
648 imageSampler, expected, 0, NULL );
649
650 err1 = ABS_ERROR(resultPtr[0], expected[0]);
651 err2 = ABS_ERROR(resultPtr[1], expected[1]);
652 err3 = ABS_ERROR(resultPtr[2], expected[2]);
653 err4 = ABS_ERROR(resultPtr[3], expected[3]);
654 }
655 }
656
657 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
658 found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4);
659 }//norm_offset_x
660 }//norm_offset_y
661
662
663 // Step 2: If we did not find a match, then print out debugging info.
664 if (!found_pixel) {
665 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
666 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
667 checkOnlyOnePixel = 0;
668 int shouldReturn = 0;
669 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
670 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
671
672 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
673 // E.g., test one pixel.
674 if (!imageSampler->normalized_coords
675 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
676 || NORM_OFFSET == 0)
677 {
678 norm_offset_x = 0.0f;
679 norm_offset_y = 0.0f;
680 checkOnlyOnePixel = 1;
681 }
682
683 int containsDenormals = 0;
684 FloatPixel maxPixel;
685 if(gTestMipmaps)
686 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
687 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
688 imageSampler, expected, 0, &containsDenormals, lod );
689 else
690 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
691 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
692 imageSampler, expected, 0, &containsDenormals );
693
694 float err1 = ABS_ERROR(resultPtr[0], expected[0]);
695 float err2 = ABS_ERROR(resultPtr[1], expected[1]);
696 float err3 = ABS_ERROR(resultPtr[2], expected[2]);
697 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
698 float maxErr1 =
699 std::max(maxErr * maxPixel.p[0], FLT_MIN);
700 float maxErr2 =
701 std::max(maxErr * maxPixel.p[1], FLT_MIN);
702 float maxErr3 =
703 std::max(maxErr * maxPixel.p[2], FLT_MIN);
704 float maxErr4 =
705 std::max(maxErr * maxPixel.p[3], FLT_MIN);
706
707
708 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
709 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
710 {
711 //try flushing the denormals, if there is a failure.
712 if( containsDenormals )
713 {
714 maxErr1 += 4 * FLT_MIN;
715 maxErr2 += 4 * FLT_MIN;
716 maxErr3 += 4 * FLT_MIN;
717 maxErr4 += 4 * FLT_MIN;
718
719 if(gTestMipmaps)
720 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
721 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
722 imageSampler, expected, 0, NULL, lod );
723 else
724 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
725 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
726 imageSampler, expected, 0, NULL );
727
728 err1 = ABS_ERROR(resultPtr[0], expected[0]);
729 err2 = ABS_ERROR(resultPtr[1], expected[1]);
730 err3 = ABS_ERROR(resultPtr[2], expected[2]);
731 err4 = ABS_ERROR(resultPtr[3], expected[3]);
732 }
733 }
734 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) ||
735 ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
736 {
737 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
738
739 float tempOut[4];
740 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
741 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
742
743 log_error( "Step by step:\n" );
744 FloatPixel temp;
745 if( gTestMipmaps )
746 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
747 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
748 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
749 else
750 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
751 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
752 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
753 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
754 Ulp_Error( resultPtr[0], expected[0] ),
755 Ulp_Error( resultPtr[1], expected[1] ),
756 Ulp_Error( resultPtr[2], expected[2] ),
757 Ulp_Error( resultPtr[3], expected[3] ),
758 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
759
760 } else {
761 log_error("Test error: we should have detected this passing above.\n");
762 }
763
764 }//norm_offset_x
765 }//norm_offset_y
766 if( shouldReturn )
767 return 1;
768 } // if (!found_pixel)
769
770 resultPtr += 4;
771 }
772 }
773 }
774 /*
775 * UINT output type
776 */
777 else if( outputType == kUInt )
778 {
779 // Validate unsigned integer results
780 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
781 unsigned int expected[4];
782 float error;
783 for( size_t y = 0, j = 0; y < height_lod ; y++ )
784 {
785 for( size_t x = 0; x < width_lod ; x++, j++ )
786 {
787 // Step 1: go through and see if the results verify for the pixel
788 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
789 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
790 int checkOnlyOnePixel = 0;
791 int found_pixel = 0;
792 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
793 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
794
795 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
796 // E.g., test one pixel.
797 if (!imageSampler->normalized_coords
798 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
799 || NORM_OFFSET == 0)
800 {
801 norm_offset_x = 0.0f;
802 norm_offset_y = 0.0f;
803 checkOnlyOnePixel = 1;
804 }
805
806 if ( gTestMipmaps )
807 sample_image_pixel_offset<unsigned int>( (char*)imagePtr, imageInfo,
808 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
809 imageSampler, expected, lod );
810 else
811 sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
812 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
813 imageSampler, expected);
814
815
816 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
817 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
818
819 if (error <= MAX_ERR)
820 found_pixel = 1;
821 }//norm_offset_x
822 }//norm_offset_y
823
824 // Step 2: If we did not find a match, then print out debugging info.
825 if (!found_pixel) {
826 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
827 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
828 checkOnlyOnePixel = 0;
829 int shouldReturn = 0;
830 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
831 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
832
833 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
834 // E.g., test one pixel.
835 if (!imageSampler->normalized_coords
836 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
837 || NORM_OFFSET == 0)
838 {
839 norm_offset_x = 0.0f;
840 norm_offset_y = 0.0f;
841 checkOnlyOnePixel = 1;
842 }
843
844 if( gTestMipmaps )
845 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
846 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
847 imageSampler, expected, lod );
848 else
849 sample_image_pixel_offset<unsigned int>( imagePtr , imageInfo,
850 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
851 imageSampler, expected);
852
853
854 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
855 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
856
857 if( error > MAX_ERR )
858 {
859 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
860
861 shouldReturn |= determine_validation_error<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
862 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
863 } else {
864 log_error("Test error: we should have detected this passing above.\n");
865 }
866 }//norm_offset_x
867 }//norm_offset_y
868 if( shouldReturn )
869 return 1;
870 } // if (!found_pixel)
871
872 resultPtr += 4;
873 }
874 }
875 }
876 /*
877 * INT output type
878 */
879 else
880 {
881 // Validate integer results
882 int *resultPtr = (int *)(char *)resultValues;
883 int expected[4];
884 float error;
885 for( size_t y = 0, j = 0; y < height_lod ; y++ )
886 {
887 for( size_t x = 0; x < width_lod; x++, j++ )
888 {
889 // Step 1: go through and see if the results verify for the pixel
890 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
891 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
892 int checkOnlyOnePixel = 0;
893 int found_pixel = 0;
894 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
895 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
896
897 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
898 // E.g., test one pixel.
899 if (!imageSampler->normalized_coords
900 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
901 || NORM_OFFSET == 0)
902 {
903 norm_offset_x = 0.0f;
904 norm_offset_y = 0.0f;
905 checkOnlyOnePixel = 1;
906 }
907
908 if ( gTestMipmaps )
909 sample_image_pixel_offset<int>( imagePtr, imageInfo,
910 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
911 imageSampler, expected , lod);
912 else
913 sample_image_pixel_offset<int>( imageValues, imageInfo,
914 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
915 imageSampler, expected );
916
917
918 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
919 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
920
921 if (error <= MAX_ERR)
922 found_pixel = 1;
923 }//norm_offset_x
924 }//norm_offset_y
925
926 // Step 2: If we did not find a match, then print out debugging info.
927 if (!found_pixel) {
928 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
929 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
930 checkOnlyOnePixel = 0;
931 int shouldReturn = 0;
932 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
933 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
934
935 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
936 // E.g., test one pixel.
937 if (!imageSampler->normalized_coords
938 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
939 || NORM_OFFSET == 0)
940 {
941 norm_offset_x = 0.0f;
942 norm_offset_y = 0.0f;
943 checkOnlyOnePixel = 1;
944 }
945
946 if ( gTestMipmaps )
947 sample_image_pixel_offset<int>( imageValues, imageInfo,
948 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
949 imageSampler, expected, lod );
950 else
951 sample_image_pixel_offset<int>( imageValues, imageInfo,
952 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
953 imageSampler, expected );
954
955
956 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
957 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
958
959 if( error > MAX_ERR )
960 {
961 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
962
963 shouldReturn |= determine_validation_error<int>( imagePtr, imageInfo, imageSampler, resultPtr,
964 expected, error, xOffsetValues[j], yOffsetValues[j], norm_offset_x, norm_offset_y, j, numTries, numClamped, false, lod );
965 } else {
966 log_error("Test error: we should have detected this passing above.\n");
967 }
968 }//norm_offset_x
969 }//norm_offset_y
970 if( shouldReturn )
971 return 1;
972 } // if (!found_pixel)
973
974 resultPtr += 4;
975 }
976 }
977 }
978 return 0;
979 }
980
validate_image_2D_sRGB_results(void * imageValues,void * resultValues,double formatAbsoluteError,float * xOffsetValues,float * yOffsetValues,ExplicitType outputType,int & numTries,int & numClamped,image_sampler_data * imageSampler,image_descriptor * imageInfo,size_t lod,char * imagePtr)981 int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double formatAbsoluteError, float *xOffsetValues, float *yOffsetValues,
982 ExplicitType outputType, int &numTries, int &numClamped, image_sampler_data *imageSampler, image_descriptor *imageInfo, size_t lod, char *imagePtr)
983 {
984 // Validate results element by element
985 size_t width_lod = (imageInfo->width >> lod ) ?(imageInfo->width >> lod ) : 1;
986 size_t height_lod = (imageInfo->height >> lod ) ?(imageInfo->height >> lod ) : 1;
987 /*
988 * FLOAT output type
989 */
990 if( outputType == kFloat )
991 {
992 // Validate float results
993 float *resultPtr = (float *)(char *)resultValues;
994 float expected[4], error=0.0f;
995 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 0 /*not 3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
996 for( size_t y = 0, j = 0; y < height_lod; y++ )
997 {
998 for( size_t x = 0; x < width_lod; x++, j++ )
999 {
1000 // Step 1: go through and see if the results verify for the pixel
1001 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1002 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1003 int checkOnlyOnePixel = 0;
1004 int found_pixel = 0;
1005 float offset = NORM_OFFSET;
1006 if (!imageSampler->normalized_coords
1007 || imageSampler->filter_mode != CL_FILTER_NEAREST
1008 || NORM_OFFSET == 0
1009 #if defined( __APPLE__ )
1010 // Apple requires its CPU implementation to do correctly
1011 // rounded address arithmetic in all modes
1012 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1013 #endif
1014 )
1015 offset = 0.0f; // Loop only once
1016
1017 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) {
1018 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel; norm_offset_y += NORM_OFFSET) {
1019
1020
1021 // Try sampling the pixel, without flushing denormals.
1022 int containsDenormals = 0;
1023 FloatPixel maxPixel;
1024 if ( gTestMipmaps )
1025 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1026 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1027 imageSampler, expected, 0, &containsDenormals, lod );
1028 else
1029 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1030 xOffsetValues[ j ], yOffsetValues[ j ], 0.0f, norm_offset_x, norm_offset_y, 0.0f,
1031 imageSampler, expected, 0, &containsDenormals );
1032 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1033 sRGBmap(expected[0]));
1034 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1035 sRGBmap(expected[1]));
1036 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1037 sRGBmap(expected[2]));
1038 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1039 float maxErr = 0.5;
1040
1041 // Check if the result matches.
1042 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1043 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1044 {
1045 //try flushing the denormals, if there is a failure.
1046 if( containsDenormals )
1047 {
1048 // If implementation decide to flush subnormals to zero,
1049 // max error needs to be adjusted
1050 maxErr += 4 * FLT_MIN;
1051
1052 if(gTestMipmaps)
1053 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1054 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1055 imageSampler, expected, 0, NULL,lod );
1056 else
1057 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1058 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1059 imageSampler, expected, 0, NULL );
1060
1061 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1062 sRGBmap(expected[0]));
1063 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1064 sRGBmap(expected[1]));
1065 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1066 sRGBmap(expected[2]));
1067 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1068 }
1069 }
1070
1071 // If the final result DOES match, then we've found a valid result and we're done with this pixel.
1072 found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr);
1073 }//norm_offset_x
1074 }//norm_offset_y
1075
1076
1077 // Step 2: If we did not find a match, then print out debugging info.
1078 if (!found_pixel) {
1079 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1080 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1081 checkOnlyOnePixel = 0;
1082 int shouldReturn = 0;
1083 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1084 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1085
1086 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1087 // E.g., test one pixel.
1088 if (!imageSampler->normalized_coords
1089 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1090 || NORM_OFFSET == 0)
1091 {
1092 norm_offset_x = 0.0f;
1093 norm_offset_y = 0.0f;
1094 checkOnlyOnePixel = 1;
1095 }
1096
1097 int containsDenormals = 0;
1098 FloatPixel maxPixel;
1099 if(gTestMipmaps)
1100 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1101 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1102 imageSampler, expected, 0, &containsDenormals, lod );
1103 else
1104 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1105 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1106 imageSampler, expected, 0, &containsDenormals );
1107
1108 float err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1109 sRGBmap(expected[0]));
1110 float err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1111 sRGBmap(expected[1]));
1112 float err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1113 sRGBmap(expected[2]));
1114 float err4 = ABS_ERROR(resultPtr[3], expected[3]);
1115 float maxErr = 0.6;
1116
1117 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1118 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1119 {
1120 //try flushing the denormals, if there is a failure.
1121 if( containsDenormals )
1122 {
1123 // If implementation decide to flush subnormals to zero,
1124 // max error needs to be adjusted
1125 maxErr += 4 * FLT_MIN;
1126 if(gTestMipmaps)
1127 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1128 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1129 imageSampler, expected, 0, NULL, lod );
1130 else
1131 maxPixel = sample_image_pixel_float_offset( imageValues, imageInfo,
1132 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1133 imageSampler, expected, 0, NULL );
1134
1135 err1 = ABS_ERROR(sRGBmap(resultPtr[0]),
1136 sRGBmap(expected[0]));
1137 err2 = ABS_ERROR(sRGBmap(resultPtr[1]),
1138 sRGBmap(expected[1]));
1139 err3 = ABS_ERROR(sRGBmap(resultPtr[2]),
1140 sRGBmap(expected[2]));
1141 err4 = ABS_ERROR(resultPtr[3], expected[3]);
1142 }
1143 }
1144 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) ||
1145 ! (err3 <= maxErr) || ! (err4 <= maxErr) )
1146 {
1147 log_error("FAILED norm_offsets: %g , %g:\n", norm_offset_x, norm_offset_y);
1148
1149 float tempOut[4];
1150 shouldReturn |= determine_validation_error<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1151 expected, error, xOffsetValues[ j ], yOffsetValues[ j ], norm_offset_x, norm_offset_y, j, numTries, numClamped, true, lod );
1152
1153 log_error( "Step by step:\n" );
1154 FloatPixel temp;
1155 if( gTestMipmaps )
1156 temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1157 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1158 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/, lod );
1159 else
1160 temp = sample_image_pixel_float_offset( imageValues, imageInfo,
1161 xOffsetValues[ j ], yOffsetValues[ j ], 0.f, norm_offset_x, norm_offset_y, 0.0f,
1162 imageSampler, tempOut, 1 /* verbose */, &containsDenormals /*dont flush while error reporting*/ );
1163 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
1164 Ulp_Error( resultPtr[0], expected[0] ),
1165 Ulp_Error( resultPtr[1], expected[1] ),
1166 Ulp_Error( resultPtr[2], expected[2] ),
1167 Ulp_Error( resultPtr[3], expected[3] ),
1168 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1169
1170 } else {
1171 log_error("Test error: we should have detected this passing above.\n");
1172 }
1173
1174 }//norm_offset_x
1175 }//norm_offset_y
1176 if( shouldReturn )
1177 return 1;
1178 } // if (!found_pixel)
1179
1180 resultPtr += 4;
1181 }
1182 }
1183 }
1184 else {
1185 log_error("Test error: NOT SUPPORTED.\n");
1186 }
1187 return 0;
1188 }
1189
validate_float_write_results(float * expected,float * actual,image_descriptor * imageInfo)1190 bool validate_float_write_results( float *expected, float *actual, image_descriptor *imageInfo )
1191 {
1192 bool pass = true;
1193 // Compare floats
1194 if( memcmp( expected, actual, sizeof( cl_float ) * get_format_channel_count( imageInfo->format ) ) != 0 )
1195 {
1196 // 8.3.3 Fix up cases where we have NaNs or flushed denorms; "all other values must be preserved"
1197 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ )
1198 {
1199 if ( isnan( expected[j] ) && isnan( actual[j] ) )
1200 continue;
1201 if ( IsFloatSubnormal( expected[j] ) && actual[j] == 0.0f )
1202 continue;
1203 if (expected[j] != actual[j])
1204 {
1205 pass = false;
1206 break;
1207 }
1208 }
1209 }
1210 return pass;
1211 }
1212
validate_half_write_results(cl_half * expected,cl_half * actual,image_descriptor * imageInfo)1213 bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo )
1214 {
1215 bool pass = true;
1216 // Compare half floats
1217 if (memcmp(expected, actual, sizeof( cl_half ) * get_format_channel_count(imageInfo->format)) != 0) {
1218
1219 // 8.3.2 Fix up cases where we have NaNs or generated half denormals
1220 for ( size_t j = 0; j < get_format_channel_count( imageInfo->format ); j++ ) {
1221 if ( is_half_nan( expected[j] ) && is_half_nan( actual[j] ) )
1222 continue;
1223 if ( is_half_denorm( expected[j] ) && is_half_zero( actual[j] ) )
1224 continue;
1225 if (expected[j] != actual[j])
1226 {
1227 pass = false;
1228 break;
1229 }
1230 }
1231 }
1232 return pass;
1233 }
1234
test_read_image_2D(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)1235 int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel kernel,
1236 image_descriptor *imageInfo, image_sampler_data *imageSampler,
1237 bool useFloatCoords, ExplicitType outputType, MTdata d )
1238 {
1239 int error;
1240 static int initHalf = 0;
1241 cl_mem imageBuffer;
1242 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
1243 size_t threads[2];
1244
1245 clMemWrapper xOffsets, yOffsets, results;
1246 clSamplerWrapper actualSampler;
1247 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
1248
1249 // The DataBuffer template class really does use delete[], not free -- IRO
1250 BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1251 BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height));
1252
1253 if( imageInfo->format->image_channel_data_type == CL_HALF_FLOAT )
1254 if( DetectFloatToHalfRoundingMode(queue) )
1255 return 1;
1256
1257 // generate_random_image_data allocates with malloc, so we use a MallocDataBuffer here
1258 BufferOwningPtr<char> imageValues;
1259 generate_random_image_data( imageInfo, imageValues, d );
1260
1261 if( gDebugTrace )
1262 {
1263 log_info( " - Creating image %d by %d...\n", (int)imageInfo->width, (int)imageInfo->height );
1264 if( gTestMipmaps )
1265 {
1266 log_info( " - with %d mip levels", (int) imageInfo->num_mip_levels );
1267 }
1268 }
1269
1270 // Construct testing sources
1271 clProtectedImage protImage;
1272 clMemWrapper unprotImage;
1273 cl_mem image;
1274
1275 if(gtestTypesToRun & kReadTests)
1276 {
1277 image_read_write_flags = CL_MEM_READ_ONLY;
1278 }
1279 else
1280 {
1281 image_read_write_flags = CL_MEM_READ_WRITE;
1282 }
1283
1284 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
1285 {
1286 if (gTestImage2DFromBuffer)
1287 {
1288 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1289 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
1290 imageInfo->rowPitch * imageInfo->height, maxImageUseHostPtrBackingStore, &error);
1291 test_error( error, "Unable to create buffer" );
1292 unprotImage = create_image_2d_buffer( context,
1293 image_read_write_flags,
1294 imageInfo->format,
1295 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1296 imageBuffer, &error );
1297
1298 }
1299 else
1300 {
1301 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
1302 // Do not use protected images for max image size test since it rounds the row size to a page size
1303 if (gTestMaxImages) {
1304 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
1305 unprotImage = create_image_2d( context,
1306 image_read_write_flags | CL_MEM_USE_HOST_PTR,
1307 imageInfo->format,
1308 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1309 maxImageUseHostPtrBackingStore, &error );
1310 }
1311 else
1312 {
1313 error = protImage.Create( context,
1314 image_read_write_flags,
1315 imageInfo->format, imageInfo->width, imageInfo->height );
1316 }
1317 }
1318
1319 if( error != CL_SUCCESS )
1320 {
1321 if (gTestImage2DFromBuffer) {
1322 clReleaseMemObject(imageBuffer);
1323 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1324 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1325 return 0;
1326 }
1327 }
1328
1329 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1330 return error;
1331 }
1332
1333 if (gTestMaxImages || gTestImage2DFromBuffer)
1334 image = (cl_mem)unprotImage;
1335 else
1336 image = (cl_mem)protImage;
1337 }
1338 else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
1339 {
1340 if (gTestImage2DFromBuffer)
1341 {
1342 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1343 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1344 test_error( error, "Unable to create buffer" );
1345 unprotImage = create_image_2d_buffer( context,
1346 image_read_write_flags,
1347 imageInfo->format,
1348 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1349 imageBuffer, &error );
1350
1351 }
1352 else
1353 {
1354 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
1355 unprotImage = create_image_2d( context,
1356 image_read_write_flags | CL_MEM_COPY_HOST_PTR,
1357 imageInfo->format,
1358 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1359 imageValues, &error );
1360 }
1361 if( error != CL_SUCCESS )
1362 {
1363 if (gTestImage2DFromBuffer) {
1364 clReleaseMemObject(imageBuffer);
1365 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1366 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1367 return 0;
1368 }
1369 }
1370
1371 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1372 return error;
1373 }
1374 image = unprotImage;
1375 }
1376 else // Either CL_MEM_ALLOC_HOST_PTR or none
1377 {
1378 if( gTestMipmaps )
1379 {
1380 cl_image_desc image_desc = {0};
1381 image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
1382 image_desc.image_width = imageInfo->width;
1383 image_desc.image_height = imageInfo->height;
1384 image_desc.num_mip_levels = imageInfo->num_mip_levels;
1385 unprotImage = clCreateImage( context, CL_MEM_READ_ONLY, imageInfo->format, &image_desc, NULL, &error);
1386 }
1387 else if (gTestImage2DFromBuffer)
1388 {
1389 imageBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | gMemFlagsToUse,
1390 imageInfo->rowPitch * imageInfo->height, imageValues, &error);
1391 test_error( error, "Unable to create buffer" );
1392 unprotImage = create_image_2d_buffer( context,
1393 image_read_write_flags,
1394 imageInfo->format,
1395 imageInfo->width, imageInfo->height, imageInfo->rowPitch,
1396 imageBuffer, &error );
1397
1398 }
1399 else
1400 {
1401 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
1402 // it works just as if no flag is specified, so we just do the same thing either way
1403 unprotImage = create_image_2d( context,
1404 image_read_write_flags | gMemFlagsToUse,
1405 imageInfo->format,
1406 imageInfo->width, imageInfo->height, ( gEnablePitch ? imageInfo->rowPitch : 0 ),
1407 imageValues, &error );
1408 }
1409 if( error != CL_SUCCESS )
1410 {
1411 if (gTestImage2DFromBuffer) {
1412 clReleaseMemObject(imageBuffer);
1413 if (error == CL_INVALID_IMAGE_FORMAT_DESCRIPTOR) {
1414 log_info( "Format not supported for cl_khr_image2d_from_buffer skipping...\n" );
1415 return 0;
1416 }
1417 }
1418
1419 log_error( "ERROR: Unable to create 2D image of size %d x %d pitch %d (%s)\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->rowPitch, IGetErrorString( error ) );
1420 return error;
1421 }
1422 image = unprotImage;
1423 }
1424
1425 if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
1426 {
1427 if( gDebugTrace )
1428 log_info( " - Writing image...\n" );
1429
1430 size_t origin[ 3 ] = { 0, 0, 0 };
1431 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, 1 };
1432
1433 if(!gTestMipmaps)
1434 {
1435 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1436 origin, region, ( gEnablePitch ? imageInfo->rowPitch : 0 ), 0,
1437 imageValues, 0, NULL, NULL);
1438 if (error != CL_SUCCESS)
1439 {
1440 log_error( "ERROR: Unable to write to 2D image of size %d x %d\n", (int)imageInfo->width, (int)imageInfo->height );
1441 return error;
1442 }
1443 }
1444 else
1445 {
1446 size_t tmpNextLevelOffset = 0;
1447 for(size_t level = 0; level < imageInfo->num_mip_levels; level++)
1448 {
1449 origin[2] = level;
1450 error = clEnqueueWriteImage(queue, image, CL_TRUE,
1451 origin, region, (( gEnablePitch || gTestImage2DFromBuffer) ? imageInfo->rowPitch : 0 ), 0,
1452 (char*)imageValues + tmpNextLevelOffset, 0, NULL, NULL);
1453 tmpNextLevelOffset += region[0]*region[1]*get_pixel_size(imageInfo->format);
1454 region[0] = (region[0] >> 1) ? (region[0] >> 1) : 1;
1455 region[1] = (region[1] >> 1) ? (region[1] >> 1) : 1;
1456 }
1457 }
1458 }
1459
1460 if( gDebugTrace )
1461 log_info( " - Creating kernel arguments...\n" );
1462
1463 xOffsets =
1464 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1465 sizeof(cl_float) * imageInfo->width * imageInfo->height,
1466 xOffsetValues, &error);
1467 test_error( error, "Unable to create x offset buffer" );
1468 yOffsets =
1469 clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1470 sizeof(cl_float) * imageInfo->width * imageInfo->height,
1471 yOffsetValues, &error);
1472 test_error( error, "Unable to create y offset buffer" );
1473 results = clCreateBuffer(context, CL_MEM_READ_WRITE,
1474 get_explicit_type_size(outputType) * 4
1475 * imageInfo->width * imageInfo->height,
1476 NULL, &error);
1477 test_error( error, "Unable to create result buffer" );
1478
1479 // Create sampler to use
1480 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
1481 test_error(error, "Unable to create image sampler");
1482
1483 // Set arguments
1484 int idx = 0;
1485 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
1486 test_error( error, "Unable to set kernel arguments" );
1487 if( !gUseKernelSamplers )
1488 {
1489 error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
1490 test_error( error, "Unable to set kernel arguments" );
1491 }
1492 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
1493 test_error( error, "Unable to set kernel arguments" );
1494 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
1495 test_error( error, "Unable to set kernel arguments" );
1496 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
1497 test_error( error, "Unable to set kernel arguments" );
1498
1499 // A cast of troublesome offsets. The first one has to be zero.
1500 const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
1501 int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
1502 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
1503 int loopCount = 2 * float_offset_count;
1504 if( ! useFloatCoords )
1505 loopCount = 1;
1506 if (gTestMaxImages) {
1507 loopCount = 1;
1508 log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
1509 }
1510
1511 if(gtestTypesToRun & kReadWriteTests)
1512 {
1513 loopCount = 1;
1514 }
1515
1516 // Get the maximum absolute error for this format
1517 double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
1518 if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
1519
1520 if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
1521 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
1522 if (initHalf) {
1523 log_info("Half rounding mode successfully detected.\n");
1524 }
1525 }
1526
1527 size_t nextLevelOffset = 0;
1528 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
1529 for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
1530 {
1531 size_t resultValuesSize = width_lod * height_lod * get_explicit_type_size( outputType ) * 4;
1532 BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
1533 float lod_float = (float)lod;
1534 char *imagePtr = (char *)imageValues + nextLevelOffset;
1535 if( gTestMipmaps )
1536 {
1537 if(gDebugTrace)
1538 log_info("\t- Working at mip level %d\n", lod);
1539 error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
1540 }
1541
1542 // Validate results element by element
1543 for( int q = 0; q < loopCount; q++ )
1544 {
1545 float offset = float_offsets[ q % float_offset_count ];
1546
1547 // Init the coordinates
1548 InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues,
1549 q>=float_offset_count ? -offset: offset,
1550 q>=float_offset_count ? offset: -offset, imageSampler->normalized_coords, d, lod );
1551
1552 error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, xOffsetValues, 0, NULL, NULL );
1553 test_error( error, "Unable to write x offsets" );
1554 error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width, yOffsetValues, 0, NULL, NULL );
1555 test_error( error, "Unable to write y offsets" );
1556
1557 // Get results
1558 memset( resultValues, 0xff, resultValuesSize );
1559 clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
1560
1561 // Run the kernel
1562 threads[0] = (size_t)width_lod;
1563 threads[1] = (size_t)height_lod;
1564 error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
1565 test_error( error, "Unable to run kernel" );
1566
1567 if( gDebugTrace )
1568 log_info( " reading results, %ld kbytes\n", (unsigned long)( width_lod * height_lod * get_explicit_type_size( outputType ) * 4 / 1024 ) );
1569
1570 error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL ); //XXX check
1571 test_error( error, "Unable to read results from kernel" );
1572 if( gDebugTrace )
1573 log_info( " results read\n" );
1574
1575 int retCode;
1576 switch (imageInfo->format->image_channel_order) {
1577 case CL_DEPTH:
1578 retCode = validate_image_2D_depth_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1579 break;
1580 case CL_sRGB:
1581 case CL_sRGBx:
1582 case CL_sRGBA:
1583 case CL_sBGRA:
1584 retCode = validate_image_2D_sRGB_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1585 break;
1586 default:
1587 retCode = validate_image_2D_results((char*)imageValues + nextLevelOffset, resultValues, formatAbsoluteError, xOffsetValues, yOffsetValues, outputType, numTries, numClamped, imageSampler, imageInfo, lod, imagePtr);
1588 }
1589 if (retCode)
1590 return retCode;
1591 }
1592 end:
1593 if ( gTestMipmaps )
1594 {
1595 nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
1596 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1597 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1598 }
1599 }
1600
1601 if (gTestImage2DFromBuffer) clReleaseMemObject(imageBuffer);
1602
1603 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1604 }
1605
test_read_image_set_2D(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1606 int test_read_image_set_2D(cl_device_id device, cl_context context,
1607 cl_command_queue queue,
1608 const cl_image_format *format,
1609 image_sampler_data *imageSampler, bool floatCoords,
1610 ExplicitType outputType)
1611 {
1612 char programSrc[10240];
1613 const char *ptr;
1614 const char *readFormat;
1615 clProgramWrapper program;
1616 clKernelWrapper kernel;
1617 const char *KernelSourcePattern = NULL;
1618
1619 if (gTestImage2DFromBuffer)
1620 {
1621 if (format->image_channel_order == CL_RGB || format->image_channel_order == CL_RGBx)
1622 {
1623 switch (format->image_channel_data_type)
1624 {
1625 case CL_UNORM_INT8:
1626 case CL_UNORM_INT16:
1627 case CL_SNORM_INT8:
1628 case CL_SNORM_INT16:
1629 case CL_HALF_FLOAT:
1630 case CL_FLOAT:
1631 case CL_SIGNED_INT8:
1632 case CL_SIGNED_INT16:
1633 case CL_SIGNED_INT32:
1634 case CL_UNSIGNED_INT8:
1635 case CL_UNSIGNED_INT16:
1636 case CL_UNSIGNED_INT32:
1637 log_info( "Skipping image format: %s %s\n", GetChannelOrderName( format->image_channel_order ),
1638 GetChannelTypeName( format->image_channel_data_type ));
1639 return 0;
1640 default:
1641 break;
1642 }
1643 }
1644 }
1645
1646
1647 RandomSeed seed( gRandomSeed );
1648 int error;
1649
1650 // Get our operating params
1651 size_t maxWidth, maxHeight;
1652 cl_ulong maxAllocSize, memSize;
1653 image_descriptor imageInfo = { 0x0 };
1654 size_t pixelSize;
1655
1656 imageInfo.format = format;
1657 imageInfo.depth = imageInfo.arraySize = imageInfo.slicePitch = 0;
1658 imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
1659 pixelSize = get_pixel_size( imageInfo.format );
1660
1661 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1662 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1663 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1664 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1665 test_error( error, "Unable to get max image 2D size from device" );
1666
1667 if (memSize > (cl_ulong)SIZE_MAX) {
1668 memSize = (cl_ulong)SIZE_MAX;
1669 }
1670
1671 // Determine types
1672 if( outputType == kInt )
1673 readFormat = "i";
1674 else if( outputType == kUInt )
1675 readFormat = "ui";
1676 else // kFloat
1677 readFormat = "f";
1678
1679 // Construct the source
1680 const char *samplerArg = samplerKernelArg;
1681 char samplerVar[ 1024 ] = "";
1682 if( gUseKernelSamplers )
1683 {
1684 get_sampler_kernel_code( imageSampler, samplerVar );
1685 samplerArg = "";
1686 }
1687
1688 if(gtestTypesToRun & kReadTests)
1689 {
1690 KernelSourcePattern = read2DKernelSourcePattern;
1691 }
1692 else
1693 {
1694 KernelSourcePattern = read_write2DKernelSourcePattern;
1695 }
1696
1697
1698 sprintf(programSrc, KernelSourcePattern,
1699 gTestMipmaps
1700 ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
1701 : "",
1702 (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t"
1703 : "image2d_t",
1704 samplerArg, get_explicit_type_name(outputType),
1705 (format->image_channel_order == CL_DEPTH) ? "" : "4",
1706 gTestMipmaps ? ", float lod" : " ", samplerVar,
1707 gTestMipmaps ? lodOffsetSource : offsetSource,
1708 floatCoords ? floatKernelSource : intCoordKernelSource, readFormat,
1709 gTestMipmaps ? ", lod" : " ");
1710
1711 ptr = programSrc;
1712 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1713 "sample_kernel");
1714 test_error( error, "Unable to create testing kernel" );
1715
1716 if( gTestSmallImages )
1717 {
1718 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1719 {
1720 imageInfo.rowPitch = imageInfo.width * pixelSize;
1721 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1722 {
1723 if( gTestMipmaps )
1724 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1725
1726 if( gDebugTrace )
1727 log_info( " at size %d,%d\n", (int)imageInfo.width, (int)imageInfo.height );
1728
1729 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1730 if( retCode )
1731 return retCode;
1732 }
1733 }
1734 }
1735 else if( gTestMaxImages )
1736 {
1737 // Try a specific set of maximum sizes
1738 size_t numbeOfSizes;
1739 size_t sizes[100][3];
1740
1741 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, 1, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D, imageInfo.format, CL_TRUE);
1742
1743 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1744 {
1745 imageInfo.width = sizes[ idx ][ 0 ];
1746 imageInfo.height = sizes[ idx ][ 1 ];
1747 imageInfo.rowPitch = imageInfo.width * pixelSize;
1748 log_info("Testing %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ]);
1749
1750 if( gTestMipmaps )
1751 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1752
1753 if( gDebugTrace )
1754 log_info( " at max size %d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ] );
1755 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1756 if( retCode )
1757 return retCode;
1758 }
1759 }
1760 else if( gTestRounding )
1761 {
1762 uint64_t typeRange = 1LL << ( get_format_type_size( imageInfo.format ) * 8 );
1763 typeRange /= pixelSize / get_format_type_size( imageInfo.format );
1764 imageInfo.height = (size_t)( ( typeRange + 255LL ) / 256LL );
1765 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1766 while( imageInfo.height >= maxHeight / 2 )
1767 {
1768 imageInfo.width <<= 1;
1769 imageInfo.height >>= 1;
1770 }
1771
1772 while( imageInfo.width >= maxWidth / 2 )
1773 imageInfo.width >>= 1;
1774 imageInfo.rowPitch = imageInfo.width * pixelSize;
1775
1776 gRoundingStartValue = 0;
1777 do
1778 {
1779 if( gDebugTrace )
1780 log_info( " at size %d,%d, starting round ramp at %llu for range %llu\n", (int)imageInfo.width, (int)imageInfo.height, gRoundingStartValue, typeRange );
1781 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1782 if( retCode )
1783 return retCode;
1784
1785 gRoundingStartValue += imageInfo.width * imageInfo.height * pixelSize / get_format_type_size( imageInfo.format );
1786
1787 } while( gRoundingStartValue < typeRange );
1788 }
1789 else
1790 {
1791 cl_uint imagePitchAlign = 0;
1792 if (gTestImage2DFromBuffer)
1793 {
1794 #if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT)
1795 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, sizeof( cl_uint ), &imagePitchAlign, NULL );
1796 #endif
1797 if (!imagePitchAlign || error) {
1798 test_error( error, "Unable to get CL_DEVICE_IMAGE_PITCH_ALIGNMENT from device" );
1799 imagePitchAlign = 1;
1800 }
1801 }
1802
1803 int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1804 int maxHeightRange = (int) reduceImageSizeRange(maxHeight);
1805
1806 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1807 {
1808 cl_ulong size;
1809 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1810 // image, the result array, plus offset arrays, will fit in the global ram space
1811 do
1812 {
1813 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1814 imageInfo.height = (size_t)random_log_in_range( 16, maxHeightRange, seed );
1815
1816 imageInfo.rowPitch = imageInfo.width * pixelSize;
1817 if( gTestMipmaps )
1818 {
1819 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1820 size = 4 * compute_mipmapped_image_size(imageInfo);
1821 }
1822 else
1823 {
1824 if( gEnablePitch )
1825 {
1826 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1827 imageInfo.rowPitch += extraWidth * pixelSize;
1828 }
1829
1830 // if we are creating a 2D image from a buffer, make sure that the rowpitch is aligned to CL_DEVICE_IMAGE_PITCH_ALIGNMENT_APPLE
1831 if (gTestImage2DFromBuffer)
1832 {
1833 size_t pitch = imagePitchAlign * pixelSize;
1834 imageInfo.rowPitch = ((imageInfo.rowPitch + pitch - 1) / pitch ) * pitch;
1835 }
1836
1837 size = (size_t)imageInfo.rowPitch * (size_t)imageInfo.height * 4;
1838 }
1839 } while( size > maxAllocSize || ( size * 3 ) > memSize );
1840
1841 if( gDebugTrace )
1842 log_info( " at size %d,%d (row pitch %d) out of %d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.rowPitch, (int)maxWidth, (int)maxHeight );
1843 int retCode = test_read_image_2D( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1844 if( retCode )
1845 return retCode;
1846 }
1847 }
1848
1849 return 0;
1850 }
1851