1 //
2 // Copyright (c) 2017, 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "test_common.h"
17 #include <float.h>
18
19 #include <algorithm>
20
21 // Utility function to clamp down image sizes for certain tests to avoid
22 // using too much memory.
reduceImageSizeRange(size_t maxDimSize)23 static size_t reduceImageSizeRange(size_t maxDimSize) {
24 size_t DimSize = maxDimSize/128;
25 if (DimSize < (size_t) 16)
26 return 16;
27 else if (DimSize > (size_t) 64)
28 return 64;
29 else
30 return DimSize;
31 }
32
reduceImageDepth(size_t maxDepth)33 static size_t reduceImageDepth(size_t maxDepth) {
34 size_t Depth = maxDepth/32;
35 if (Depth < (size_t) 8)
36 return 8;
37 else if (Depth > (size_t) 32)
38 return 32;
39 else
40 return Depth;
41 }
42
43 const char *read2DArrayKernelSourcePattern =
44 "%s\n"
45 "__kernel void sample_kernel( read_only %s input,%s __global float "
46 "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global "
47 "%s%s *results %s )\n"
48 "{\n"
49 "%s"
50 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
51 "get_global_id(2);\n"
52 "%s"
53 "%s"
54 " results[offset] = read_image%s( input, imageSampler, coords %s);\n"
55 "}";
56
57 const char *read_write2DArrayKernelSourcePattern =
58 "%s\n"
59 "__kernel void sample_kernel( read_write %s input,%s __global float "
60 "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global "
61 "%s%s *results %s)\n"
62 "{\n"
63 "%s"
64 " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = "
65 "get_global_id(2);\n"
66 "%s"
67 "%s"
68 " results[offset] = read_image%s( input, coords %s);\n"
69 "}";
70
71 const char* offset2DarraySource =" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n";
72 const char* offset2DarraySourceLod =
73 " int lod_int = (int)lod;\n"
74 " int width_lod, height_lod;\n"
75 " width_lod = (get_image_width(input) >> lod_int ) ? (get_image_width(input) >> lod_int ) : 1;\n"
76 " height_lod = (get_image_height(input) >> lod_int ) ? (get_image_height(input) >> lod_int ) : 1;\n"
77 " int offset = tidZ*width_lod*height_lod + tidY*width_lod + tidX;\n";
78
79 const char *int2DArrayCoordKernelSource =
80 " int4 coords = (int4)( (int) xOffsets[offset], (int) yOffsets[offset], (int) zOffsets[offset], 0 );\n";
81
82 const char *float2DArrayUnnormalizedCoordKernelSource =
83 " float4 coords = (float4)( xOffsets[offset], yOffsets[offset], zOffsets[offset], 0.0f );\n";
84
85
86 static const char *samplerKernelArg = " sampler_t imageSampler,";
87
determine_validation_error_offset_2D_array(void * imagePtr,image_descriptor * imageInfo,image_sampler_data * imageSampler,T * resultPtr,T * expected,float error,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,size_t j,int & numTries,int & numClamped,bool printAsFloat,int lod)88 template <class T> int determine_validation_error_offset_2D_array( void *imagePtr, image_descriptor *imageInfo, image_sampler_data *imageSampler,
89 T *resultPtr, T * expected, float error,
90 float x, float y, float z, float xAddressOffset, float yAddressOffset, float zAddressOffset, size_t j, int &numTries, int &numClamped, bool printAsFloat, int lod )
91 {
92 int actualX, actualY, actualZ;
93 int found = debug_find_pixel_in_image( imagePtr, imageInfo, resultPtr, &actualX, &actualY, &actualZ, lod );
94 bool clampingErr = false, clamped = false, otherClampingBug = false;
95 int clampedX, clampedY, clampedZ;
96
97 size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, imageDepth = imageInfo->arraySize;
98
99 clamped = get_integer_coords_offset( x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, imageDepth, imageSampler, imageInfo, clampedX, clampedY, clampedZ );
100
101 if( found )
102 {
103 // Is it a clamping bug?
104 if( clamped && clampedX == actualX && clampedY == actualY && clampedZ == actualZ )
105 {
106 if( (--numClamped) == 0 )
107 {
108 log_error( "\nERROR: TEST FAILED! Read is erroneously clamping coordinates!\n" );
109 if( printAsFloat )
110 {
111 log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n"
112 " Expected (%g,%g,%g,%g)\n"
113 " Observed (%g,%g,%g,%g)\n"
114 " error of %g\n",
115 j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
116 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
117 }
118 else
119 {
120 log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n"
121 " Expected (%x,%x,%x,%x)\n"
122 " Observed (%x,%x,%x,%x)\n",
123 j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
124 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
125 }
126
127 if( imageSampler->filter_mode != CL_FILTER_LINEAR )
128 {
129 if( found )
130 log_error( "\tValue really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" );
131 else
132 log_error( "\tValue not actually found in image\n" );
133 }
134 log_error( "\n" );
135
136 return -1;
137 }
138 clampingErr = true;
139 otherClampingBug = true;
140 }
141 }
142 if( clamped && !otherClampingBug )
143 {
144 // If we are in clamp-to-edge mode and we're getting zeroes, it's possible we're getting border erroneously
145 if( resultPtr[ 0 ] == 0 && resultPtr[ 1 ] == 0 && resultPtr[ 2 ] == 0 && resultPtr[ 3 ] == 0 )
146 {
147 if( (--numClamped) == 0 )
148 {
149 log_error( "\nERROR: TEST FAILED: Clamping is erroneously returning border color!\n" );
150 if( printAsFloat )
151 {
152 log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n"
153 " Expected (%g,%g,%g,%g)\n"
154 " Observed (%g,%g,%g,%g)\n"
155 " error of %g\n",
156 j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
157 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
158 }
159 else
160 {
161 log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n"
162 " Expected (%x,%x,%x,%x)\n"
163 " Observed (%x,%x,%x,%x)\n",
164 j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
165 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
166 }
167 return -1;
168 }
169 clampingErr = true;
170 }
171 }
172 if( !clampingErr )
173 {
174 if( true ) // gExtraValidateInfo )
175 {
176 if( printAsFloat )
177 {
178 log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n"
179 " Expected (%g,%g,%g,%g)\n"
180 " Observed (%g,%g,%g,%g)\n"
181 " error of %g\n",
182 j, x, x, y, y, z, z, (float)expected[ 0 ], (float)expected[ 1 ], (float)expected[ 2 ], (float)expected[ 3 ],
183 (float)resultPtr[ 0 ], (float)resultPtr[ 1 ], (float)resultPtr[ 2 ], (float)resultPtr[ 3 ], error );
184 }
185 else
186 {
187 log_error( "Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not validate!\n"
188 " Expected (%x,%x,%x,%x)\n"
189 " Observed (%x,%x,%x,%x)\n",
190 j, x, x, y, y, z, z, (int)expected[ 0 ], (int)expected[ 1 ], (int)expected[ 2 ], (int)expected[ 3 ],
191 (int)resultPtr[ 0 ], (int)resultPtr[ 1 ], (int)resultPtr[ 2 ], (int)resultPtr[ 3 ] );
192 }
193 log_error( "Integer coords resolve to %d,%d,%d, image size = %d,%d,%d\n", clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, (int)imageDepth );
194
195 if( printAsFloat && gExtraValidateInfo )
196 {
197 log_error( "\nNearby values:\n" );
198 for( int zOff = -1; zOff <= 1; zOff++ )
199 {
200 for( int yOff = -1; yOff <= 1; yOff++ )
201 {
202 float top[ 4 ], real[ 4 ], bot[ 4 ];
203 read_image_pixel_float( imagePtr, imageInfo, clampedX - 1 , clampedY + yOff, clampedZ + zOff, top );
204 read_image_pixel_float( imagePtr, imageInfo, clampedX ,clampedY + yOff, clampedZ + zOff, real );
205 read_image_pixel_float( imagePtr, imageInfo, clampedX + 1, clampedY + yOff, clampedZ + zOff, bot );
206 log_error( "\t(%g,%g,%g,%g)",top[0], top[1], top[2], top[3] );
207 log_error( " (%g,%g,%g,%g)", real[0], real[1], real[2], real[3] );
208 log_error( " (%g,%g,%g,%g)\n",bot[0], bot[1], bot[2], bot[3] );
209 }
210 }
211 }
212 if( imageSampler->filter_mode != CL_FILTER_LINEAR )
213 {
214 if( found )
215 log_error( "Value really found in image at %d,%d,%d (%s)\n", actualX, actualY, actualZ, ( found > 1 ) ? "NOT unique!!" : "unique" );
216 else
217 log_error( "Value not actually found in image\n" );
218 }
219 log_error( "\n" );
220 }
221
222 numClamped = -1; // We force the clamped counter to never work
223 if( ( --numTries ) == 0 )
224 return -1;
225 }
226 return 0;
227 }
228
InitFloatCoords(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float * zOffsets,float xfract,float yfract,float zfract,int normalized_coords,MTdata d,int lod)229 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float *zOffsets, float xfract, float yfract, float zfract, int normalized_coords, MTdata d , int lod)
230 {
231 size_t i = 0;
232 size_t width_lod = imageInfo->width;
233 size_t height_lod = imageInfo->height;
234 if(gTestMipmaps)
235 {
236 width_lod = ( imageInfo->width >> lod) ?( imageInfo->width >> lod) : 1;
237 height_lod = ( imageInfo->height >> lod) ?( imageInfo->height >> lod) : 1;
238
239 }
240 if( gDisableOffsets )
241 {
242 for( size_t z = 0; z < imageInfo->arraySize; z++ )
243 {
244 for( size_t y = 0; y < height_lod; y++ )
245 {
246 for( size_t x = 0; x < width_lod; x++, i++ )
247 {
248 xOffsets[ i ] = (float) (xfract + (double) x);
249 yOffsets[ i ] = (float) (yfract + (double) y);
250 zOffsets[ i ] = (float) (zfract + (double) z);
251 }
252 }
253 }
254 }
255 else
256 {
257 for( size_t z = 0; z < imageInfo->arraySize; z++ )
258 {
259 for( size_t y = 0; y < height_lod; y++ )
260 {
261 for( size_t x = 0; x < width_lod; x++, i++ )
262 {
263 xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
264 yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
265 zOffsets[ i ] = (float) (zfract + (double) ((int) z + random_in_range( -10, 10, d )));
266 }
267 }
268 }
269 }
270
271 if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
272 {
273 i = 0;
274 for( size_t z = 0; z < imageInfo->arraySize; z++ )
275 {
276 for( size_t y = 0; y < height_lod; y++ )
277 {
278 for( size_t x = 0; x < width_lod; x++, i++ )
279 {
280 xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) width_lod - 1.0);
281 yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double) height_lod - 1.0);
282 zOffsets[ i ] = (float) CLAMP( (double) zOffsets[ i ], 0.0, (double) imageInfo->arraySize - 1.0);
283 }
284 }
285 }
286 }
287
288 if( normalized_coords )
289 {
290 i = 0;
291 for( size_t z = 0; z < imageInfo->arraySize; z++ )
292 {
293 for( size_t y = 0; y < height_lod; y++ )
294 {
295 for( size_t x = 0; x < width_lod; x++, i++ )
296 {
297 xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) width_lod);
298 yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) height_lod);
299 }
300 }
301 }
302 }
303 }
304
test_read_image_2D_array(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)305 int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_kernel kernel,
306 image_descriptor *imageInfo, image_sampler_data *imageSampler,
307 bool useFloatCoords, ExplicitType outputType, MTdata d )
308 {
309 int error;
310 size_t threads[3];
311 static int initHalf = 0;
312 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
313
314 clMemWrapper xOffsets, yOffsets, zOffsets, results;
315 clSamplerWrapper actualSampler;
316 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
317
318 // Create offset data
319 BufferOwningPtr<cl_float> xOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize));
320 BufferOwningPtr<cl_float> yOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize));
321 BufferOwningPtr<cl_float> zOffsetValues(malloc(sizeof(cl_float) *imageInfo->width * imageInfo->height * imageInfo->arraySize));
322
323 BufferOwningPtr<char> imageValues;
324 generate_random_image_data( imageInfo, imageValues, d );
325
326 // Construct testing sources
327 clProtectedImage protImage;
328 clMemWrapper unprotImage;
329 cl_mem image;
330
331 if(gtestTypesToRun & kReadTests)
332 {
333 image_read_write_flags = CL_MEM_READ_ONLY;
334 }
335 else
336 {
337 image_read_write_flags = CL_MEM_READ_WRITE;
338 }
339
340 if( gMemFlagsToUse == CL_MEM_USE_HOST_PTR )
341 {
342 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the testing (via Ian)
343 // Do not use protected images for max image size test since it rounds the row size to a page size
344 if (gTestMaxImages) {
345 generate_random_image_data( imageInfo, maxImageUseHostPtrBackingStore, d );
346 unprotImage = create_image_2d_array( context,
347 image_read_write_flags | CL_MEM_USE_HOST_PTR,
348 imageInfo->format,
349 imageInfo->width, imageInfo->height,
350 imageInfo->arraySize,
351 ( gEnablePitch ? imageInfo->rowPitch : 0 ),
352 ( gEnablePitch ? imageInfo->slicePitch : 0 ),
353 maxImageUseHostPtrBackingStore, &error );
354 } else {
355 error = protImage.Create( context, CL_MEM_OBJECT_IMAGE2D_ARRAY,
356 image_read_write_flags,
357 imageInfo->format, imageInfo->width, imageInfo->height, 1, imageInfo->arraySize );
358 }
359 if( error != CL_SUCCESS )
360 {
361 log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
362 return error;
363 }
364 if (gTestMaxImages)
365 image = (cl_mem)unprotImage;
366 else
367 image = (cl_mem)protImage;
368 }
369 else if( gMemFlagsToUse == CL_MEM_COPY_HOST_PTR )
370 {
371 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data in
372 unprotImage = create_image_2d_array( context,
373 image_read_write_flags | CL_MEM_COPY_HOST_PTR,
374 imageInfo->format,
375 imageInfo->width,
376 imageInfo->height,
377 imageInfo->arraySize,
378 ( gEnablePitch ? imageInfo->rowPitch : 0 ),
379 ( gEnablePitch ? imageInfo->slicePitch : 0 ),
380 imageValues, &error );
381 if( error != CL_SUCCESS )
382 {
383 log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
384 return error;
385 }
386 image = unprotImage;
387 }
388 else // Either CL_MEM_ALLOC_HOST_PTR or none
389 {
390 if ( gTestMipmaps )
391 {
392 cl_image_desc image_desc = {0};
393 image_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
394 image_desc.image_width = imageInfo->width;
395 image_desc.image_height = imageInfo->height;
396 image_desc.image_array_size = imageInfo->arraySize;
397 //image_desc.image_rowPitch = imageInfo->rowPitch;
398 //image_desc.image_slicePitch = imageInfo->slicePitch;
399 image_desc.num_mip_levels = imageInfo->num_mip_levels;
400
401 unprotImage = clCreateImage( context,
402 image_read_write_flags,
403 imageInfo->format, &image_desc, NULL, &error);
404 if( error != CL_SUCCESS )
405 {
406 log_error( "ERROR: Unable to create %d level mipmapped 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
407 return error;
408 }
409 }
410 else
411 {
412 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can be accessed by the host, but otherwise
413 // it works just as if no flag is specified, so we just do the same thing either way
414 unprotImage = create_image_2d_array( context,
415 image_read_write_flags | gMemFlagsToUse,
416 imageInfo->format,
417 imageInfo->width, imageInfo->height, imageInfo->arraySize,
418 ( gEnablePitch ? imageInfo->rowPitch : 0 ),
419 ( gEnablePitch ? imageInfo->slicePitch : 0 ),
420 imageValues, &error );
421 if( error != CL_SUCCESS )
422 {
423 log_error( "ERROR: Unable to create 2D image array of size %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize, (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, IGetErrorString( error ) );
424 return error;
425 }
426 }
427 image = unprotImage;
428 }
429
430 if( gMemFlagsToUse != CL_MEM_COPY_HOST_PTR )
431 {
432 if( gDebugTrace )
433 log_info( " - Writing image...\n" );
434
435 size_t origin[ 4 ] = { 0, 0, 0, 0 };
436 size_t region[ 3 ] = { imageInfo->width, imageInfo->height, imageInfo->arraySize };
437 size_t tmpNextLevelOffset = 0;
438
439 if( gTestMipmaps )
440 {
441 for(int level = 0; level < imageInfo->num_mip_levels; level++)
442 {
443 origin[3] = level;
444 error = clEnqueueWriteImage(queue, image, CL_TRUE,
445 origin, region, /*gEnablePitch ? imageInfo->rowPitch :*/ 0, /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
446 imageValues + tmpNextLevelOffset, 0, NULL, NULL);
447 if (error != CL_SUCCESS)
448 {
449 log_error( "ERROR: Unable to write to level %d of 2D image array of size %d x %d x %d\n", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize );
450 return error;
451 }
452
453 tmpNextLevelOffset += region[0]*region[1]*region[2]*get_pixel_size(imageInfo->format);
454 region[0] = ( region[0] >> 1 ) ? ( region[0] >> 1 ) : 1;
455 region[1] = ( region[1] >> 1 ) ? ( region[1] >> 1 ) : 1;
456 }
457 }
458 else
459 {
460 error = clEnqueueWriteImage(queue, image, CL_TRUE,
461 origin, region, gEnablePitch ? imageInfo->rowPitch : 0, gEnablePitch ? imageInfo->slicePitch : 0,
462 imageValues, 0, NULL, NULL);
463 if (error != CL_SUCCESS)
464 {
465 log_error( "ERROR: Unable to write to 2D image array of size %d x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->arraySize );
466 return error;
467 }
468 }
469 }
470
471 xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
472 sizeof(cl_float) * imageInfo->width
473 * imageInfo->height * imageInfo->arraySize,
474 xOffsetValues, &error);
475 test_error( error, "Unable to create x offset buffer" );
476 yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
477 sizeof(cl_float) * imageInfo->width
478 * imageInfo->height * imageInfo->arraySize,
479 yOffsetValues, &error);
480 test_error( error, "Unable to create y offset buffer" );
481 zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
482 sizeof(cl_float) * imageInfo->width
483 * imageInfo->height * imageInfo->arraySize,
484 zOffsetValues, &error);
485 test_error( error, "Unable to create y offset buffer" );
486 results =
487 clCreateBuffer(context, CL_MEM_READ_WRITE,
488 get_explicit_type_size(outputType) * 4 * imageInfo->width
489 * imageInfo->height * imageInfo->arraySize,
490 NULL, &error);
491 test_error( error, "Unable to create result buffer" );
492
493 // Create sampler to use
494 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
495 test_error(error, "Unable to create image sampler");
496
497 // Set arguments
498 int idx = 0;
499 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &image );
500 test_error( error, "Unable to set kernel arguments" );
501 if( !gUseKernelSamplers )
502 {
503 error = clSetKernelArg( kernel, idx++, sizeof( cl_sampler ), &actualSampler );
504 test_error( error, "Unable to set kernel arguments" );
505 }
506 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &xOffsets );
507 test_error( error, "Unable to set kernel arguments" );
508 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &yOffsets );
509 test_error( error, "Unable to set kernel arguments" );
510 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &zOffsets );
511 test_error( error, "Unable to set kernel arguments" );
512 error = clSetKernelArg( kernel, idx++, sizeof( cl_mem ), &results );
513 test_error( error, "Unable to set kernel arguments" );
514
515 const float float_offsets[] = { 0.0f, MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30), 0.25f, 0.3f, 0.5f - FLT_EPSILON/4.0f, 0.5f, 0.9f, 1.0f - FLT_EPSILON/2 };
516 int float_offset_count = sizeof( float_offsets) / sizeof( float_offsets[0] );
517 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
518 int loopCount = 2 * float_offset_count;
519 if( ! useFloatCoords )
520 loopCount = 1;
521 if (gTestMaxImages) {
522 loopCount = 1;
523 log_info("Testing each size only once with pixel offsets of %g for max sized images.\n", float_offsets[0]);
524 }
525
526 // Get the maximum absolute error for this format
527 double formatAbsoluteError = get_max_absolute_error(imageInfo->format, imageSampler);
528 if (gDebugTrace) log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
529
530 if (0 == initHalf && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT ) {
531 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode( queue );
532 if (initHalf) {
533 log_info("Half rounding mode successfully detected.\n");
534 }
535 }
536 size_t nextLevelOffset = 0;
537 size_t width_lod = imageInfo->width, height_lod = imageInfo->height;
538 for( size_t lod = 0; (gTestMipmaps && (lod < imageInfo->num_mip_levels))|| (!gTestMipmaps && lod < 1); lod ++)
539 {
540 size_t resultValuesSize = width_lod * height_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4;
541 BufferOwningPtr<char> resultValues(malloc( resultValuesSize ));
542 float lod_float = (float)lod;
543 if( gTestMipmaps )
544 {
545 if(gDebugTrace)
546 log_info(" - Working at mip level %d\n", lod);
547 error = clSetKernelArg( kernel, idx, sizeof(float), &lod_float);
548 }
549 for( int q = 0; q < loopCount; q++ )
550 {
551 float offset = float_offsets[ q % float_offset_count ];
552
553 // Init the coordinates
554 InitFloatCoords( imageInfo, imageSampler, xOffsetValues, yOffsetValues, zOffsetValues,
555 q>=float_offset_count ? -offset: offset,
556 q>=float_offset_count ? offset: -offset,
557 q>=float_offset_count ? -offset: offset,
558 imageSampler->normalized_coords, d, lod );
559
560 error = clEnqueueWriteBuffer( queue, xOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, xOffsetValues, 0, NULL, NULL );
561 test_error( error, "Unable to write x offsets" );
562 error = clEnqueueWriteBuffer( queue, yOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, yOffsetValues, 0, NULL, NULL );
563 test_error( error, "Unable to write y offsets" );
564 error = clEnqueueWriteBuffer( queue, zOffsets, CL_TRUE, 0, sizeof(cl_float) * imageInfo->height * imageInfo->width * imageInfo->arraySize, zOffsetValues, 0, NULL, NULL );
565 test_error( error, "Unable to write z offsets" );
566
567
568 memset( resultValues, 0xff, resultValuesSize );
569 clEnqueueWriteBuffer( queue, results, CL_TRUE, 0, resultValuesSize, resultValues, 0, NULL, NULL );
570
571 // Figure out thread dimensions
572 threads[0] = (size_t)width_lod;
573 threads[1] = (size_t)height_lod;
574 threads[2] = (size_t)imageInfo->arraySize;
575
576 // Run the kernel
577 error = clEnqueueNDRangeKernel( queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL );
578 test_error( error, "Unable to run kernel" );
579
580 // Get results
581 error = clEnqueueReadBuffer( queue, results, CL_TRUE, 0, width_lod * height_lod * imageInfo->arraySize * get_explicit_type_size( outputType ) * 4, resultValues, 0, NULL, NULL );
582 test_error( error, "Unable to read results from kernel" );
583 if( gDebugTrace )
584 log_info( " results read\n" );
585
586 // Validate results element by element
587 char *imagePtr = (char *)imageValues + nextLevelOffset;
588
589 if((imageInfo->format->image_channel_order == CL_DEPTH) && (outputType == kFloat) )
590 {
591 // Validate float results
592 float *resultPtr = (float *)(char *)resultValues;
593 float expected[4], error=0.0f;
594 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
595
596 for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
597 {
598 for( size_t y = 0; y < height_lod; y++ )
599 {
600 for( size_t x = 0; x < width_lod; x++, j++ )
601 {
602 // Step 1: go through and see if the results verify for the pixel
603 // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
604 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
605 int checkOnlyOnePixel = 0;
606 int found_pixel = 0;
607 float offset = NORM_OFFSET;
608 if (!imageSampler->normalized_coords
609 || imageSampler->filter_mode != CL_FILTER_NEAREST
610 || NORM_OFFSET == 0
611 #if defined( __APPLE__ )
612 // Apple requires its CPU implementation to do
613 // correctly rounded address arithmetic in all modes
614 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
615 #endif
616 )
617 offset = 0.0f; // Loop only once
618
619 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
620 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
621 for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
622
623 int hasDenormals = 0;
624 FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
625 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
626 norm_offset_x, norm_offset_y, norm_offset_z,
627 imageSampler, expected, 0, &hasDenormals, lod );
628
629 float err1 =
630 ABS_ERROR(resultPtr[0], expected[0]);
631 // Clamp to the minimum absolute error for the format
632 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
633 float maxErr1 = std::max(
634 maxErr * maxPixel.p[0], FLT_MIN);
635
636 if( ! (err1 <= maxErr1) )
637 {
638 // Try flushing the denormals
639 if( hasDenormals )
640 {
641 // If implementation decide to flush subnormals to zero,
642 // max error needs to be adjusted
643 maxErr1 += 4 * FLT_MIN;
644
645 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
646 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
647 norm_offset_x, norm_offset_y, norm_offset_z,
648 imageSampler, expected, 0, NULL, lod );
649
650 err1 = ABS_ERROR(resultPtr[0],
651 expected[0]);
652 }
653 }
654
655 found_pixel = (err1 <= maxErr1);
656 }//norm_offset_z
657 }//norm_offset_y
658 }//norm_offset_x
659
660 // Step 2: If we did not find a match, then print out debugging info.
661 if (!found_pixel) {
662 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
663 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
664 checkOnlyOnePixel = 0;
665 int shouldReturn = 0;
666 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
667 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
668 for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
669
670 int hasDenormals = 0;
671 FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
672 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
673 norm_offset_x, norm_offset_y, norm_offset_z,
674 imageSampler, expected, 0, &hasDenormals, lod );
675
676 float err1 = ABS_ERROR(resultPtr[0],
677 expected[0]);
678 float maxErr1 = std::max(
679 maxErr * maxPixel.p[0], FLT_MIN);
680
681
682 if( ! (err1 <= maxErr1) )
683 {
684 // Try flushing the denormals
685 if( hasDenormals )
686 {
687 maxErr1 += 4 * FLT_MIN;
688
689 maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
690 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
691 imageSampler, expected, 0, NULL, lod );
692
693 err1 = ABS_ERROR(resultPtr[0],
694 expected[0]);
695 }
696 }
697
698 if( ! (err1 <= maxErr1) )
699 {
700 log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
701
702 float tempOut[4];
703 shouldReturn |= determine_validation_error_offset_2D_array<float>( imagePtr, imageInfo, imageSampler, resultPtr,
704 expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
705 norm_offset_x, norm_offset_y, norm_offset_z, j,
706 numTries, numClamped, true, lod );
707 log_error( "Step by step:\n" );
708 FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
709 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
710 norm_offset_x, norm_offset_y, norm_offset_z,
711 imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
712 log_error( "\tulps: %2.2f (max allowed: %2.2f)\n\n",
713 Ulp_Error( resultPtr[0], expected[0] ),
714 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
715 } else {
716 log_error("Test error: we should have detected this passing above.\n");
717 }
718 }//norm_offset_z
719 }//norm_offset_y
720 }//norm_offset_x
721 if( shouldReturn )
722 return 1;
723 } // if (!found_pixel)
724
725 resultPtr += 1;
726 }
727 }
728 }
729 }
730 /*
731 * FLOAT output type, order=CL_sRGBA, CL_sRGB, CL_sRGBx, CL_BGRA
732 */
733 else if(is_sRGBA_order(imageInfo->format->image_channel_order) && (outputType == kFloat) )
734 {
735 // Validate float results
736 float *resultPtr = (float *)(char *)resultValues;
737 float expected[4], error=0.0f;
738 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
739
740 for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
741 {
742 for( size_t y = 0; y < height_lod; y++ )
743 {
744 for( size_t x = 0; x < width_lod; x++, j++ )
745 {
746 // Step 1: go through and see if the results verify for the pixel
747 // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
748 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
749 int checkOnlyOnePixel = 0;
750 int found_pixel = 0;
751 float offset = NORM_OFFSET;
752 if (!imageSampler->normalized_coords
753 || imageSampler->filter_mode != CL_FILTER_NEAREST
754 || NORM_OFFSET == 0
755 #if defined( __APPLE__ )
756 // Apple requires its CPU implementation to do
757 // correctly rounded address arithmetic in all modes
758 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
759 #endif
760 )
761 offset = 0.0f; // Loop only once
762
763 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
764 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
765 for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
766
767 int hasDenormals = 0;
768 FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
769 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
770 norm_offset_x, norm_offset_y, norm_offset_z,
771 imageSampler, expected, 0, &hasDenormals, lod );
772
773 float err1 =
774 ABS_ERROR(sRGBmap(resultPtr[0]),
775 sRGBmap(expected[0]));
776 float err2 =
777 ABS_ERROR(sRGBmap(resultPtr[1]),
778 sRGBmap(expected[1]));
779 float err3 =
780 ABS_ERROR(sRGBmap(resultPtr[2]),
781 sRGBmap(expected[2]));
782 float err4 =
783 ABS_ERROR(resultPtr[3], expected[3]);
784 float maxErr = 0.5;
785
786 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
787 {
788 // Try flushing the denormals
789 if( hasDenormals )
790 {
791 // If implementation decide to flush subnormals to zero,
792 // max error needs to be adjusted
793 maxErr += 4 * FLT_MIN;
794
795 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
796 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
797 norm_offset_x, norm_offset_y, norm_offset_z,
798 imageSampler, expected, 0, NULL, lod );
799
800 err1 =
801 ABS_ERROR(sRGBmap(resultPtr[0]),
802 sRGBmap(expected[0]));
803 err2 =
804 ABS_ERROR(sRGBmap(resultPtr[1]),
805 sRGBmap(expected[1]));
806 err3 =
807 ABS_ERROR(sRGBmap(resultPtr[2]),
808 sRGBmap(expected[2]));
809 err4 = ABS_ERROR(resultPtr[3],
810 expected[3]);
811 }
812 }
813
814 found_pixel = (err1 <= maxErr) && (err2 <= maxErr) && (err3 <= maxErr) && (err4 <= maxErr);
815 }//norm_offset_z
816 }//norm_offset_y
817 }//norm_offset_x
818
819 // Step 2: If we did not find a match, then print out debugging info.
820 if (!found_pixel) {
821 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
822 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
823 checkOnlyOnePixel = 0;
824 int shouldReturn = 0;
825 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
826 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
827 for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
828
829 int hasDenormals = 0;
830 FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
831 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
832 norm_offset_x, norm_offset_y, norm_offset_z,
833 imageSampler, expected, 0, &hasDenormals, lod );
834
835 float err1 =
836 ABS_ERROR(sRGBmap(resultPtr[0]),
837 sRGBmap(expected[0]));
838 float err2 =
839 ABS_ERROR(sRGBmap(resultPtr[1]),
840 sRGBmap(expected[1]));
841 float err3 =
842 ABS_ERROR(sRGBmap(resultPtr[2]),
843 sRGBmap(expected[2]));
844 float err4 = ABS_ERROR(resultPtr[3],
845 expected[3]);
846 float maxErr = 0.6;
847
848 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
849 {
850 // Try flushing the denormals
851 if( hasDenormals )
852 {
853 // If implementation decide to flush subnormals to zero,
854 // max error needs to be adjusted
855 maxErr += 4 * FLT_MIN;
856
857 maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
858 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
859 imageSampler, expected, 0, NULL, lod );
860
861 err1 = ABS_ERROR(
862 sRGBmap(resultPtr[0]),
863 sRGBmap(expected[0]));
864 err2 = ABS_ERROR(
865 sRGBmap(resultPtr[1]),
866 sRGBmap(expected[1]));
867 err3 = ABS_ERROR(
868 sRGBmap(resultPtr[2]),
869 sRGBmap(expected[2]));
870 err4 = ABS_ERROR(resultPtr[3],
871 expected[3]);
872 }
873 }
874
875 if( ! (err1 <= maxErr) || ! (err2 <= maxErr) || ! (err3 <= maxErr) || ! (err4 <= maxErr) )
876 {
877 log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
878
879 float tempOut[4];
880 shouldReturn |= determine_validation_error_offset_2D_array<float>( imagePtr, imageInfo, imageSampler, resultPtr,
881 expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
882 norm_offset_x, norm_offset_y, norm_offset_z, j,
883 numTries, numClamped, true, lod );
884 log_error( "Step by step:\n" );
885 FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
886 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
887 norm_offset_x, norm_offset_y, norm_offset_z,
888 imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
889 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
890 Ulp_Error( resultPtr[0], expected[0] ),
891 Ulp_Error( resultPtr[1], expected[1] ),
892 Ulp_Error( resultPtr[2], expected[2] ),
893 Ulp_Error( resultPtr[3], expected[3] ),
894 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
895 } else {
896 log_error("Test error: we should have detected this passing above.\n");
897 }
898 }//norm_offset_z
899 }//norm_offset_y
900 }//norm_offset_x
901 if( shouldReturn )
902 return 1;
903 } // if (!found_pixel)
904
905 resultPtr += 4;
906 }
907 }
908 }
909 }
910 /*
911 * FLOAT output type
912 */
913 else if( outputType == kFloat )
914 {
915 // Validate float results
916 float *resultPtr = (float *)(char *)resultValues;
917 float expected[4], error=0.0f;
918 float maxErr = get_max_relative_error( imageInfo->format, imageSampler, 1 /*3D*/, CL_FILTER_LINEAR == imageSampler->filter_mode );
919
920 for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
921 {
922 for( size_t y = 0; y < height_lod; y++ )
923 {
924 for( size_t x = 0; x < width_lod; x++, j++ )
925 {
926 // Step 1: go through and see if the results verify for the pixel
927 // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
928 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
929 int checkOnlyOnePixel = 0;
930 int found_pixel = 0;
931 float offset = NORM_OFFSET;
932 if (!imageSampler->normalized_coords
933 || imageSampler->filter_mode != CL_FILTER_NEAREST
934 || NORM_OFFSET == 0
935 #if defined( __APPLE__ )
936 // Apple requires its CPU implementation to do
937 // correctly rounded address arithmetic in all modes
938 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
939 #endif
940 )
941 offset = 0.0f; // Loop only once
942
943 for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) {
944 for (float norm_offset_y = -offset; norm_offset_y <= offset && !found_pixel ; norm_offset_y += NORM_OFFSET) {
945 for (float norm_offset_z = -offset; norm_offset_z <= NORM_OFFSET && !found_pixel; norm_offset_z += NORM_OFFSET) {
946
947 int hasDenormals = 0;
948 FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
949 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
950 norm_offset_x, norm_offset_y, norm_offset_z,
951 imageSampler, expected, 0, &hasDenormals, lod );
952
953 float err1 =
954 ABS_ERROR(resultPtr[0], expected[0]);
955 float err2 =
956 ABS_ERROR(resultPtr[1], expected[1]);
957 float err3 =
958 ABS_ERROR(resultPtr[2], expected[2]);
959 float err4 =
960 ABS_ERROR(resultPtr[3], expected[3]);
961 // Clamp to the minimum absolute error for the format
962 if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; }
963 if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; }
964 if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; }
965 if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; }
966 float maxErr1 = std::max(
967 maxErr * maxPixel.p[0], FLT_MIN);
968 float maxErr2 = std::max(
969 maxErr * maxPixel.p[1], FLT_MIN);
970 float maxErr3 = std::max(
971 maxErr * maxPixel.p[2], FLT_MIN);
972 float maxErr4 = std::max(
973 maxErr * maxPixel.p[3], FLT_MIN);
974
975 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
976 {
977 // Try flushing the denormals
978 if( hasDenormals )
979 {
980 // If implementation decide to flush subnormals to zero,
981 // max error needs to be adjusted
982 maxErr1 += 4 * FLT_MIN;
983 maxErr2 += 4 * FLT_MIN;
984 maxErr3 += 4 * FLT_MIN;
985 maxErr4 += 4 * FLT_MIN;
986
987 maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
988 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
989 norm_offset_x, norm_offset_y, norm_offset_z,
990 imageSampler, expected, 0, NULL, lod );
991
992 err1 = ABS_ERROR(resultPtr[0],
993 expected[0]);
994 err2 = ABS_ERROR(resultPtr[1],
995 expected[1]);
996 err3 = ABS_ERROR(resultPtr[2],
997 expected[2]);
998 err4 = ABS_ERROR(resultPtr[3],
999 expected[3]);
1000 }
1001 }
1002
1003 found_pixel = (err1 <= maxErr1) && (err2 <= maxErr2) && (err3 <= maxErr3) && (err4 <= maxErr4);
1004 }//norm_offset_z
1005 }//norm_offset_y
1006 }//norm_offset_x
1007
1008 // Step 2: If we did not find a match, then print out debugging info.
1009 if (!found_pixel) {
1010 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1011 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1012 checkOnlyOnePixel = 0;
1013 int shouldReturn = 0;
1014 for (float norm_offset_x = -offset; norm_offset_x <= offset && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1015 for (float norm_offset_y = -offset; norm_offset_y <= offset && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1016 for (float norm_offset_z = -offset; norm_offset_z <= offset && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
1017
1018 int hasDenormals = 0;
1019 FloatPixel maxPixel = sample_image_pixel_float_offset( imagePtr, imageInfo,
1020 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1021 norm_offset_x, norm_offset_y, norm_offset_z,
1022 imageSampler, expected, 0, &hasDenormals, lod );
1023
1024 float err1 = ABS_ERROR(resultPtr[0],
1025 expected[0]);
1026 float err2 = ABS_ERROR(resultPtr[1],
1027 expected[1]);
1028 float err3 = ABS_ERROR(resultPtr[2],
1029 expected[2]);
1030 float err4 = ABS_ERROR(resultPtr[3],
1031 expected[3]);
1032 float maxErr1 = std::max(
1033 maxErr * maxPixel.p[0], FLT_MIN);
1034 float maxErr2 = std::max(
1035 maxErr * maxPixel.p[1], FLT_MIN);
1036 float maxErr3 = std::max(
1037 maxErr * maxPixel.p[2], FLT_MIN);
1038 float maxErr4 = std::max(
1039 maxErr * maxPixel.p[3], FLT_MIN);
1040
1041
1042 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
1043 {
1044 // Try flushing the denormals
1045 if( hasDenormals )
1046 {
1047 maxErr1 += 4 * FLT_MIN;
1048 maxErr2 += 4 * FLT_MIN;
1049 maxErr3 += 4 * FLT_MIN;
1050 maxErr4 += 4 * FLT_MIN;
1051
1052 maxPixel = sample_image_pixel_float( imagePtr, imageInfo,
1053 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1054 imageSampler, expected, 0, NULL, lod );
1055
1056 err1 = ABS_ERROR(resultPtr[0],
1057 expected[0]);
1058 err2 = ABS_ERROR(resultPtr[1],
1059 expected[1]);
1060 err3 = ABS_ERROR(resultPtr[2],
1061 expected[2]);
1062 err4 = ABS_ERROR(resultPtr[3],
1063 expected[3]);
1064 }
1065 }
1066
1067 if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) )
1068 {
1069 log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
1070
1071 float tempOut[4];
1072 shouldReturn |= determine_validation_error_offset_2D_array<float>( imagePtr, imageInfo, imageSampler, resultPtr,
1073 expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
1074 norm_offset_x, norm_offset_y, norm_offset_z, j,
1075 numTries, numClamped, true, lod );
1076 log_error( "Step by step:\n" );
1077 FloatPixel temp = sample_image_pixel_float_offset( imagePtr, imageInfo,
1078 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1079 norm_offset_x, norm_offset_y, norm_offset_z,
1080 imageSampler, tempOut, 1 /*verbose*/, &hasDenormals, lod);
1081 log_error( "\tulps: %2.2f, %2.2f, %2.2f, %2.2f (max allowed: %2.2f)\n\n",
1082 Ulp_Error( resultPtr[0], expected[0] ),
1083 Ulp_Error( resultPtr[1], expected[1] ),
1084 Ulp_Error( resultPtr[2], expected[2] ),
1085 Ulp_Error( resultPtr[3], expected[3] ),
1086 Ulp_Error( MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) + maxErr, MAKE_HEX_FLOAT(0x1.000002p0f, 0x1000002L, -24) ) );
1087 } else {
1088 log_error("Test error: we should have detected this passing above.\n");
1089 }
1090 }//norm_offset_z
1091 }//norm_offset_y
1092 }//norm_offset_x
1093 if( shouldReturn )
1094 return 1;
1095 } // if (!found_pixel)
1096
1097 resultPtr += 4;
1098 }
1099 }
1100 }
1101 }
1102 /*
1103 * UINT output type
1104 */
1105 else if( outputType == kUInt )
1106 {
1107 // Validate unsigned integer results
1108 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
1109 unsigned int expected[4];
1110 float error;
1111 for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
1112 {
1113 for( size_t y = 0; y < height_lod; y++ )
1114 {
1115 for( size_t x = 0; x < width_lod; x++, j++ )
1116 {
1117 // Step 1: go through and see if the results verify for the pixel
1118 // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
1119 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1120 int checkOnlyOnePixel = 0;
1121 int found_pixel = 0;
1122 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1123 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1124 for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
1125
1126 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1127 // E.g., test one pixel.
1128 if (!imageSampler->normalized_coords
1129 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1130 || NORM_OFFSET == 0)
1131 {
1132 norm_offset_x = 0.0f;
1133 norm_offset_y = 0.0f;
1134 norm_offset_z = 0.0f;
1135 checkOnlyOnePixel = 1;
1136 }
1137
1138 if(gTestMipmaps)
1139 sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
1140 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1141 norm_offset_x, norm_offset_y, norm_offset_z,
1142 imageSampler, expected, lod );
1143 else
1144 sample_image_pixel_offset<unsigned int>( imageValues, imageInfo,
1145 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1146 norm_offset_x, norm_offset_y, norm_offset_z,
1147 imageSampler, expected );
1148
1149 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
1150 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
1151
1152 if (error < MAX_ERR)
1153 found_pixel = 1;
1154 }//norm_offset_z
1155 }//norm_offset_y
1156 }//norm_offset_x
1157
1158 // Step 2: If we did not find a match, then print out debugging info.
1159 if (!found_pixel) {
1160 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1161 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1162 checkOnlyOnePixel = 0;
1163 int shouldReturn = 0;
1164 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1165 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1166 for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
1167
1168 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1169 // E.g., test one pixel.
1170 if (!imageSampler->normalized_coords
1171 || !(gDeviceType
1172 & CL_DEVICE_TYPE_GPU)
1173 || NORM_OFFSET == 0)
1174 {
1175 norm_offset_x = 0.0f;
1176 norm_offset_y = 0.0f;
1177 norm_offset_z = 0.0f;
1178 checkOnlyOnePixel = 1;
1179 }
1180
1181 if(gTestMipmaps)
1182 sample_image_pixel_offset<unsigned int>( imagePtr, imageInfo,
1183 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1184 norm_offset_x, norm_offset_y, norm_offset_z,
1185 imageSampler, expected, lod );
1186 else
1187 sample_image_pixel_offset<unsigned int>( imageValues, imageInfo,
1188 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1189 norm_offset_x, norm_offset_y, norm_offset_z,
1190 imageSampler, expected );
1191
1192 error = errMax( errMax( abs_diff_uint(expected[ 0 ], resultPtr[ 0 ]), abs_diff_uint(expected[ 1 ], resultPtr[ 1 ]) ),
1193 errMax( abs_diff_uint(expected[ 2 ], resultPtr[ 2 ]), abs_diff_uint(expected[ 3 ], resultPtr[ 3 ]) ) );
1194
1195 if( error > MAX_ERR )
1196 {
1197 log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
1198 shouldReturn |= determine_validation_error_offset_2D_array<unsigned int>( imagePtr, imageInfo, imageSampler, resultPtr,
1199 expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
1200 norm_offset_x, norm_offset_y, norm_offset_z,
1201 j, numTries, numClamped, false, lod );
1202 } else {
1203 log_error("Test error: we should have detected this passing above.\n");
1204 }
1205 }//norm_offset_z
1206 }//norm_offset_y
1207 }//norm_offset_x
1208 if( shouldReturn )
1209 return 1;
1210 } // if (!found_pixel)
1211
1212 resultPtr += 4;
1213 }
1214 }
1215 }
1216 }
1217 else
1218 /*
1219 * INT output type
1220 */
1221 {
1222 // Validate integer results
1223 int *resultPtr = (int *)(char *)resultValues;
1224 int expected[4];
1225 float error;
1226 for( size_t z = 0, j = 0; z < imageInfo->arraySize; z++ )
1227 {
1228 for( size_t y = 0; y < height_lod; y++ )
1229 {
1230 for( size_t x = 0; x < width_lod; x++, j++ )
1231 {
1232 // Step 1: go through and see if the results verify for the pixel
1233 // For the normalized case on a GPU we put in offsets to the X, Y and Z to see if we land on the
1234 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1235 int checkOnlyOnePixel = 0;
1236 int found_pixel = 0;
1237 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1238 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1239 for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !found_pixel && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
1240
1241 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1242 // E.g., test one pixel.
1243 if (!imageSampler->normalized_coords
1244 || !(gDeviceType & CL_DEVICE_TYPE_GPU)
1245 || NORM_OFFSET == 0)
1246 {
1247 norm_offset_x = 0.0f;
1248 norm_offset_y = 0.0f;
1249 norm_offset_z = 0.0f;
1250 checkOnlyOnePixel = 1;
1251 }
1252
1253 if(gTestMipmaps)
1254 sample_image_pixel_offset<int>( imagePtr, imageInfo,
1255 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1256 norm_offset_x, norm_offset_y, norm_offset_z,
1257 imageSampler, expected, lod );
1258 else
1259 sample_image_pixel_offset<int>( imageValues, imageInfo,
1260 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1261 norm_offset_x, norm_offset_y, norm_offset_z,
1262 imageSampler, expected );
1263
1264 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
1265 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
1266
1267 if (error < MAX_ERR)
1268 found_pixel = 1;
1269 }//norm_offset_z
1270 }//norm_offset_y
1271 }//norm_offset_x
1272
1273 // Step 2: If we did not find a match, then print out debugging info.
1274 if (!found_pixel) {
1275 // For the normalized case on a GPU we put in offsets to the X and Y to see if we land on the
1276 // right pixel. This addresses the significant inaccuracy in GPU normalization in OpenCL 1.0.
1277 checkOnlyOnePixel = 0;
1278 int shouldReturn = 0;
1279 for (float norm_offset_x = -NORM_OFFSET; norm_offset_x <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_x += NORM_OFFSET) {
1280 for (float norm_offset_y = -NORM_OFFSET; norm_offset_y <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_y += NORM_OFFSET) {
1281 for (float norm_offset_z = -NORM_OFFSET; norm_offset_z <= NORM_OFFSET && !checkOnlyOnePixel; norm_offset_z += NORM_OFFSET) {
1282
1283 // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0)
1284 // E.g., test one pixel.
1285 if (!imageSampler->normalized_coords
1286 || !(gDeviceType
1287 & CL_DEVICE_TYPE_GPU)
1288 || NORM_OFFSET == 0)
1289 {
1290 norm_offset_x = 0.0f;
1291 norm_offset_y = 0.0f;
1292 norm_offset_z = 0.0f;
1293 checkOnlyOnePixel = 1;
1294 }
1295
1296 if(gTestMipmaps)
1297 sample_image_pixel_offset<int>( imagePtr, imageInfo,
1298 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1299 norm_offset_x, norm_offset_y, norm_offset_z,
1300 imageSampler, expected, lod );
1301 else
1302 sample_image_pixel_offset<int>( imageValues, imageInfo,
1303 xOffsetValues[ j ], yOffsetValues[ j ], zOffsetValues[ j ],
1304 norm_offset_x, norm_offset_y, norm_offset_z,
1305 imageSampler, expected, 0 );
1306
1307 error = errMax( errMax( abs_diff_int(expected[ 0 ], resultPtr[ 0 ]), abs_diff_int(expected[ 1 ], resultPtr[ 1 ]) ),
1308 errMax( abs_diff_int(expected[ 2 ], resultPtr[ 2 ]), abs_diff_int(expected[ 3 ], resultPtr[ 3 ]) ) );
1309
1310 if( error > MAX_ERR )
1311 {
1312 log_error("FAILED norm_offsets: %g , %g , %g:\n", norm_offset_x, norm_offset_y, norm_offset_z);
1313 shouldReturn |= determine_validation_error_offset_2D_array<int>( imagePtr, imageInfo, imageSampler, resultPtr,
1314 expected, error, xOffsetValues[j], yOffsetValues[j], zOffsetValues[j],
1315 norm_offset_x, norm_offset_y, norm_offset_z,
1316 j, numTries, numClamped, false, lod );
1317 } else {
1318 log_error("Test error: we should have detected this passing above.\n");
1319 }
1320 }//norm_offset_z
1321 }//norm_offset_y
1322 }//norm_offset_x
1323 if( shouldReturn )
1324 return 1;
1325 } // if (!found_pixel)
1326
1327 resultPtr += 4;
1328 }
1329 }
1330 }
1331 }
1332 }
1333 {
1334 nextLevelOffset += width_lod * height_lod * imageInfo->arraySize * get_pixel_size(imageInfo->format);
1335 width_lod = ( width_lod >> 1 ) ? ( width_lod >> 1 ) : 1;
1336 height_lod = ( height_lod >> 1 ) ? ( height_lod >> 1 ) : 1;
1337 }
1338 }
1339
1340 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1341 }
1342
test_read_image_set_2D_array(cl_device_id device,cl_context context,cl_command_queue queue,const cl_image_format * format,image_sampler_data * imageSampler,bool floatCoords,ExplicitType outputType)1343 int test_read_image_set_2D_array(cl_device_id device, cl_context context,
1344 cl_command_queue queue,
1345 const cl_image_format *format,
1346 image_sampler_data *imageSampler,
1347 bool floatCoords, ExplicitType outputType)
1348 {
1349 char programSrc[10240];
1350 const char *ptr;
1351 const char *readFormat;
1352 RandomSeed seed( gRandomSeed );
1353
1354 const char *KernelSourcePattern = NULL;
1355
1356 int error;
1357
1358 clProgramWrapper program;
1359 clKernelWrapper kernel;
1360
1361 // Get operating parameters
1362 size_t maxWidth, maxHeight, maxArraySize;
1363 cl_ulong maxAllocSize, memSize;
1364 image_descriptor imageInfo = { 0x0 };
1365 size_t pixelSize;
1366
1367 imageInfo.format = format;
1368 imageInfo.type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
1369 pixelSize = get_pixel_size( imageInfo.format );
1370
1371 error = clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL );
1372 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL );
1373 error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxArraySize ), &maxArraySize, NULL );
1374 error |= clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
1375 error |= clGetDeviceInfo( device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
1376 test_error( error, "Unable to get max image 3D size from device" );
1377
1378 if (memSize > (cl_ulong)SIZE_MAX) {
1379 memSize = (cl_ulong)SIZE_MAX;
1380 }
1381
1382 // Determine types
1383 if( outputType == kInt )
1384 readFormat = "i";
1385 else if( outputType == kUInt )
1386 readFormat = "ui";
1387 else // kFloat
1388 readFormat = "f";
1389
1390 // Construct the source
1391 const char *samplerArg = samplerKernelArg;
1392 char samplerVar[ 1024 ] = "";
1393 if( gUseKernelSamplers )
1394 {
1395 get_sampler_kernel_code( imageSampler, samplerVar );
1396 samplerArg = "";
1397 }
1398
1399 const char *imageType;
1400 const char *imageElement;
1401 if (format->image_channel_order == CL_DEPTH)
1402 {
1403 imageType = "image2d_array_depth_t";
1404 imageElement = "";
1405 }
1406 else
1407 {
1408 imageType = "image2d_array_t";
1409 imageElement = "4";
1410 }
1411
1412 // Construct the source
1413 if(gtestTypesToRun & kReadTests)
1414 {
1415 KernelSourcePattern = read2DArrayKernelSourcePattern;
1416 }
1417 else
1418 {
1419 KernelSourcePattern = read_write2DArrayKernelSourcePattern;
1420 }
1421
1422 // Construct the source
1423 sprintf(programSrc, KernelSourcePattern,
1424 gTestMipmaps
1425 ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable"
1426 : "",
1427 imageType, samplerArg, get_explicit_type_name(outputType),
1428 imageElement, gTestMipmaps ? ", float lod" : " ", samplerVar,
1429 gTestMipmaps ? offset2DarraySourceLod : offset2DarraySource,
1430 floatCoords ? float2DArrayUnnormalizedCoordKernelSource
1431 : int2DArrayCoordKernelSource,
1432 readFormat, gTestMipmaps ? ", lod" : " ");
1433
1434 ptr = programSrc;
1435 error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
1436 "sample_kernel");
1437 test_error( error, "Unable to create testing kernel" );
1438
1439 // Run tests
1440
1441 if( gTestSmallImages )
1442 {
1443 for( imageInfo.width = 1; imageInfo.width < 13; imageInfo.width++ )
1444 {
1445 imageInfo.rowPitch = imageInfo.width * pixelSize;
1446
1447 for( imageInfo.height = 1; imageInfo.height < 9; imageInfo.height++ )
1448 {
1449 imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
1450 for( imageInfo.arraySize = 2; imageInfo.arraySize < 9; imageInfo.arraySize++ )
1451 {
1452 if( gTestMipmaps )
1453 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1454
1455 if( gDebugTrace )
1456 log_info( " at size %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize );
1457 int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1458 if( retCode )
1459 return retCode;
1460 }
1461 }
1462 }
1463 }
1464 else if( gTestMaxImages )
1465 {
1466 // Try a specific set of maximum sizes
1467 size_t numbeOfSizes;
1468 size_t sizes[100][3];
1469
1470 get_max_sizes(&numbeOfSizes, 100, sizes, maxWidth, maxHeight, 1, maxArraySize, maxAllocSize, memSize, CL_MEM_OBJECT_IMAGE2D_ARRAY, imageInfo.format, CL_TRUE);
1471
1472 for( size_t idx = 0; idx < numbeOfSizes; idx++ )
1473 {
1474 imageInfo.width = sizes[ idx ][ 0 ];
1475 imageInfo.height = sizes[ idx ][ 1 ];
1476 imageInfo.arraySize = sizes[ idx ][ 2 ];
1477 imageInfo.rowPitch = imageInfo.width * pixelSize;
1478 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
1479 if( gTestMipmaps )
1480 imageInfo.num_mip_levels = (size_t) random_in_range(2, compute_max_mip_levels(imageInfo.width, imageInfo.height, 0)-1, seed);
1481 cl_ulong size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
1482 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1483 // image, the result array, plus offset arrays, will fit in the global ram space
1484 while( size > maxAllocSize || ( size * 3 ) > memSize )
1485 {
1486 if(imageInfo.arraySize == 1)
1487 {
1488 // ArraySize cannot be 0.
1489 break;
1490 }
1491 imageInfo.arraySize--;
1492 size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
1493 }
1494
1495 while( size > maxAllocSize || ( size * 3 ) > memSize )
1496 {
1497 imageInfo.height--;
1498 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
1499 size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
1500 }
1501 log_info("Testing %d x %d x %d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ]);
1502 if( gDebugTrace )
1503 log_info( " at max size %d,%d,%d\n", (int)sizes[ idx ][ 0 ], (int)sizes[ idx ][ 1 ], (int)sizes[ idx ][ 2 ] );
1504 int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1505 if( retCode )
1506 return retCode;
1507 }
1508 }
1509 else if( gTestRounding )
1510 {
1511 size_t typeRange = 1 << ( get_format_type_size( imageInfo.format ) * 8 );
1512 imageInfo.height = typeRange / 256;
1513 imageInfo.width = (size_t)( typeRange / (cl_ulong)imageInfo.height );
1514 imageInfo.arraySize = 2;
1515
1516 imageInfo.rowPitch = imageInfo.width * pixelSize;
1517 imageInfo.slicePitch = imageInfo.height * imageInfo.rowPitch;
1518 int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1519 if( retCode )
1520 return retCode;
1521 }
1522 else
1523 {
1524 int maxWidthRange = (int) reduceImageSizeRange(maxWidth);
1525 int maxHeighthRange = (int) reduceImageSizeRange(maxHeight);
1526 int maxArraySizeRange = (int) reduceImageDepth(maxArraySize);
1527
1528 for( int i = 0; i < NUM_IMAGE_ITERATIONS; i++ )
1529 {
1530 cl_ulong size;
1531 // Loop until we get a size that a) will fit in the max alloc size and b) that an allocation of that
1532 // image, the result array, plus offset arrays, will fit in the global ram space
1533 do
1534 {
1535 imageInfo.width = (size_t)random_log_in_range( 16, maxWidthRange, seed );
1536 imageInfo.height = (size_t)random_log_in_range( 16, maxHeighthRange, seed );
1537 imageInfo.arraySize = (size_t)random_log_in_range( 8, maxArraySizeRange, seed );
1538
1539 imageInfo.rowPitch = imageInfo.width * pixelSize;
1540 imageInfo.slicePitch = imageInfo.rowPitch * imageInfo.height;
1541
1542 if( gTestMipmaps )
1543 {
1544 imageInfo.num_mip_levels = random_in_range(2,compute_max_mip_levels(imageInfo.width, imageInfo.height, 0) - 1, seed);
1545 //Need to take into account the output buffer size, otherwise we will end up with input buffer that is exceeding MaxAlloc
1546 size = (cl_ulong) 4*compute_mipmapped_image_size( imageInfo ) * get_explicit_type_size( outputType );
1547 }
1548 else
1549 {
1550 if( gEnablePitch )
1551 {
1552 size_t extraWidth = (int)random_log_in_range( 0, 64, seed );
1553 imageInfo.rowPitch += extraWidth * pixelSize;
1554
1555 size_t extraHeight = (int)random_log_in_range( 0, 64, seed );
1556 imageInfo.slicePitch = imageInfo.rowPitch * (imageInfo.height + extraHeight);
1557 }
1558
1559 size = (cl_ulong)imageInfo.slicePitch * (cl_ulong)imageInfo.arraySize * 4 * 4;
1560 }
1561 } while( size > maxAllocSize || ( size * 3 ) > memSize );
1562
1563 if( gDebugTrace )
1564 {
1565 log_info( " at size %d,%d,%d (pitch %d,%d) out of %d,%d,%d\n", (int)imageInfo.width, (int)imageInfo.height, (int)imageInfo.arraySize, (int)imageInfo.rowPitch, (int)imageInfo.slicePitch, (int)maxWidth, (int)maxHeight, (int)maxArraySize );
1566 if ( gTestMipmaps )
1567 log_info(" and %d mip levels\n", (int) imageInfo.num_mip_levels);
1568 }
1569 int retCode = test_read_image_2D_array( context, queue, kernel, &imageInfo, imageSampler, floatCoords, outputType, seed );
1570 if( retCode )
1571 return retCode;
1572 }
1573 }
1574
1575 return 0;
1576 }
1577