• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "test_common.h"
18 
19 
create_sampler(cl_context context,image_sampler_data * sdata,bool test_mipmaps,cl_int * error)20 cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
21     cl_sampler sampler = nullptr;
22     if (test_mipmaps) {
23         cl_sampler_properties properties[] = {
24             CL_SAMPLER_NORMALIZED_COORDS, sdata->normalized_coords,
25             CL_SAMPLER_ADDRESSING_MODE, sdata->addressing_mode,
26             CL_SAMPLER_FILTER_MODE, sdata->filter_mode,
27             CL_SAMPLER_MIP_FILTER_MODE, sdata->filter_mode,
28             0};
29         sampler = clCreateSamplerWithProperties(context, properties, error);
30     } else {
31         sampler = clCreateSampler(context, sdata->normalized_coords, sdata->addressing_mode, sdata->filter_mode, error);
32     }
33     return sampler;
34 }
35 
InitFloatCoordsCommon(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float * zOffsets,float xfract,float yfract,float zfract,int normalized_coords,MTdata d,int lod)36 void InitFloatCoordsCommon(image_descriptor *imageInfo,
37                            image_sampler_data *imageSampler, float *xOffsets,
38                            float *yOffsets, float *zOffsets, float xfract,
39                            float yfract, float zfract, int normalized_coords,
40                            MTdata d, int lod)
41 {
42     size_t i = 0;
43     if (gDisableOffsets)
44     {
45         for (size_t z = 0; z < imageInfo->depth; z++)
46         {
47             for (size_t y = 0; y < imageInfo->height; y++)
48             {
49                 for (size_t x = 0; x < imageInfo->width; x++, i++)
50                 {
51                     xOffsets[i] = (float)(xfract + (double)x);
52                     yOffsets[i] = (float)(yfract + (double)y);
53                     zOffsets[i] = (float)(zfract + (double)z);
54                 }
55             }
56         }
57     }
58     else
59     {
60         for (size_t z = 0; z < imageInfo->depth; z++)
61         {
62             for (size_t y = 0; y < imageInfo->height; y++)
63             {
64                 for (size_t x = 0; x < imageInfo->width; x++, i++)
65                 {
66                     xOffsets[i] =
67                         (float)(xfract
68                                 + (double)((int)x
69                                            + random_in_range(-10, 10, d)));
70                     yOffsets[i] =
71                         (float)(yfract
72                                 + (double)((int)y
73                                            + random_in_range(-10, 10, d)));
74                     zOffsets[i] =
75                         (float)(zfract
76                                 + (double)((int)z
77                                            + random_in_range(-10, 10, d)));
78                 }
79             }
80         }
81     }
82 
83     if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
84     {
85         i = 0;
86         for (size_t z = 0; z < imageInfo->depth; z++)
87         {
88             for (size_t y = 0; y < imageInfo->height; y++)
89             {
90                 for (size_t x = 0; x < imageInfo->width; x++, i++)
91                 {
92                     xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
93                                                (double)imageInfo->width - 1.0);
94                     yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
95                                                (double)imageInfo->height - 1.0);
96                     zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
97                                                (double)imageInfo->depth - 1.0);
98                 }
99             }
100         }
101     }
102 
103     if (normalized_coords || gTestMipmaps)
104     {
105         i = 0;
106         if (lod == 0)
107         {
108             for (size_t z = 0; z < imageInfo->depth; z++)
109             {
110                 for (size_t y = 0; y < imageInfo->height; y++)
111                 {
112                     for (size_t x = 0; x < imageInfo->width; x++, i++)
113                     {
114                         xOffsets[i] = (float)((double)xOffsets[i]
115                                               / (double)imageInfo->width);
116                         yOffsets[i] = (float)((double)yOffsets[i]
117                                               / (double)imageInfo->height);
118                         zOffsets[i] = (float)((double)zOffsets[i]
119                                               / (double)imageInfo->depth);
120                     }
121                 }
122             }
123         }
124         else if (gTestMipmaps)
125         {
126             size_t width_lod, height_lod, depth_lod;
127 
128             width_lod =
129                 (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
130             height_lod =
131                 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
132             depth_lod =
133                 (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
134 
135             for (size_t z = 0; z < depth_lod; z++)
136             {
137                 for (size_t y = 0; y < height_lod; y++)
138                 {
139                     for (size_t x = 0; x < width_lod; x++, i++)
140                     {
141                         xOffsets[i] =
142                             (float)((double)xOffsets[i] / (double)width_lod);
143                         yOffsets[i] =
144                             (float)((double)yOffsets[i] / (double)height_lod);
145                         zOffsets[i] =
146                             (float)((double)zOffsets[i] / (double)depth_lod);
147                     }
148                 }
149             }
150         }
151     }
152 }
153 
test_read_image(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)154 int test_read_image(cl_context context, cl_command_queue queue,
155                     cl_kernel kernel, image_descriptor *imageInfo,
156                     image_sampler_data *imageSampler, bool useFloatCoords,
157                     ExplicitType outputType, MTdata d)
158 {
159     int error;
160     size_t threads[3];
161     static int initHalf = 0;
162 
163     cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
164 
165     clMemWrapper xOffsets, yOffsets, zOffsets, results;
166     clSamplerWrapper actualSampler;
167     BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
168 
169     // Create offset data
170     BufferOwningPtr<cl_float> xOffsetValues(
171         malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
172                * imageInfo->depth));
173     BufferOwningPtr<cl_float> yOffsetValues(
174         malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
175                * imageInfo->depth));
176     BufferOwningPtr<cl_float> zOffsetValues(
177         malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
178                * imageInfo->depth));
179 
180     if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
181         if (DetectFloatToHalfRoundingMode(queue)) return 1;
182 
183     BufferOwningPtr<char> imageValues;
184     generate_random_image_data(imageInfo, imageValues, d);
185 
186     // Construct testing sources
187     clProtectedImage protImage;
188     clMemWrapper unprotImage;
189     cl_mem image;
190 
191     if (gtestTypesToRun & kReadTests)
192     {
193         image_read_write_flags = CL_MEM_READ_ONLY;
194     }
195     else
196     {
197         image_read_write_flags = CL_MEM_READ_WRITE;
198     }
199 
200     if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
201     {
202         // clProtectedImage uses USE_HOST_PTR, so just rely on that for the
203         // testing (via Ian) Do not use protected images for max image size test
204         // since it rounds the row size to a page size
205         if (gTestMaxImages)
206         {
207             generate_random_image_data(imageInfo,
208                                        maxImageUseHostPtrBackingStore, d);
209             unprotImage = create_image_3d(
210                 context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
211                 imageInfo->format, imageInfo->width, imageInfo->height,
212                 imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
213                 (gEnablePitch ? imageInfo->slicePitch : 0),
214                 maxImageUseHostPtrBackingStore, &error);
215         }
216         else
217         {
218             error = protImage.Create(context, image_read_write_flags,
219                                      imageInfo->format, imageInfo->width,
220                                      imageInfo->height, imageInfo->depth);
221         }
222         if (error != CL_SUCCESS)
223         {
224             log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
225                       "(pitch %d, %d ) (%s)",
226                       (int)imageInfo->width, (int)imageInfo->height,
227                       (int)imageInfo->depth, (int)imageInfo->rowPitch,
228                       (int)imageInfo->slicePitch, IGetErrorString(error));
229             return error;
230         }
231         if (gTestMaxImages)
232             image = (cl_mem)unprotImage;
233         else
234             image = (cl_mem)protImage;
235     }
236     else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
237     {
238         // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
239         // in
240         unprotImage = create_image_3d(
241             context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
242             imageInfo->format, imageInfo->width, imageInfo->height,
243             imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
244             (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
245         if (error != CL_SUCCESS)
246         {
247             log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
248                       "(pitch %d, %d ) (%s)",
249                       (int)imageInfo->width, (int)imageInfo->height,
250                       (int)imageInfo->depth, (int)imageInfo->rowPitch,
251                       (int)imageInfo->slicePitch, IGetErrorString(error));
252             return error;
253         }
254         image = unprotImage;
255     }
256     else // Either CL_MEM_ALLOC_HOST_PTR or none
257     {
258         // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
259         // be accessed by the host, but otherwise it works just as if no flag is
260         // specified, so we just do the same thing either way
261         if (!gTestMipmaps)
262         {
263             unprotImage = create_image_3d(
264                 context, image_read_write_flags | gMemFlagsToUse,
265                 imageInfo->format, imageInfo->width, imageInfo->height,
266                 imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
267                 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
268                 &error);
269             if (error != CL_SUCCESS)
270             {
271                 log_error("ERROR: Unable to create 3D image of size %d x %d x "
272                           "%d (pitch %d, %d ) (%s)",
273                           (int)imageInfo->width, (int)imageInfo->height,
274                           (int)imageInfo->depth, (int)imageInfo->rowPitch,
275                           (int)imageInfo->slicePitch, IGetErrorString(error));
276                 return error;
277             }
278             image = unprotImage;
279         }
280         else
281         {
282             cl_image_desc image_desc = { 0 };
283             image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
284             image_desc.image_width = imageInfo->width;
285             image_desc.image_height = imageInfo->height;
286             image_desc.image_depth = imageInfo->depth;
287             image_desc.num_mip_levels = imageInfo->num_mip_levels;
288 
289 
290             unprotImage =
291                 clCreateImage(context, image_read_write_flags,
292                               imageInfo->format, &image_desc, NULL, &error);
293             if (error != CL_SUCCESS)
294             {
295                 log_error("ERROR: Unable to create %d level mipmapped 3D image "
296                           "of size %d x %d x %d (pitch %d, %d ) (%s)",
297                           (int)imageInfo->num_mip_levels, (int)imageInfo->width,
298                           (int)imageInfo->height, (int)imageInfo->depth,
299                           (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
300                           IGetErrorString(error));
301                 return error;
302             }
303             image = unprotImage;
304         }
305     }
306 
307     if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
308     {
309         size_t origin[4] = { 0, 0, 0, 0 };
310         size_t region[3] = { imageInfo->width, imageInfo->height,
311                              imageInfo->depth };
312 
313         if (gDebugTrace) log_info(" - Writing image...\n");
314 
315         if (!gTestMipmaps)
316         {
317 
318             error =
319                 clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
320                                     gEnablePitch ? imageInfo->rowPitch : 0,
321                                     gEnablePitch ? imageInfo->slicePitch : 0,
322                                     imageValues, 0, NULL, NULL);
323 
324             if (error != CL_SUCCESS)
325             {
326                 log_error("ERROR: Unable to write to 3D image of size %d x %d "
327                           "x %d \n",
328                           (int)imageInfo->width, (int)imageInfo->height,
329                           (int)imageInfo->depth);
330                 return error;
331             }
332         }
333         else
334         {
335             int nextLevelOffset = 0;
336 
337             for (int i = 0; i < imageInfo->num_mip_levels; i++)
338             {
339                 origin[3] = i;
340                 error = clEnqueueWriteImage(
341                     queue, image, CL_TRUE, origin, region,
342                     /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
343                     /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
344                     ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
345                 if (error != CL_SUCCESS)
346                 {
347                     log_error("ERROR: Unable to write to %d level mipmapped 3D "
348                               "image of size %d x %d x %d\n",
349                               (int)imageInfo->num_mip_levels,
350                               (int)imageInfo->width, (int)imageInfo->height,
351                               (int)imageInfo->depth);
352                     return error;
353                 }
354                 nextLevelOffset += region[0] * region[1] * region[2]
355                     * get_pixel_size(imageInfo->format);
356                 // Subsequent mip level dimensions keep halving
357                 region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
358                 region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
359                 region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
360             }
361         }
362     }
363 
364     xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
365                               sizeof(cl_float) * imageInfo->width
366                                   * imageInfo->height * imageInfo->depth,
367                               xOffsetValues, &error);
368     test_error(error, "Unable to create x offset buffer");
369     yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
370                               sizeof(cl_float) * imageInfo->width
371                                   * imageInfo->height * imageInfo->depth,
372                               yOffsetValues, &error);
373     test_error(error, "Unable to create y offset buffer");
374     zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
375                               sizeof(cl_float) * imageInfo->width
376                                   * imageInfo->height * imageInfo->depth,
377                               zOffsetValues, &error);
378     test_error(error, "Unable to create y offset buffer");
379     results =
380         clCreateBuffer(context, CL_MEM_READ_WRITE,
381                        get_explicit_type_size(outputType) * 4 * imageInfo->width
382                            * imageInfo->height * imageInfo->depth,
383                        NULL, &error);
384     test_error(error, "Unable to create result buffer");
385 
386     // Create sampler to use
387     actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
388     test_error(error, "Unable to create image sampler");
389 
390     // Set arguments
391     int idx = 0;
392     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
393     test_error(error, "Unable to set kernel arguments");
394     if (!gUseKernelSamplers)
395     {
396         error =
397             clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
398         test_error(error, "Unable to set kernel arguments");
399     }
400     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
401     test_error(error, "Unable to set kernel arguments");
402     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
403     test_error(error, "Unable to set kernel arguments");
404     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
405     test_error(error, "Unable to set kernel arguments");
406     error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
407     test_error(error, "Unable to set kernel arguments");
408 
409     const float float_offsets[] = { 0.0f,
410                                     MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
411                                     0.25f,
412                                     0.3f,
413                                     0.5f - FLT_EPSILON / 4.0f,
414                                     0.5f,
415                                     0.9f,
416                                     1.0f - FLT_EPSILON / 2 };
417     int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
418     int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
419     int loopCount = 2 * float_offset_count;
420     if (!useFloatCoords) loopCount = 1;
421     if (gTestMaxImages)
422     {
423         loopCount = 1;
424         log_info("Testing each size only once with pixel offsets of %g for max "
425                  "sized images.\n",
426                  float_offsets[0]);
427     }
428 
429     // Get the maximum absolute error for this format
430     double formatAbsoluteError =
431         get_max_absolute_error(imageInfo->format, imageSampler);
432     if (gDebugTrace)
433         log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
434 
435     if (0 == initHalf
436         && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
437     {
438         initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
439         if (initHalf)
440         {
441             log_info("Half rounding mode successfully detected.\n");
442         }
443     }
444 
445     int nextLevelOffset = 0;
446     size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
447            depth_lod = imageInfo->depth;
448 
449     // Loop over all mipmap levels, if we are testing mipmapped images.
450     for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
451          || (!gTestMipmaps && lod < 1);
452          lod++)
453     {
454         size_t resultValuesSize = width_lod * height_lod * depth_lod
455             * get_explicit_type_size(outputType) * 4;
456         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
457         float lod_float = (float)lod;
458         if (gTestMipmaps)
459         {
460             // Set the lod kernel arg
461             if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
462             error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
463             test_error(error, "Unable to set kernel arguments");
464         }
465 
466         for (int q = 0; q < loopCount; q++)
467         {
468             float offset = float_offsets[q % float_offset_count];
469 
470             // Init the coordinates
471             InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
472                                   yOffsetValues, zOffsetValues,
473                                   q >= float_offset_count ? -offset : offset,
474                                   q >= float_offset_count ? offset : -offset,
475                                   q >= float_offset_count ? -offset : offset,
476                                   imageSampler->normalized_coords, d, lod);
477 
478             error =
479                 clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
480                                      sizeof(cl_float) * imageInfo->height
481                                          * imageInfo->width * imageInfo->depth,
482                                      xOffsetValues, 0, NULL, NULL);
483             test_error(error, "Unable to write x offsets");
484             error =
485                 clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
486                                      sizeof(cl_float) * imageInfo->height
487                                          * imageInfo->width * imageInfo->depth,
488                                      yOffsetValues, 0, NULL, NULL);
489             test_error(error, "Unable to write y offsets");
490             error =
491                 clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
492                                      sizeof(cl_float) * imageInfo->height
493                                          * imageInfo->width * imageInfo->depth,
494                                      zOffsetValues, 0, NULL, NULL);
495             test_error(error, "Unable to write z offsets");
496 
497 
498             memset(resultValues, 0xff, resultValuesSize);
499             clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
500                                  resultValues, 0, NULL, NULL);
501 
502             // Figure out thread dimensions
503             threads[0] = (size_t)width_lod;
504             threads[1] = (size_t)height_lod;
505             threads[2] = (size_t)depth_lod;
506 
507             // Run the kernel
508             error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
509                                            NULL, 0, NULL, NULL);
510             test_error(error, "Unable to run kernel");
511 
512             // Get results
513             error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
514                                         width_lod * height_lod * depth_lod
515                                             * get_explicit_type_size(outputType)
516                                             * 4,
517                                         resultValues, 0, NULL, NULL);
518             test_error(error, "Unable to read results from kernel");
519             if (gDebugTrace) log_info("    results read\n");
520 
521             // Validate results element by element
522             char *imagePtr = (char *)imageValues + nextLevelOffset;
523             /*
524              * FLOAT output type
525              */
526             if (is_sRGBA_order(imageInfo->format->image_channel_order)
527                 && (outputType == kFloat))
528             {
529                 // Validate float results
530                 float *resultPtr = (float *)(char *)resultValues;
531                 float expected[4], error = 0.0f;
532                 float maxErr = get_max_relative_error(
533                     imageInfo->format, imageSampler, 1 /*3D*/,
534                     CL_FILTER_LINEAR == imageSampler->filter_mode);
535 
536                 for (size_t z = 0, j = 0; z < depth_lod; z++)
537                 {
538                     for (size_t y = 0; y < height_lod; y++)
539                     {
540                         for (size_t x = 0; x < width_lod; x++, j++)
541                         {
542                             // Step 1: go through and see if the results verify
543                             // for the pixel For the normalized case on a GPU we
544                             // put in offsets to the X, Y and Z to see if we
545                             // land on the right pixel. This addresses the
546                             // significant inaccuracy in GPU normalization in
547                             // OpenCL 1.0.
548                             int checkOnlyOnePixel = 0;
549                             int found_pixel = 0;
550                             float offset = NORM_OFFSET;
551                             if (!imageSampler->normalized_coords
552                                 || imageSampler->filter_mode
553                                     != CL_FILTER_NEAREST
554                                 || NORM_OFFSET == 0
555 #if defined(__APPLE__)
556                                 // Apple requires its CPU implementation to do
557                                 // correctly rounded address arithmetic in all
558                                 // modes
559                                 || gDeviceType != CL_DEVICE_TYPE_GPU
560 #endif
561                             )
562                                 offset = 0.0f; // Loop only once
563 
564                             for (float norm_offset_x = -offset;
565                                  norm_offset_x <= offset && !found_pixel;
566                                  norm_offset_x += NORM_OFFSET)
567                             {
568                                 for (float norm_offset_y = -offset;
569                                      norm_offset_y <= offset && !found_pixel;
570                                      norm_offset_y += NORM_OFFSET)
571                                 {
572                                     for (float norm_offset_z = -offset;
573                                          norm_offset_z <= NORM_OFFSET
574                                          && !found_pixel;
575                                          norm_offset_z += NORM_OFFSET)
576                                     {
577 
578                                         int hasDenormals = 0;
579                                         FloatPixel maxPixel =
580                                             sample_image_pixel_float_offset(
581                                                 imagePtr, imageInfo,
582                                                 xOffsetValues[j],
583                                                 yOffsetValues[j],
584                                                 zOffsetValues[j], norm_offset_x,
585                                                 norm_offset_y, norm_offset_z,
586                                                 imageSampler, expected, 0,
587                                                 &hasDenormals, lod);
588 
589                                         float err1 =
590                                             ABS_ERROR(sRGBmap(resultPtr[0]),
591                                                       sRGBmap(expected[0]));
592                                         float err2 =
593                                             ABS_ERROR(sRGBmap(resultPtr[1]),
594                                                       sRGBmap(expected[1]));
595                                         float err3 =
596                                             ABS_ERROR(sRGBmap(resultPtr[2]),
597                                                       sRGBmap(expected[2]));
598                                         float err4 = ABS_ERROR(resultPtr[3],
599                                                                expected[3]);
600                                         // Clamp to the minimum absolute error
601                                         // for the format
602                                         if (err1 > 0
603                                             && err1 < formatAbsoluteError)
604                                         {
605                                             err1 = 0.0f;
606                                         }
607                                         if (err2 > 0
608                                             && err2 < formatAbsoluteError)
609                                         {
610                                             err2 = 0.0f;
611                                         }
612                                         if (err3 > 0
613                                             && err3 < formatAbsoluteError)
614                                         {
615                                             err3 = 0.0f;
616                                         }
617                                         if (err4 > 0
618                                             && err4 < formatAbsoluteError)
619                                         {
620                                             err4 = 0.0f;
621                                         }
622                                         float maxErr = 0.5;
623 
624                                         if (!(err1 <= maxErr)
625                                             || !(err2 <= maxErr)
626                                             || !(err3 <= maxErr)
627                                             || !(err4 <= maxErr))
628                                         {
629                                             // Try flushing the denormals
630                                             if (hasDenormals)
631                                             {
632                                                 // If implementation decide to
633                                                 // flush subnormals to zero, max
634                                                 // error needs to be adjusted
635                                                 maxErr += 4 * FLT_MIN;
636 
637                                                 maxPixel =
638                                                     sample_image_pixel_float_offset(
639                                                         imagePtr, imageInfo,
640                                                         xOffsetValues[j],
641                                                         yOffsetValues[j],
642                                                         zOffsetValues[j],
643                                                         norm_offset_x,
644                                                         norm_offset_y,
645                                                         norm_offset_z,
646                                                         imageSampler, expected,
647                                                         0, NULL, lod);
648 
649                                                 err1 = ABS_ERROR(
650                                                     sRGBmap(resultPtr[0]),
651                                                     sRGBmap(expected[0]));
652                                                 err2 = ABS_ERROR(
653                                                     sRGBmap(resultPtr[1]),
654                                                     sRGBmap(expected[1]));
655                                                 err3 = ABS_ERROR(
656                                                     sRGBmap(resultPtr[2]),
657                                                     sRGBmap(expected[2]));
658                                                 err4 = ABS_ERROR(resultPtr[3],
659                                                                  expected[3]);
660                                             }
661                                         }
662 
663                                         found_pixel = (err1 <= maxErr)
664                                             && (err2 <= maxErr)
665                                             && (err3 <= maxErr)
666                                             && (err4 <= maxErr);
667                                     } // norm_offset_z
668                                 } // norm_offset_y
669                             } // norm_offset_x
670 
671                             // Step 2: If we did not find a match, then print
672                             // out debugging info.
673                             if (!found_pixel)
674                             {
675                                 // For the normalized case on a GPU we put in
676                                 // offsets to the X and Y to see if we land on
677                                 // the right pixel. This addresses the
678                                 // significant inaccuracy in GPU normalization
679                                 // in OpenCL 1.0.
680                                 checkOnlyOnePixel = 0;
681                                 int shouldReturn = 0;
682                                 for (float norm_offset_x = -offset;
683                                      norm_offset_x <= offset
684                                      && !checkOnlyOnePixel;
685                                      norm_offset_x += NORM_OFFSET)
686                                 {
687                                     for (float norm_offset_y = -offset;
688                                          norm_offset_y <= offset
689                                          && !checkOnlyOnePixel;
690                                          norm_offset_y += NORM_OFFSET)
691                                     {
692                                         for (float norm_offset_z = -offset;
693                                              norm_offset_z <= offset
694                                              && !checkOnlyOnePixel;
695                                              norm_offset_z += NORM_OFFSET)
696                                         {
697 
698                                             int hasDenormals = 0;
699                                             FloatPixel maxPixel =
700                                                 sample_image_pixel_float_offset(
701                                                     imagePtr, imageInfo,
702                                                     xOffsetValues[j],
703                                                     yOffsetValues[j],
704                                                     zOffsetValues[j],
705                                                     norm_offset_x,
706                                                     norm_offset_y,
707                                                     norm_offset_z, imageSampler,
708                                                     expected, 0, &hasDenormals,
709                                                     lod);
710 
711                                             float err1 =
712                                                 ABS_ERROR(sRGBmap(resultPtr[0]),
713                                                           sRGBmap(expected[0]));
714                                             float err2 =
715                                                 ABS_ERROR(sRGBmap(resultPtr[1]),
716                                                           sRGBmap(expected[1]));
717                                             float err3 =
718                                                 ABS_ERROR(sRGBmap(resultPtr[2]),
719                                                           sRGBmap(expected[2]));
720                                             float err4 = ABS_ERROR(resultPtr[3],
721                                                                    expected[3]);
722                                             float maxErr = 0.6;
723 
724                                             if (!(err1 <= maxErr)
725                                                 || !(err2 <= maxErr)
726                                                 || !(err3 <= maxErr)
727                                                 || !(err4 <= maxErr))
728                                             {
729                                                 // Try flushing the denormals
730                                                 if (hasDenormals)
731                                                 {
732                                                     // If implementation decide
733                                                     // to flush subnormals to
734                                                     // zero, max error needs to
735                                                     // be adjusted
736                                                     maxErr += 4 * FLT_MIN;
737 
738                                                     maxPixel =
739                                                         sample_image_pixel_float(
740                                                             imagePtr, imageInfo,
741                                                             xOffsetValues[j],
742                                                             yOffsetValues[j],
743                                                             zOffsetValues[j],
744                                                             imageSampler,
745                                                             expected, 0, NULL,
746                                                             lod);
747 
748                                                     err1 = ABS_ERROR(
749                                                         sRGBmap(resultPtr[0]),
750                                                         sRGBmap(expected[0]));
751                                                     err2 = ABS_ERROR(
752                                                         sRGBmap(resultPtr[1]),
753                                                         sRGBmap(expected[1]));
754                                                     err3 = ABS_ERROR(
755                                                         sRGBmap(resultPtr[2]),
756                                                         sRGBmap(expected[2]));
757                                                     err4 =
758                                                         ABS_ERROR(resultPtr[3],
759                                                                   expected[3]);
760                                                 }
761                                             }
762 
763                                             if (!(err1 <= maxErr)
764                                                 || !(err2 <= maxErr)
765                                                 || !(err3 <= maxErr)
766                                                 || !(err4 <= maxErr))
767                                             {
768                                                 log_error(
769                                                     "FAILED norm_offsets: %g , "
770                                                     "%g , %g:\n",
771                                                     norm_offset_x,
772                                                     norm_offset_y,
773                                                     norm_offset_z);
774 
775                                                 float tempOut[4];
776                                                 shouldReturn |=
777                                                     determine_validation_error_offset<
778                                                         float>(
779                                                         imagePtr, imageInfo,
780                                                         imageSampler, resultPtr,
781                                                         expected, error,
782                                                         xOffsetValues[j],
783                                                         yOffsetValues[j],
784                                                         zOffsetValues[j],
785                                                         norm_offset_x,
786                                                         norm_offset_y,
787                                                         norm_offset_z, j,
788                                                         numTries, numClamped,
789                                                         true, lod);
790                                                 log_error("Step by step:\n");
791                                                 FloatPixel temp =
792                                                     sample_image_pixel_float_offset(
793                                                         imagePtr, imageInfo,
794                                                         xOffsetValues[j],
795                                                         yOffsetValues[j],
796                                                         zOffsetValues[j],
797                                                         norm_offset_x,
798                                                         norm_offset_y,
799                                                         norm_offset_z,
800                                                         imageSampler, tempOut,
801                                                         1 /*verbose*/,
802                                                         &hasDenormals, lod);
803                                                 log_error(
804                                                     "\tulps: %2.2f, %2.2f, "
805                                                     "%2.2f, %2.2f  (max "
806                                                     "allowed: %2.2f)\n\n",
807                                                     Ulp_Error(resultPtr[0],
808                                                               expected[0]),
809                                                     Ulp_Error(resultPtr[1],
810                                                               expected[1]),
811                                                     Ulp_Error(resultPtr[2],
812                                                               expected[2]),
813                                                     Ulp_Error(resultPtr[3],
814                                                               expected[3]),
815                                                     Ulp_Error(
816                                                         MAKE_HEX_FLOAT(
817                                                             0x1.000002p0f,
818                                                             0x1000002L, -24)
819                                                             + maxErr,
820                                                         MAKE_HEX_FLOAT(
821                                                             0x1.000002p0f,
822                                                             0x1000002L, -24)));
823                                             }
824                                             else
825                                             {
826                                                 log_error(
827                                                     "Test error: we should "
828                                                     "have detected this "
829                                                     "passing above.\n");
830                                             }
831                                         } // norm_offset_z
832                                     } // norm_offset_y
833                                 } // norm_offset_x
834                                 if (shouldReturn) return 1;
835                             } // if (!found_pixel)
836 
837                             resultPtr += 4;
838                         }
839                     }
840                 }
841             }
842             /*
843              * FLOAT output type
844              */
845             else if (outputType == kFloat)
846             {
847                 // Validate float results
848                 float *resultPtr = (float *)(char *)resultValues;
849                 float expected[4], error = 0.0f;
850                 float maxErr = get_max_relative_error(
851                     imageInfo->format, imageSampler, 1 /*3D*/,
852                     CL_FILTER_LINEAR == imageSampler->filter_mode);
853 
854                 for (size_t z = 0, j = 0; z < depth_lod; z++)
855                 {
856                     for (size_t y = 0; y < height_lod; y++)
857                     {
858                         for (size_t x = 0; x < width_lod; x++, j++)
859                         {
860                             // Step 1: go through and see if the results verify
861                             // for the pixel For the normalized case on a GPU we
862                             // put in offsets to the X, Y and Z to see if we
863                             // land on the right pixel. This addresses the
864                             // significant inaccuracy in GPU normalization in
865                             // OpenCL 1.0.
866                             int checkOnlyOnePixel = 0;
867                             int found_pixel = 0;
868                             float offset = NORM_OFFSET;
869                             if (!imageSampler->normalized_coords
870                                 || imageSampler->filter_mode
871                                     != CL_FILTER_NEAREST
872                                 || NORM_OFFSET == 0
873 #if defined(__APPLE__)
874                                 // Apple requires its CPU implementation to do
875                                 // correctly rounded address arithmetic in all
876                                 // modes
877                                 || gDeviceType != CL_DEVICE_TYPE_GPU
878 #endif
879                             )
880                                 offset = 0.0f; // Loop only once
881 
882                             for (float norm_offset_x = -offset;
883                                  norm_offset_x <= offset && !found_pixel;
884                                  norm_offset_x += NORM_OFFSET)
885                             {
886                                 for (float norm_offset_y = -offset;
887                                      norm_offset_y <= offset && !found_pixel;
888                                      norm_offset_y += NORM_OFFSET)
889                                 {
890                                     for (float norm_offset_z = -offset;
891                                          norm_offset_z <= NORM_OFFSET
892                                          && !found_pixel;
893                                          norm_offset_z += NORM_OFFSET)
894                                     {
895 
896                                         int hasDenormals = 0;
897                                         FloatPixel maxPixel =
898                                             sample_image_pixel_float_offset(
899                                                 imagePtr, imageInfo,
900                                                 xOffsetValues[j],
901                                                 yOffsetValues[j],
902                                                 zOffsetValues[j], norm_offset_x,
903                                                 norm_offset_y, norm_offset_z,
904                                                 imageSampler, expected, 0,
905                                                 &hasDenormals, lod);
906 
907                                         float err1 = ABS_ERROR(resultPtr[0],
908                                                                expected[0]);
909                                         float err2 = ABS_ERROR(resultPtr[1],
910                                                                expected[1]);
911                                         float err3 = ABS_ERROR(resultPtr[2],
912                                                                expected[2]);
913                                         float err4 = ABS_ERROR(resultPtr[3],
914                                                                expected[3]);
915                                         // Clamp to the minimum absolute error
916                                         // for the format
917                                         if (err1 > 0
918                                             && err1 < formatAbsoluteError)
919                                         {
920                                             err1 = 0.0f;
921                                         }
922                                         if (err2 > 0
923                                             && err2 < formatAbsoluteError)
924                                         {
925                                             err2 = 0.0f;
926                                         }
927                                         if (err3 > 0
928                                             && err3 < formatAbsoluteError)
929                                         {
930                                             err3 = 0.0f;
931                                         }
932                                         if (err4 > 0
933                                             && err4 < formatAbsoluteError)
934                                         {
935                                             err4 = 0.0f;
936                                         }
937                                         float maxErr1 = MAX(
938                                             maxErr * maxPixel.p[0], FLT_MIN);
939                                         float maxErr2 = MAX(
940                                             maxErr * maxPixel.p[1], FLT_MIN);
941                                         float maxErr3 = MAX(
942                                             maxErr * maxPixel.p[2], FLT_MIN);
943                                         float maxErr4 = MAX(
944                                             maxErr * maxPixel.p[3], FLT_MIN);
945 
946                                         if (!(err1 <= maxErr1)
947                                             || !(err2 <= maxErr2)
948                                             || !(err3 <= maxErr3)
949                                             || !(err4 <= maxErr4))
950                                         {
951                                             // Try flushing the denormals
952                                             if (hasDenormals)
953                                             {
954                                                 // If implementation decide to
955                                                 // flush subnormals to zero, max
956                                                 // error needs to be adjusted
957                                                 maxErr1 += 4 * FLT_MIN;
958                                                 maxErr2 += 4 * FLT_MIN;
959                                                 maxErr3 += 4 * FLT_MIN;
960                                                 maxErr4 += 4 * FLT_MIN;
961 
962                                                 maxPixel =
963                                                     sample_image_pixel_float_offset(
964                                                         imagePtr, imageInfo,
965                                                         xOffsetValues[j],
966                                                         yOffsetValues[j],
967                                                         zOffsetValues[j],
968                                                         norm_offset_x,
969                                                         norm_offset_y,
970                                                         norm_offset_z,
971                                                         imageSampler, expected,
972                                                         0, NULL, lod);
973 
974                                                 err1 = ABS_ERROR(resultPtr[0],
975                                                                  expected[0]);
976                                                 err2 = ABS_ERROR(resultPtr[1],
977                                                                  expected[1]);
978                                                 err3 = ABS_ERROR(resultPtr[2],
979                                                                  expected[2]);
980                                                 err4 = ABS_ERROR(resultPtr[3],
981                                                                  expected[3]);
982                                             }
983                                         }
984 
985                                         found_pixel = (err1 <= maxErr1)
986                                             && (err2 <= maxErr2)
987                                             && (err3 <= maxErr3)
988                                             && (err4 <= maxErr4);
989                                     } // norm_offset_z
990                                 } // norm_offset_y
991                             } // norm_offset_x
992 
993                             // Step 2: If we did not find a match, then print
994                             // out debugging info.
995                             if (!found_pixel)
996                             {
997                                 // For the normalized case on a GPU we put in
998                                 // offsets to the X and Y to see if we land on
999                                 // the right pixel. This addresses the
1000                                 // significant inaccuracy in GPU normalization
1001                                 // in OpenCL 1.0.
1002                                 checkOnlyOnePixel = 0;
1003                                 int shouldReturn = 0;
1004                                 for (float norm_offset_x = -offset;
1005                                      norm_offset_x <= offset
1006                                      && !checkOnlyOnePixel;
1007                                      norm_offset_x += NORM_OFFSET)
1008                                 {
1009                                     for (float norm_offset_y = -offset;
1010                                          norm_offset_y <= offset
1011                                          && !checkOnlyOnePixel;
1012                                          norm_offset_y += NORM_OFFSET)
1013                                     {
1014                                         for (float norm_offset_z = -offset;
1015                                              norm_offset_z <= offset
1016                                              && !checkOnlyOnePixel;
1017                                              norm_offset_z += NORM_OFFSET)
1018                                         {
1019 
1020                                             int hasDenormals = 0;
1021                                             FloatPixel maxPixel =
1022                                                 sample_image_pixel_float_offset(
1023                                                     imagePtr, imageInfo,
1024                                                     xOffsetValues[j],
1025                                                     yOffsetValues[j],
1026                                                     zOffsetValues[j],
1027                                                     norm_offset_x,
1028                                                     norm_offset_y,
1029                                                     norm_offset_z, imageSampler,
1030                                                     expected, 0, &hasDenormals,
1031                                                     lod);
1032 
1033                                             float err1 = ABS_ERROR(resultPtr[0],
1034                                                                    expected[0]);
1035                                             float err2 = ABS_ERROR(resultPtr[1],
1036                                                                    expected[1]);
1037                                             float err3 = ABS_ERROR(resultPtr[2],
1038                                                                    expected[2]);
1039                                             float err4 = ABS_ERROR(resultPtr[3],
1040                                                                    expected[3]);
1041                                             float maxErr1 =
1042                                                 MAX(maxErr * maxPixel.p[0],
1043                                                     FLT_MIN);
1044                                             float maxErr2 =
1045                                                 MAX(maxErr * maxPixel.p[1],
1046                                                     FLT_MIN);
1047                                             float maxErr3 =
1048                                                 MAX(maxErr * maxPixel.p[2],
1049                                                     FLT_MIN);
1050                                             float maxErr4 =
1051                                                 MAX(maxErr * maxPixel.p[3],
1052                                                     FLT_MIN);
1053 
1054 
1055                                             if (!(err1 <= maxErr1)
1056                                                 || !(err2 <= maxErr2)
1057                                                 || !(err3 <= maxErr3)
1058                                                 || !(err4 <= maxErr4))
1059                                             {
1060                                                 // Try flushing the denormals
1061                                                 if (hasDenormals)
1062                                                 {
1063                                                     maxErr1 += 4 * FLT_MIN;
1064                                                     maxErr2 += 4 * FLT_MIN;
1065                                                     maxErr3 += 4 * FLT_MIN;
1066                                                     maxErr4 += 4 * FLT_MIN;
1067 
1068                                                     maxPixel =
1069                                                         sample_image_pixel_float(
1070                                                             imagePtr, imageInfo,
1071                                                             xOffsetValues[j],
1072                                                             yOffsetValues[j],
1073                                                             zOffsetValues[j],
1074                                                             imageSampler,
1075                                                             expected, 0, NULL,
1076                                                             lod);
1077 
1078                                                     err1 =
1079                                                         ABS_ERROR(resultPtr[0],
1080                                                                   expected[0]);
1081                                                     err2 =
1082                                                         ABS_ERROR(resultPtr[1],
1083                                                                   expected[1]);
1084                                                     err3 =
1085                                                         ABS_ERROR(resultPtr[2],
1086                                                                   expected[2]);
1087                                                     err4 =
1088                                                         ABS_ERROR(resultPtr[3],
1089                                                                   expected[3]);
1090                                                 }
1091                                             }
1092 
1093                                             if (!(err1 <= maxErr1)
1094                                                 || !(err2 <= maxErr2)
1095                                                 || !(err3 <= maxErr3)
1096                                                 || !(err4 <= maxErr4))
1097                                             {
1098                                                 log_error(
1099                                                     "FAILED norm_offsets: %g , "
1100                                                     "%g , %g:\n",
1101                                                     norm_offset_x,
1102                                                     norm_offset_y,
1103                                                     norm_offset_z);
1104 
1105                                                 float tempOut[4];
1106                                                 shouldReturn |=
1107                                                     determine_validation_error_offset<
1108                                                         float>(
1109                                                         imagePtr, imageInfo,
1110                                                         imageSampler, resultPtr,
1111                                                         expected, error,
1112                                                         xOffsetValues[j],
1113                                                         yOffsetValues[j],
1114                                                         zOffsetValues[j],
1115                                                         norm_offset_x,
1116                                                         norm_offset_y,
1117                                                         norm_offset_z, j,
1118                                                         numTries, numClamped,
1119                                                         true, lod);
1120                                                 log_error("Step by step:\n");
1121                                                 FloatPixel temp =
1122                                                     sample_image_pixel_float_offset(
1123                                                         imagePtr, imageInfo,
1124                                                         xOffsetValues[j],
1125                                                         yOffsetValues[j],
1126                                                         zOffsetValues[j],
1127                                                         norm_offset_x,
1128                                                         norm_offset_y,
1129                                                         norm_offset_z,
1130                                                         imageSampler, tempOut,
1131                                                         1 /*verbose*/,
1132                                                         &hasDenormals, lod);
1133                                                 log_error(
1134                                                     "\tulps: %2.2f, %2.2f, "
1135                                                     "%2.2f, %2.2f  (max "
1136                                                     "allowed: %2.2f)\n\n",
1137                                                     Ulp_Error(resultPtr[0],
1138                                                               expected[0]),
1139                                                     Ulp_Error(resultPtr[1],
1140                                                               expected[1]),
1141                                                     Ulp_Error(resultPtr[2],
1142                                                               expected[2]),
1143                                                     Ulp_Error(resultPtr[3],
1144                                                               expected[3]),
1145                                                     Ulp_Error(
1146                                                         MAKE_HEX_FLOAT(
1147                                                             0x1.000002p0f,
1148                                                             0x1000002L, -24)
1149                                                             + maxErr,
1150                                                         MAKE_HEX_FLOAT(
1151                                                             0x1.000002p0f,
1152                                                             0x1000002L, -24)));
1153                                             }
1154                                             else
1155                                             {
1156                                                 log_error(
1157                                                     "Test error: we should "
1158                                                     "have detected this "
1159                                                     "passing above.\n");
1160                                             }
1161                                         } // norm_offset_z
1162                                     } // norm_offset_y
1163                                 } // norm_offset_x
1164                                 if (shouldReturn) return 1;
1165                             } // if (!found_pixel)
1166 
1167                             resultPtr += 4;
1168                         }
1169                     }
1170                 }
1171             }
1172             /*
1173              * UINT output type
1174              */
1175             else if (outputType == kUInt)
1176             {
1177                 // Validate unsigned integer results
1178                 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
1179                 unsigned int expected[4];
1180                 float error;
1181                 for (size_t z = 0, j = 0; z < depth_lod; z++)
1182                 {
1183                     for (size_t y = 0; y < height_lod; y++)
1184                     {
1185                         for (size_t x = 0; x < width_lod; x++, j++)
1186                         {
1187                             // Step 1: go through and see if the results verify
1188                             // for the pixel For the normalized case on a GPU we
1189                             // put in offsets to the X, Y and Z to see if we
1190                             // land on the right pixel. This addresses the
1191                             // significant inaccuracy in GPU normalization in
1192                             // OpenCL 1.0.
1193                             int checkOnlyOnePixel = 0;
1194                             int found_pixel = 0;
1195                             for (float norm_offset_x = -NORM_OFFSET;
1196                                  norm_offset_x <= NORM_OFFSET && !found_pixel
1197                                  && !checkOnlyOnePixel;
1198                                  norm_offset_x += NORM_OFFSET)
1199                             {
1200                                 for (float norm_offset_y = -NORM_OFFSET;
1201                                      norm_offset_y <= NORM_OFFSET
1202                                      && !found_pixel && !checkOnlyOnePixel;
1203                                      norm_offset_y += NORM_OFFSET)
1204                                 {
1205                                     for (float norm_offset_z = -NORM_OFFSET;
1206                                          norm_offset_z <= NORM_OFFSET
1207                                          && !found_pixel && !checkOnlyOnePixel;
1208                                          norm_offset_z += NORM_OFFSET)
1209                                     {
1210 
1211                                         // If we are not on a GPU, or we are not
1212                                         // normalized, then only test with
1213                                         // offsets (0.0, 0.0) E.g., test one
1214                                         // pixel.
1215                                         if (!imageSampler->normalized_coords
1216                                             || gDeviceType != CL_DEVICE_TYPE_GPU
1217                                             || NORM_OFFSET == 0)
1218                                         {
1219                                             norm_offset_x = 0.0f;
1220                                             norm_offset_y = 0.0f;
1221                                             norm_offset_z = 0.0f;
1222                                             checkOnlyOnePixel = 1;
1223                                         }
1224 
1225                                         sample_image_pixel_offset<unsigned int>(
1226                                             imagePtr, imageInfo,
1227                                             xOffsetValues[j], yOffsetValues[j],
1228                                             zOffsetValues[j], norm_offset_x,
1229                                             norm_offset_y, norm_offset_z,
1230                                             imageSampler, expected, lod);
1231 
1232                                         error = errMax(
1233                                             errMax(abs_diff_uint(expected[0],
1234                                                                  resultPtr[0]),
1235                                                    abs_diff_uint(expected[1],
1236                                                                  resultPtr[1])),
1237                                             errMax(
1238                                                 abs_diff_uint(expected[2],
1239                                                               resultPtr[2]),
1240                                                 abs_diff_uint(expected[3],
1241                                                               resultPtr[3])));
1242 
1243                                         if (error < MAX_ERR) found_pixel = 1;
1244                                     } // norm_offset_z
1245                                 } // norm_offset_y
1246                             } // norm_offset_x
1247 
1248                             // Step 2: If we did not find a match, then print
1249                             // out debugging info.
1250                             if (!found_pixel)
1251                             {
1252                                 // For the normalized case on a GPU we put in
1253                                 // offsets to the X and Y to see if we land on
1254                                 // the right pixel. This addresses the
1255                                 // significant inaccuracy in GPU normalization
1256                                 // in OpenCL 1.0.
1257                                 checkOnlyOnePixel = 0;
1258                                 int shouldReturn = 0;
1259                                 for (float norm_offset_x = -NORM_OFFSET;
1260                                      norm_offset_x <= NORM_OFFSET
1261                                      && !checkOnlyOnePixel;
1262                                      norm_offset_x += NORM_OFFSET)
1263                                 {
1264                                     for (float norm_offset_y = -NORM_OFFSET;
1265                                          norm_offset_y <= NORM_OFFSET
1266                                          && !checkOnlyOnePixel;
1267                                          norm_offset_y += NORM_OFFSET)
1268                                     {
1269                                         for (float norm_offset_z = -NORM_OFFSET;
1270                                              norm_offset_z <= NORM_OFFSET
1271                                              && !checkOnlyOnePixel;
1272                                              norm_offset_z += NORM_OFFSET)
1273                                         {
1274 
1275                                             // If we are not on a GPU, or we are
1276                                             // not normalized, then only test
1277                                             // with offsets (0.0, 0.0) E.g.,
1278                                             // test one pixel.
1279                                             if (!imageSampler->normalized_coords
1280                                                 || gDeviceType
1281                                                     != CL_DEVICE_TYPE_GPU
1282                                                 || NORM_OFFSET == 0)
1283                                             {
1284                                                 norm_offset_x = 0.0f;
1285                                                 norm_offset_y = 0.0f;
1286                                                 norm_offset_z = 0.0f;
1287                                                 checkOnlyOnePixel = 1;
1288                                             }
1289 
1290                                             sample_image_pixel_offset<
1291                                                 unsigned int>(
1292                                                 imagePtr, imageInfo,
1293                                                 xOffsetValues[j],
1294                                                 yOffsetValues[j],
1295                                                 zOffsetValues[j], norm_offset_x,
1296                                                 norm_offset_y, norm_offset_z,
1297                                                 imageSampler, expected, lod);
1298 
1299                                             error = errMax(
1300                                                 errMax(
1301                                                     abs_diff_uint(expected[0],
1302                                                                   resultPtr[0]),
1303                                                     abs_diff_uint(
1304                                                         expected[1],
1305                                                         resultPtr[1])),
1306                                                 errMax(
1307                                                     abs_diff_uint(expected[2],
1308                                                                   resultPtr[2]),
1309                                                     abs_diff_uint(
1310                                                         expected[3],
1311                                                         resultPtr[3])));
1312 
1313                                             if (error > MAX_ERR)
1314                                             {
1315                                                 log_error(
1316                                                     "FAILED norm_offsets: %g , "
1317                                                     "%g , %g:\n",
1318                                                     norm_offset_x,
1319                                                     norm_offset_y,
1320                                                     norm_offset_z);
1321                                                 shouldReturn |=
1322                                                     determine_validation_error_offset<
1323                                                         unsigned int>(
1324                                                         imagePtr, imageInfo,
1325                                                         imageSampler, resultPtr,
1326                                                         expected, error,
1327                                                         xOffsetValues[j],
1328                                                         yOffsetValues[j],
1329                                                         zOffsetValues[j],
1330                                                         norm_offset_x,
1331                                                         norm_offset_y,
1332                                                         norm_offset_z, j,
1333                                                         numTries, numClamped,
1334                                                         false, lod);
1335                                             }
1336                                             else
1337                                             {
1338                                                 log_error(
1339                                                     "Test error: we should "
1340                                                     "have detected this "
1341                                                     "passing above.\n");
1342                                             }
1343                                         } // norm_offset_z
1344                                     } // norm_offset_y
1345                                 } // norm_offset_x
1346                                 if (shouldReturn) return 1;
1347                             } // if (!found_pixel)
1348 
1349                             resultPtr += 4;
1350                         }
1351                     }
1352                 }
1353             }
1354             else
1355             /*
1356              * INT output type
1357              */
1358             {
1359                 // Validate integer results
1360                 int *resultPtr = (int *)(char *)resultValues;
1361                 int expected[4];
1362                 float error;
1363                 for (size_t z = 0, j = 0; z < depth_lod; z++)
1364                 {
1365                     for (size_t y = 0; y < height_lod; y++)
1366                     {
1367                         for (size_t x = 0; x < width_lod; x++, j++)
1368                         {
1369                             // Step 1: go through and see if the results verify
1370                             // for the pixel For the normalized case on a GPU we
1371                             // put in offsets to the X, Y and Z to see if we
1372                             // land on the right pixel. This addresses the
1373                             // significant inaccuracy in GPU normalization in
1374                             // OpenCL 1.0.
1375                             int checkOnlyOnePixel = 0;
1376                             int found_pixel = 0;
1377                             for (float norm_offset_x = -NORM_OFFSET;
1378                                  norm_offset_x <= NORM_OFFSET && !found_pixel
1379                                  && !checkOnlyOnePixel;
1380                                  norm_offset_x += NORM_OFFSET)
1381                             {
1382                                 for (float norm_offset_y = -NORM_OFFSET;
1383                                      norm_offset_y <= NORM_OFFSET
1384                                      && !found_pixel && !checkOnlyOnePixel;
1385                                      norm_offset_y += NORM_OFFSET)
1386                                 {
1387                                     for (float norm_offset_z = -NORM_OFFSET;
1388                                          norm_offset_z <= NORM_OFFSET
1389                                          && !found_pixel && !checkOnlyOnePixel;
1390                                          norm_offset_z += NORM_OFFSET)
1391                                     {
1392 
1393                                         // If we are not on a GPU, or we are not
1394                                         // normalized, then only test with
1395                                         // offsets (0.0, 0.0) E.g., test one
1396                                         // pixel.
1397                                         if (!imageSampler->normalized_coords
1398                                             || gDeviceType != CL_DEVICE_TYPE_GPU
1399                                             || NORM_OFFSET == 0)
1400                                         {
1401                                             norm_offset_x = 0.0f;
1402                                             norm_offset_y = 0.0f;
1403                                             norm_offset_z = 0.0f;
1404                                             checkOnlyOnePixel = 1;
1405                                         }
1406 
1407                                         sample_image_pixel_offset<int>(
1408                                             imagePtr, imageInfo,
1409                                             xOffsetValues[j], yOffsetValues[j],
1410                                             zOffsetValues[j], norm_offset_x,
1411                                             norm_offset_y, norm_offset_z,
1412                                             imageSampler, expected, lod);
1413 
1414                                         error = errMax(
1415                                             errMax(abs_diff_int(expected[0],
1416                                                                 resultPtr[0]),
1417                                                    abs_diff_int(expected[1],
1418                                                                 resultPtr[1])),
1419                                             errMax(abs_diff_int(expected[2],
1420                                                                 resultPtr[2]),
1421                                                    abs_diff_int(expected[3],
1422                                                                 resultPtr[3])));
1423 
1424                                         if (error < MAX_ERR) found_pixel = 1;
1425                                     } // norm_offset_z
1426                                 } // norm_offset_y
1427                             } // norm_offset_x
1428 
1429                             // Step 2: If we did not find a match, then print
1430                             // out debugging info.
1431                             if (!found_pixel)
1432                             {
1433                                 // For the normalized case on a GPU we put in
1434                                 // offsets to the X and Y to see if we land on
1435                                 // the right pixel. This addresses the
1436                                 // significant inaccuracy in GPU normalization
1437                                 // in OpenCL 1.0.
1438                                 checkOnlyOnePixel = 0;
1439                                 int shouldReturn = 0;
1440                                 for (float norm_offset_x = -NORM_OFFSET;
1441                                      norm_offset_x <= NORM_OFFSET
1442                                      && !checkOnlyOnePixel;
1443                                      norm_offset_x += NORM_OFFSET)
1444                                 {
1445                                     for (float norm_offset_y = -NORM_OFFSET;
1446                                          norm_offset_y <= NORM_OFFSET
1447                                          && !checkOnlyOnePixel;
1448                                          norm_offset_y += NORM_OFFSET)
1449                                     {
1450                                         for (float norm_offset_z = -NORM_OFFSET;
1451                                              norm_offset_z <= NORM_OFFSET
1452                                              && !checkOnlyOnePixel;
1453                                              norm_offset_z += NORM_OFFSET)
1454                                         {
1455 
1456                                             // If we are not on a GPU, or we are
1457                                             // not normalized, then only test
1458                                             // with offsets (0.0, 0.0) E.g.,
1459                                             // test one pixel.
1460                                             if (!imageSampler->normalized_coords
1461                                                 || gDeviceType
1462                                                     != CL_DEVICE_TYPE_GPU
1463                                                 || NORM_OFFSET == 0
1464                                                 || NORM_OFFSET == 0
1465                                                 || NORM_OFFSET == 0)
1466                                             {
1467                                                 norm_offset_x = 0.0f;
1468                                                 norm_offset_y = 0.0f;
1469                                                 norm_offset_z = 0.0f;
1470                                                 checkOnlyOnePixel = 1;
1471                                             }
1472 
1473                                             sample_image_pixel_offset<int>(
1474                                                 imagePtr, imageInfo,
1475                                                 xOffsetValues[j],
1476                                                 yOffsetValues[j],
1477                                                 zOffsetValues[j], norm_offset_x,
1478                                                 norm_offset_y, norm_offset_z,
1479                                                 imageSampler, expected, lod);
1480 
1481                                             error = errMax(
1482                                                 errMax(
1483                                                     abs_diff_int(expected[0],
1484                                                                  resultPtr[0]),
1485                                                     abs_diff_int(expected[1],
1486                                                                  resultPtr[1])),
1487                                                 errMax(
1488                                                     abs_diff_int(expected[2],
1489                                                                  resultPtr[2]),
1490                                                     abs_diff_int(
1491                                                         expected[3],
1492                                                         resultPtr[3])));
1493 
1494                                             if (error > MAX_ERR)
1495                                             {
1496                                                 log_error(
1497                                                     "FAILED norm_offsets: %g , "
1498                                                     "%g , %g:\n",
1499                                                     norm_offset_x,
1500                                                     norm_offset_y,
1501                                                     norm_offset_z);
1502                                                 shouldReturn |=
1503                                                     determine_validation_error_offset<
1504                                                         int>(
1505                                                         imagePtr, imageInfo,
1506                                                         imageSampler, resultPtr,
1507                                                         expected, error,
1508                                                         xOffsetValues[j],
1509                                                         yOffsetValues[j],
1510                                                         zOffsetValues[j],
1511                                                         norm_offset_x,
1512                                                         norm_offset_y,
1513                                                         norm_offset_z, j,
1514                                                         numTries, numClamped,
1515                                                         false, lod);
1516                                             }
1517                                             else
1518                                             {
1519                                                 log_error(
1520                                                     "Test error: we should "
1521                                                     "have detected this "
1522                                                     "passing above.\n");
1523                                             }
1524                                         } // norm_offset_z
1525                                     } // norm_offset_y
1526                                 } // norm_offset_x
1527                                 if (shouldReturn) return 1;
1528                             } // if (!found_pixel)
1529 
1530                             resultPtr += 4;
1531                         }
1532                     }
1533                 }
1534             }
1535         }
1536         {
1537             nextLevelOffset += width_lod * height_lod * depth_lod
1538                 * get_pixel_size(imageInfo->format);
1539             width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1540             height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1541             depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
1542         }
1543     }
1544 
1545     return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1546 }