1 //
2 // Copyright (c) 2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "test_common.h"
18
19
create_sampler(cl_context context,image_sampler_data * sdata,bool test_mipmaps,cl_int * error)20 cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) {
21 cl_sampler sampler = nullptr;
22 if (test_mipmaps) {
23 cl_sampler_properties properties[] = {
24 CL_SAMPLER_NORMALIZED_COORDS, sdata->normalized_coords,
25 CL_SAMPLER_ADDRESSING_MODE, sdata->addressing_mode,
26 CL_SAMPLER_FILTER_MODE, sdata->filter_mode,
27 CL_SAMPLER_MIP_FILTER_MODE, sdata->filter_mode,
28 0};
29 sampler = clCreateSamplerWithProperties(context, properties, error);
30 } else {
31 sampler = clCreateSampler(context, sdata->normalized_coords, sdata->addressing_mode, sdata->filter_mode, error);
32 }
33 return sampler;
34 }
35
InitFloatCoordsCommon(image_descriptor * imageInfo,image_sampler_data * imageSampler,float * xOffsets,float * yOffsets,float * zOffsets,float xfract,float yfract,float zfract,int normalized_coords,MTdata d,int lod)36 void InitFloatCoordsCommon(image_descriptor *imageInfo,
37 image_sampler_data *imageSampler, float *xOffsets,
38 float *yOffsets, float *zOffsets, float xfract,
39 float yfract, float zfract, int normalized_coords,
40 MTdata d, int lod)
41 {
42 size_t i = 0;
43 if (gDisableOffsets)
44 {
45 for (size_t z = 0; z < imageInfo->depth; z++)
46 {
47 for (size_t y = 0; y < imageInfo->height; y++)
48 {
49 for (size_t x = 0; x < imageInfo->width; x++, i++)
50 {
51 xOffsets[i] = (float)(xfract + (double)x);
52 yOffsets[i] = (float)(yfract + (double)y);
53 zOffsets[i] = (float)(zfract + (double)z);
54 }
55 }
56 }
57 }
58 else
59 {
60 for (size_t z = 0; z < imageInfo->depth; z++)
61 {
62 for (size_t y = 0; y < imageInfo->height; y++)
63 {
64 for (size_t x = 0; x < imageInfo->width; x++, i++)
65 {
66 xOffsets[i] =
67 (float)(xfract
68 + (double)((int)x
69 + random_in_range(-10, 10, d)));
70 yOffsets[i] =
71 (float)(yfract
72 + (double)((int)y
73 + random_in_range(-10, 10, d)));
74 zOffsets[i] =
75 (float)(zfract
76 + (double)((int)z
77 + random_in_range(-10, 10, d)));
78 }
79 }
80 }
81 }
82
83 if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
84 {
85 i = 0;
86 for (size_t z = 0; z < imageInfo->depth; z++)
87 {
88 for (size_t y = 0; y < imageInfo->height; y++)
89 {
90 for (size_t x = 0; x < imageInfo->width; x++, i++)
91 {
92 xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0,
93 (double)imageInfo->width - 1.0);
94 yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0,
95 (double)imageInfo->height - 1.0);
96 zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0,
97 (double)imageInfo->depth - 1.0);
98 }
99 }
100 }
101 }
102
103 if (normalized_coords || gTestMipmaps)
104 {
105 i = 0;
106 if (lod == 0)
107 {
108 for (size_t z = 0; z < imageInfo->depth; z++)
109 {
110 for (size_t y = 0; y < imageInfo->height; y++)
111 {
112 for (size_t x = 0; x < imageInfo->width; x++, i++)
113 {
114 xOffsets[i] = (float)((double)xOffsets[i]
115 / (double)imageInfo->width);
116 yOffsets[i] = (float)((double)yOffsets[i]
117 / (double)imageInfo->height);
118 zOffsets[i] = (float)((double)zOffsets[i]
119 / (double)imageInfo->depth);
120 }
121 }
122 }
123 }
124 else if (gTestMipmaps)
125 {
126 size_t width_lod, height_lod, depth_lod;
127
128 width_lod =
129 (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
130 height_lod =
131 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
132 depth_lod =
133 (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
134
135 for (size_t z = 0; z < depth_lod; z++)
136 {
137 for (size_t y = 0; y < height_lod; y++)
138 {
139 for (size_t x = 0; x < width_lod; x++, i++)
140 {
141 xOffsets[i] =
142 (float)((double)xOffsets[i] / (double)width_lod);
143 yOffsets[i] =
144 (float)((double)yOffsets[i] / (double)height_lod);
145 zOffsets[i] =
146 (float)((double)zOffsets[i] / (double)depth_lod);
147 }
148 }
149 }
150 }
151 }
152 }
153
test_read_image(cl_context context,cl_command_queue queue,cl_kernel kernel,image_descriptor * imageInfo,image_sampler_data * imageSampler,bool useFloatCoords,ExplicitType outputType,MTdata d)154 int test_read_image(cl_context context, cl_command_queue queue,
155 cl_kernel kernel, image_descriptor *imageInfo,
156 image_sampler_data *imageSampler, bool useFloatCoords,
157 ExplicitType outputType, MTdata d)
158 {
159 int error;
160 size_t threads[3];
161 static int initHalf = 0;
162
163 cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY;
164
165 clMemWrapper xOffsets, yOffsets, zOffsets, results;
166 clSamplerWrapper actualSampler;
167 BufferOwningPtr<char> maxImageUseHostPtrBackingStore;
168
169 // Create offset data
170 BufferOwningPtr<cl_float> xOffsetValues(
171 malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
172 * imageInfo->depth));
173 BufferOwningPtr<cl_float> yOffsetValues(
174 malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
175 * imageInfo->depth));
176 BufferOwningPtr<cl_float> zOffsetValues(
177 malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height
178 * imageInfo->depth));
179
180 if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
181 if (DetectFloatToHalfRoundingMode(queue)) return 1;
182
183 BufferOwningPtr<char> imageValues;
184 generate_random_image_data(imageInfo, imageValues, d);
185
186 // Construct testing sources
187 clProtectedImage protImage;
188 clMemWrapper unprotImage;
189 cl_mem image;
190
191 if (gtestTypesToRun & kReadTests)
192 {
193 image_read_write_flags = CL_MEM_READ_ONLY;
194 }
195 else
196 {
197 image_read_write_flags = CL_MEM_READ_WRITE;
198 }
199
200 if (gMemFlagsToUse == CL_MEM_USE_HOST_PTR)
201 {
202 // clProtectedImage uses USE_HOST_PTR, so just rely on that for the
203 // testing (via Ian) Do not use protected images for max image size test
204 // since it rounds the row size to a page size
205 if (gTestMaxImages)
206 {
207 generate_random_image_data(imageInfo,
208 maxImageUseHostPtrBackingStore, d);
209 unprotImage = create_image_3d(
210 context, image_read_write_flags | CL_MEM_USE_HOST_PTR,
211 imageInfo->format, imageInfo->width, imageInfo->height,
212 imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
213 (gEnablePitch ? imageInfo->slicePitch : 0),
214 maxImageUseHostPtrBackingStore, &error);
215 }
216 else
217 {
218 error = protImage.Create(context, image_read_write_flags,
219 imageInfo->format, imageInfo->width,
220 imageInfo->height, imageInfo->depth);
221 }
222 if (error != CL_SUCCESS)
223 {
224 log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
225 "(pitch %d, %d ) (%s)",
226 (int)imageInfo->width, (int)imageInfo->height,
227 (int)imageInfo->depth, (int)imageInfo->rowPitch,
228 (int)imageInfo->slicePitch, IGetErrorString(error));
229 return error;
230 }
231 if (gTestMaxImages)
232 image = (cl_mem)unprotImage;
233 else
234 image = (cl_mem)protImage;
235 }
236 else if (gMemFlagsToUse == CL_MEM_COPY_HOST_PTR)
237 {
238 // Don't use clEnqueueWriteImage; just use copy host ptr to get the data
239 // in
240 unprotImage = create_image_3d(
241 context, image_read_write_flags | CL_MEM_COPY_HOST_PTR,
242 imageInfo->format, imageInfo->width, imageInfo->height,
243 imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
244 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues, &error);
245 if (error != CL_SUCCESS)
246 {
247 log_error("ERROR: Unable to create 3D image of size %d x %d x %d "
248 "(pitch %d, %d ) (%s)",
249 (int)imageInfo->width, (int)imageInfo->height,
250 (int)imageInfo->depth, (int)imageInfo->rowPitch,
251 (int)imageInfo->slicePitch, IGetErrorString(error));
252 return error;
253 }
254 image = unprotImage;
255 }
256 else // Either CL_MEM_ALLOC_HOST_PTR or none
257 {
258 // Note: if ALLOC_HOST_PTR is used, the driver allocates memory that can
259 // be accessed by the host, but otherwise it works just as if no flag is
260 // specified, so we just do the same thing either way
261 if (!gTestMipmaps)
262 {
263 unprotImage = create_image_3d(
264 context, image_read_write_flags | gMemFlagsToUse,
265 imageInfo->format, imageInfo->width, imageInfo->height,
266 imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0),
267 (gEnablePitch ? imageInfo->slicePitch : 0), imageValues,
268 &error);
269 if (error != CL_SUCCESS)
270 {
271 log_error("ERROR: Unable to create 3D image of size %d x %d x "
272 "%d (pitch %d, %d ) (%s)",
273 (int)imageInfo->width, (int)imageInfo->height,
274 (int)imageInfo->depth, (int)imageInfo->rowPitch,
275 (int)imageInfo->slicePitch, IGetErrorString(error));
276 return error;
277 }
278 image = unprotImage;
279 }
280 else
281 {
282 cl_image_desc image_desc = { 0 };
283 image_desc.image_type = CL_MEM_OBJECT_IMAGE3D;
284 image_desc.image_width = imageInfo->width;
285 image_desc.image_height = imageInfo->height;
286 image_desc.image_depth = imageInfo->depth;
287 image_desc.num_mip_levels = imageInfo->num_mip_levels;
288
289
290 unprotImage =
291 clCreateImage(context, image_read_write_flags,
292 imageInfo->format, &image_desc, NULL, &error);
293 if (error != CL_SUCCESS)
294 {
295 log_error("ERROR: Unable to create %d level mipmapped 3D image "
296 "of size %d x %d x %d (pitch %d, %d ) (%s)",
297 (int)imageInfo->num_mip_levels, (int)imageInfo->width,
298 (int)imageInfo->height, (int)imageInfo->depth,
299 (int)imageInfo->rowPitch, (int)imageInfo->slicePitch,
300 IGetErrorString(error));
301 return error;
302 }
303 image = unprotImage;
304 }
305 }
306
307 if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR)
308 {
309 size_t origin[4] = { 0, 0, 0, 0 };
310 size_t region[3] = { imageInfo->width, imageInfo->height,
311 imageInfo->depth };
312
313 if (gDebugTrace) log_info(" - Writing image...\n");
314
315 if (!gTestMipmaps)
316 {
317
318 error =
319 clEnqueueWriteImage(queue, image, CL_TRUE, origin, region,
320 gEnablePitch ? imageInfo->rowPitch : 0,
321 gEnablePitch ? imageInfo->slicePitch : 0,
322 imageValues, 0, NULL, NULL);
323
324 if (error != CL_SUCCESS)
325 {
326 log_error("ERROR: Unable to write to 3D image of size %d x %d "
327 "x %d \n",
328 (int)imageInfo->width, (int)imageInfo->height,
329 (int)imageInfo->depth);
330 return error;
331 }
332 }
333 else
334 {
335 int nextLevelOffset = 0;
336
337 for (int i = 0; i < imageInfo->num_mip_levels; i++)
338 {
339 origin[3] = i;
340 error = clEnqueueWriteImage(
341 queue, image, CL_TRUE, origin, region,
342 /*gEnablePitch ? imageInfo->rowPitch :*/ 0,
343 /*gEnablePitch ? imageInfo->slicePitch :*/ 0,
344 ((char *)imageValues + nextLevelOffset), 0, NULL, NULL);
345 if (error != CL_SUCCESS)
346 {
347 log_error("ERROR: Unable to write to %d level mipmapped 3D "
348 "image of size %d x %d x %d\n",
349 (int)imageInfo->num_mip_levels,
350 (int)imageInfo->width, (int)imageInfo->height,
351 (int)imageInfo->depth);
352 return error;
353 }
354 nextLevelOffset += region[0] * region[1] * region[2]
355 * get_pixel_size(imageInfo->format);
356 // Subsequent mip level dimensions keep halving
357 region[0] = region[0] >> 1 ? region[0] >> 1 : 1;
358 region[1] = region[1] >> 1 ? region[1] >> 1 : 1;
359 region[2] = region[2] >> 1 ? region[2] >> 1 : 1;
360 }
361 }
362 }
363
364 xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
365 sizeof(cl_float) * imageInfo->width
366 * imageInfo->height * imageInfo->depth,
367 xOffsetValues, &error);
368 test_error(error, "Unable to create x offset buffer");
369 yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
370 sizeof(cl_float) * imageInfo->width
371 * imageInfo->height * imageInfo->depth,
372 yOffsetValues, &error);
373 test_error(error, "Unable to create y offset buffer");
374 zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
375 sizeof(cl_float) * imageInfo->width
376 * imageInfo->height * imageInfo->depth,
377 zOffsetValues, &error);
378 test_error(error, "Unable to create y offset buffer");
379 results =
380 clCreateBuffer(context, CL_MEM_READ_WRITE,
381 get_explicit_type_size(outputType) * 4 * imageInfo->width
382 * imageInfo->height * imageInfo->depth,
383 NULL, &error);
384 test_error(error, "Unable to create result buffer");
385
386 // Create sampler to use
387 actualSampler = create_sampler(context, imageSampler, gTestMipmaps, &error);
388 test_error(error, "Unable to create image sampler");
389
390 // Set arguments
391 int idx = 0;
392 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &image);
393 test_error(error, "Unable to set kernel arguments");
394 if (!gUseKernelSamplers)
395 {
396 error =
397 clSetKernelArg(kernel, idx++, sizeof(cl_sampler), &actualSampler);
398 test_error(error, "Unable to set kernel arguments");
399 }
400 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &xOffsets);
401 test_error(error, "Unable to set kernel arguments");
402 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &yOffsets);
403 test_error(error, "Unable to set kernel arguments");
404 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &zOffsets);
405 test_error(error, "Unable to set kernel arguments");
406 error = clSetKernelArg(kernel, idx++, sizeof(cl_mem), &results);
407 test_error(error, "Unable to set kernel arguments");
408
409 const float float_offsets[] = { 0.0f,
410 MAKE_HEX_FLOAT(0x1.0p-30f, 0x1L, -30),
411 0.25f,
412 0.3f,
413 0.5f - FLT_EPSILON / 4.0f,
414 0.5f,
415 0.9f,
416 1.0f - FLT_EPSILON / 2 };
417 int float_offset_count = sizeof(float_offsets) / sizeof(float_offsets[0]);
418 int numTries = MAX_TRIES, numClamped = MAX_CLAMPED;
419 int loopCount = 2 * float_offset_count;
420 if (!useFloatCoords) loopCount = 1;
421 if (gTestMaxImages)
422 {
423 loopCount = 1;
424 log_info("Testing each size only once with pixel offsets of %g for max "
425 "sized images.\n",
426 float_offsets[0]);
427 }
428
429 // Get the maximum absolute error for this format
430 double formatAbsoluteError =
431 get_max_absolute_error(imageInfo->format, imageSampler);
432 if (gDebugTrace)
433 log_info("\tformatAbsoluteError is %e\n", formatAbsoluteError);
434
435 if (0 == initHalf
436 && imageInfo->format->image_channel_data_type == CL_HALF_FLOAT)
437 {
438 initHalf = CL_SUCCESS == DetectFloatToHalfRoundingMode(queue);
439 if (initHalf)
440 {
441 log_info("Half rounding mode successfully detected.\n");
442 }
443 }
444
445 int nextLevelOffset = 0;
446 size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
447 depth_lod = imageInfo->depth;
448
449 // Loop over all mipmap levels, if we are testing mipmapped images.
450 for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels)
451 || (!gTestMipmaps && lod < 1);
452 lod++)
453 {
454 size_t resultValuesSize = width_lod * height_lod * depth_lod
455 * get_explicit_type_size(outputType) * 4;
456 BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
457 float lod_float = (float)lod;
458 if (gTestMipmaps)
459 {
460 // Set the lod kernel arg
461 if (gDebugTrace) log_info(" - Working at mip level %d\n", lod);
462 error = clSetKernelArg(kernel, idx, sizeof(float), &lod_float);
463 test_error(error, "Unable to set kernel arguments");
464 }
465
466 for (int q = 0; q < loopCount; q++)
467 {
468 float offset = float_offsets[q % float_offset_count];
469
470 // Init the coordinates
471 InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues,
472 yOffsetValues, zOffsetValues,
473 q >= float_offset_count ? -offset : offset,
474 q >= float_offset_count ? offset : -offset,
475 q >= float_offset_count ? -offset : offset,
476 imageSampler->normalized_coords, d, lod);
477
478 error =
479 clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0,
480 sizeof(cl_float) * imageInfo->height
481 * imageInfo->width * imageInfo->depth,
482 xOffsetValues, 0, NULL, NULL);
483 test_error(error, "Unable to write x offsets");
484 error =
485 clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0,
486 sizeof(cl_float) * imageInfo->height
487 * imageInfo->width * imageInfo->depth,
488 yOffsetValues, 0, NULL, NULL);
489 test_error(error, "Unable to write y offsets");
490 error =
491 clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0,
492 sizeof(cl_float) * imageInfo->height
493 * imageInfo->width * imageInfo->depth,
494 zOffsetValues, 0, NULL, NULL);
495 test_error(error, "Unable to write z offsets");
496
497
498 memset(resultValues, 0xff, resultValuesSize);
499 clEnqueueWriteBuffer(queue, results, CL_TRUE, 0, resultValuesSize,
500 resultValues, 0, NULL, NULL);
501
502 // Figure out thread dimensions
503 threads[0] = (size_t)width_lod;
504 threads[1] = (size_t)height_lod;
505 threads[2] = (size_t)depth_lod;
506
507 // Run the kernel
508 error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads,
509 NULL, 0, NULL, NULL);
510 test_error(error, "Unable to run kernel");
511
512 // Get results
513 error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0,
514 width_lod * height_lod * depth_lod
515 * get_explicit_type_size(outputType)
516 * 4,
517 resultValues, 0, NULL, NULL);
518 test_error(error, "Unable to read results from kernel");
519 if (gDebugTrace) log_info(" results read\n");
520
521 // Validate results element by element
522 char *imagePtr = (char *)imageValues + nextLevelOffset;
523 /*
524 * FLOAT output type
525 */
526 if (is_sRGBA_order(imageInfo->format->image_channel_order)
527 && (outputType == kFloat))
528 {
529 // Validate float results
530 float *resultPtr = (float *)(char *)resultValues;
531 float expected[4], error = 0.0f;
532 float maxErr = get_max_relative_error(
533 imageInfo->format, imageSampler, 1 /*3D*/,
534 CL_FILTER_LINEAR == imageSampler->filter_mode);
535
536 for (size_t z = 0, j = 0; z < depth_lod; z++)
537 {
538 for (size_t y = 0; y < height_lod; y++)
539 {
540 for (size_t x = 0; x < width_lod; x++, j++)
541 {
542 // Step 1: go through and see if the results verify
543 // for the pixel For the normalized case on a GPU we
544 // put in offsets to the X, Y and Z to see if we
545 // land on the right pixel. This addresses the
546 // significant inaccuracy in GPU normalization in
547 // OpenCL 1.0.
548 int checkOnlyOnePixel = 0;
549 int found_pixel = 0;
550 float offset = NORM_OFFSET;
551 if (!imageSampler->normalized_coords
552 || imageSampler->filter_mode
553 != CL_FILTER_NEAREST
554 || NORM_OFFSET == 0
555 #if defined(__APPLE__)
556 // Apple requires its CPU implementation to do
557 // correctly rounded address arithmetic in all
558 // modes
559 || gDeviceType != CL_DEVICE_TYPE_GPU
560 #endif
561 )
562 offset = 0.0f; // Loop only once
563
564 for (float norm_offset_x = -offset;
565 norm_offset_x <= offset && !found_pixel;
566 norm_offset_x += NORM_OFFSET)
567 {
568 for (float norm_offset_y = -offset;
569 norm_offset_y <= offset && !found_pixel;
570 norm_offset_y += NORM_OFFSET)
571 {
572 for (float norm_offset_z = -offset;
573 norm_offset_z <= NORM_OFFSET
574 && !found_pixel;
575 norm_offset_z += NORM_OFFSET)
576 {
577
578 int hasDenormals = 0;
579 FloatPixel maxPixel =
580 sample_image_pixel_float_offset(
581 imagePtr, imageInfo,
582 xOffsetValues[j],
583 yOffsetValues[j],
584 zOffsetValues[j], norm_offset_x,
585 norm_offset_y, norm_offset_z,
586 imageSampler, expected, 0,
587 &hasDenormals, lod);
588
589 float err1 =
590 ABS_ERROR(sRGBmap(resultPtr[0]),
591 sRGBmap(expected[0]));
592 float err2 =
593 ABS_ERROR(sRGBmap(resultPtr[1]),
594 sRGBmap(expected[1]));
595 float err3 =
596 ABS_ERROR(sRGBmap(resultPtr[2]),
597 sRGBmap(expected[2]));
598 float err4 = ABS_ERROR(resultPtr[3],
599 expected[3]);
600 // Clamp to the minimum absolute error
601 // for the format
602 if (err1 > 0
603 && err1 < formatAbsoluteError)
604 {
605 err1 = 0.0f;
606 }
607 if (err2 > 0
608 && err2 < formatAbsoluteError)
609 {
610 err2 = 0.0f;
611 }
612 if (err3 > 0
613 && err3 < formatAbsoluteError)
614 {
615 err3 = 0.0f;
616 }
617 if (err4 > 0
618 && err4 < formatAbsoluteError)
619 {
620 err4 = 0.0f;
621 }
622 float maxErr = 0.5;
623
624 if (!(err1 <= maxErr)
625 || !(err2 <= maxErr)
626 || !(err3 <= maxErr)
627 || !(err4 <= maxErr))
628 {
629 // Try flushing the denormals
630 if (hasDenormals)
631 {
632 // If implementation decide to
633 // flush subnormals to zero, max
634 // error needs to be adjusted
635 maxErr += 4 * FLT_MIN;
636
637 maxPixel =
638 sample_image_pixel_float_offset(
639 imagePtr, imageInfo,
640 xOffsetValues[j],
641 yOffsetValues[j],
642 zOffsetValues[j],
643 norm_offset_x,
644 norm_offset_y,
645 norm_offset_z,
646 imageSampler, expected,
647 0, NULL, lod);
648
649 err1 = ABS_ERROR(
650 sRGBmap(resultPtr[0]),
651 sRGBmap(expected[0]));
652 err2 = ABS_ERROR(
653 sRGBmap(resultPtr[1]),
654 sRGBmap(expected[1]));
655 err3 = ABS_ERROR(
656 sRGBmap(resultPtr[2]),
657 sRGBmap(expected[2]));
658 err4 = ABS_ERROR(resultPtr[3],
659 expected[3]);
660 }
661 }
662
663 found_pixel = (err1 <= maxErr)
664 && (err2 <= maxErr)
665 && (err3 <= maxErr)
666 && (err4 <= maxErr);
667 } // norm_offset_z
668 } // norm_offset_y
669 } // norm_offset_x
670
671 // Step 2: If we did not find a match, then print
672 // out debugging info.
673 if (!found_pixel)
674 {
675 // For the normalized case on a GPU we put in
676 // offsets to the X and Y to see if we land on
677 // the right pixel. This addresses the
678 // significant inaccuracy in GPU normalization
679 // in OpenCL 1.0.
680 checkOnlyOnePixel = 0;
681 int shouldReturn = 0;
682 for (float norm_offset_x = -offset;
683 norm_offset_x <= offset
684 && !checkOnlyOnePixel;
685 norm_offset_x += NORM_OFFSET)
686 {
687 for (float norm_offset_y = -offset;
688 norm_offset_y <= offset
689 && !checkOnlyOnePixel;
690 norm_offset_y += NORM_OFFSET)
691 {
692 for (float norm_offset_z = -offset;
693 norm_offset_z <= offset
694 && !checkOnlyOnePixel;
695 norm_offset_z += NORM_OFFSET)
696 {
697
698 int hasDenormals = 0;
699 FloatPixel maxPixel =
700 sample_image_pixel_float_offset(
701 imagePtr, imageInfo,
702 xOffsetValues[j],
703 yOffsetValues[j],
704 zOffsetValues[j],
705 norm_offset_x,
706 norm_offset_y,
707 norm_offset_z, imageSampler,
708 expected, 0, &hasDenormals,
709 lod);
710
711 float err1 =
712 ABS_ERROR(sRGBmap(resultPtr[0]),
713 sRGBmap(expected[0]));
714 float err2 =
715 ABS_ERROR(sRGBmap(resultPtr[1]),
716 sRGBmap(expected[1]));
717 float err3 =
718 ABS_ERROR(sRGBmap(resultPtr[2]),
719 sRGBmap(expected[2]));
720 float err4 = ABS_ERROR(resultPtr[3],
721 expected[3]);
722 float maxErr = 0.6;
723
724 if (!(err1 <= maxErr)
725 || !(err2 <= maxErr)
726 || !(err3 <= maxErr)
727 || !(err4 <= maxErr))
728 {
729 // Try flushing the denormals
730 if (hasDenormals)
731 {
732 // If implementation decide
733 // to flush subnormals to
734 // zero, max error needs to
735 // be adjusted
736 maxErr += 4 * FLT_MIN;
737
738 maxPixel =
739 sample_image_pixel_float(
740 imagePtr, imageInfo,
741 xOffsetValues[j],
742 yOffsetValues[j],
743 zOffsetValues[j],
744 imageSampler,
745 expected, 0, NULL,
746 lod);
747
748 err1 = ABS_ERROR(
749 sRGBmap(resultPtr[0]),
750 sRGBmap(expected[0]));
751 err2 = ABS_ERROR(
752 sRGBmap(resultPtr[1]),
753 sRGBmap(expected[1]));
754 err3 = ABS_ERROR(
755 sRGBmap(resultPtr[2]),
756 sRGBmap(expected[2]));
757 err4 =
758 ABS_ERROR(resultPtr[3],
759 expected[3]);
760 }
761 }
762
763 if (!(err1 <= maxErr)
764 || !(err2 <= maxErr)
765 || !(err3 <= maxErr)
766 || !(err4 <= maxErr))
767 {
768 log_error(
769 "FAILED norm_offsets: %g , "
770 "%g , %g:\n",
771 norm_offset_x,
772 norm_offset_y,
773 norm_offset_z);
774
775 float tempOut[4];
776 shouldReturn |=
777 determine_validation_error_offset<
778 float>(
779 imagePtr, imageInfo,
780 imageSampler, resultPtr,
781 expected, error,
782 xOffsetValues[j],
783 yOffsetValues[j],
784 zOffsetValues[j],
785 norm_offset_x,
786 norm_offset_y,
787 norm_offset_z, j,
788 numTries, numClamped,
789 true, lod);
790 log_error("Step by step:\n");
791 FloatPixel temp =
792 sample_image_pixel_float_offset(
793 imagePtr, imageInfo,
794 xOffsetValues[j],
795 yOffsetValues[j],
796 zOffsetValues[j],
797 norm_offset_x,
798 norm_offset_y,
799 norm_offset_z,
800 imageSampler, tempOut,
801 1 /*verbose*/,
802 &hasDenormals, lod);
803 log_error(
804 "\tulps: %2.2f, %2.2f, "
805 "%2.2f, %2.2f (max "
806 "allowed: %2.2f)\n\n",
807 Ulp_Error(resultPtr[0],
808 expected[0]),
809 Ulp_Error(resultPtr[1],
810 expected[1]),
811 Ulp_Error(resultPtr[2],
812 expected[2]),
813 Ulp_Error(resultPtr[3],
814 expected[3]),
815 Ulp_Error(
816 MAKE_HEX_FLOAT(
817 0x1.000002p0f,
818 0x1000002L, -24)
819 + maxErr,
820 MAKE_HEX_FLOAT(
821 0x1.000002p0f,
822 0x1000002L, -24)));
823 }
824 else
825 {
826 log_error(
827 "Test error: we should "
828 "have detected this "
829 "passing above.\n");
830 }
831 } // norm_offset_z
832 } // norm_offset_y
833 } // norm_offset_x
834 if (shouldReturn) return 1;
835 } // if (!found_pixel)
836
837 resultPtr += 4;
838 }
839 }
840 }
841 }
842 /*
843 * FLOAT output type
844 */
845 else if (outputType == kFloat)
846 {
847 // Validate float results
848 float *resultPtr = (float *)(char *)resultValues;
849 float expected[4], error = 0.0f;
850 float maxErr = get_max_relative_error(
851 imageInfo->format, imageSampler, 1 /*3D*/,
852 CL_FILTER_LINEAR == imageSampler->filter_mode);
853
854 for (size_t z = 0, j = 0; z < depth_lod; z++)
855 {
856 for (size_t y = 0; y < height_lod; y++)
857 {
858 for (size_t x = 0; x < width_lod; x++, j++)
859 {
860 // Step 1: go through and see if the results verify
861 // for the pixel For the normalized case on a GPU we
862 // put in offsets to the X, Y and Z to see if we
863 // land on the right pixel. This addresses the
864 // significant inaccuracy in GPU normalization in
865 // OpenCL 1.0.
866 int checkOnlyOnePixel = 0;
867 int found_pixel = 0;
868 float offset = NORM_OFFSET;
869 if (!imageSampler->normalized_coords
870 || imageSampler->filter_mode
871 != CL_FILTER_NEAREST
872 || NORM_OFFSET == 0
873 #if defined(__APPLE__)
874 // Apple requires its CPU implementation to do
875 // correctly rounded address arithmetic in all
876 // modes
877 || gDeviceType != CL_DEVICE_TYPE_GPU
878 #endif
879 )
880 offset = 0.0f; // Loop only once
881
882 for (float norm_offset_x = -offset;
883 norm_offset_x <= offset && !found_pixel;
884 norm_offset_x += NORM_OFFSET)
885 {
886 for (float norm_offset_y = -offset;
887 norm_offset_y <= offset && !found_pixel;
888 norm_offset_y += NORM_OFFSET)
889 {
890 for (float norm_offset_z = -offset;
891 norm_offset_z <= NORM_OFFSET
892 && !found_pixel;
893 norm_offset_z += NORM_OFFSET)
894 {
895
896 int hasDenormals = 0;
897 FloatPixel maxPixel =
898 sample_image_pixel_float_offset(
899 imagePtr, imageInfo,
900 xOffsetValues[j],
901 yOffsetValues[j],
902 zOffsetValues[j], norm_offset_x,
903 norm_offset_y, norm_offset_z,
904 imageSampler, expected, 0,
905 &hasDenormals, lod);
906
907 float err1 = ABS_ERROR(resultPtr[0],
908 expected[0]);
909 float err2 = ABS_ERROR(resultPtr[1],
910 expected[1]);
911 float err3 = ABS_ERROR(resultPtr[2],
912 expected[2]);
913 float err4 = ABS_ERROR(resultPtr[3],
914 expected[3]);
915 // Clamp to the minimum absolute error
916 // for the format
917 if (err1 > 0
918 && err1 < formatAbsoluteError)
919 {
920 err1 = 0.0f;
921 }
922 if (err2 > 0
923 && err2 < formatAbsoluteError)
924 {
925 err2 = 0.0f;
926 }
927 if (err3 > 0
928 && err3 < formatAbsoluteError)
929 {
930 err3 = 0.0f;
931 }
932 if (err4 > 0
933 && err4 < formatAbsoluteError)
934 {
935 err4 = 0.0f;
936 }
937 float maxErr1 = MAX(
938 maxErr * maxPixel.p[0], FLT_MIN);
939 float maxErr2 = MAX(
940 maxErr * maxPixel.p[1], FLT_MIN);
941 float maxErr3 = MAX(
942 maxErr * maxPixel.p[2], FLT_MIN);
943 float maxErr4 = MAX(
944 maxErr * maxPixel.p[3], FLT_MIN);
945
946 if (!(err1 <= maxErr1)
947 || !(err2 <= maxErr2)
948 || !(err3 <= maxErr3)
949 || !(err4 <= maxErr4))
950 {
951 // Try flushing the denormals
952 if (hasDenormals)
953 {
954 // If implementation decide to
955 // flush subnormals to zero, max
956 // error needs to be adjusted
957 maxErr1 += 4 * FLT_MIN;
958 maxErr2 += 4 * FLT_MIN;
959 maxErr3 += 4 * FLT_MIN;
960 maxErr4 += 4 * FLT_MIN;
961
962 maxPixel =
963 sample_image_pixel_float_offset(
964 imagePtr, imageInfo,
965 xOffsetValues[j],
966 yOffsetValues[j],
967 zOffsetValues[j],
968 norm_offset_x,
969 norm_offset_y,
970 norm_offset_z,
971 imageSampler, expected,
972 0, NULL, lod);
973
974 err1 = ABS_ERROR(resultPtr[0],
975 expected[0]);
976 err2 = ABS_ERROR(resultPtr[1],
977 expected[1]);
978 err3 = ABS_ERROR(resultPtr[2],
979 expected[2]);
980 err4 = ABS_ERROR(resultPtr[3],
981 expected[3]);
982 }
983 }
984
985 found_pixel = (err1 <= maxErr1)
986 && (err2 <= maxErr2)
987 && (err3 <= maxErr3)
988 && (err4 <= maxErr4);
989 } // norm_offset_z
990 } // norm_offset_y
991 } // norm_offset_x
992
993 // Step 2: If we did not find a match, then print
994 // out debugging info.
995 if (!found_pixel)
996 {
997 // For the normalized case on a GPU we put in
998 // offsets to the X and Y to see if we land on
999 // the right pixel. This addresses the
1000 // significant inaccuracy in GPU normalization
1001 // in OpenCL 1.0.
1002 checkOnlyOnePixel = 0;
1003 int shouldReturn = 0;
1004 for (float norm_offset_x = -offset;
1005 norm_offset_x <= offset
1006 && !checkOnlyOnePixel;
1007 norm_offset_x += NORM_OFFSET)
1008 {
1009 for (float norm_offset_y = -offset;
1010 norm_offset_y <= offset
1011 && !checkOnlyOnePixel;
1012 norm_offset_y += NORM_OFFSET)
1013 {
1014 for (float norm_offset_z = -offset;
1015 norm_offset_z <= offset
1016 && !checkOnlyOnePixel;
1017 norm_offset_z += NORM_OFFSET)
1018 {
1019
1020 int hasDenormals = 0;
1021 FloatPixel maxPixel =
1022 sample_image_pixel_float_offset(
1023 imagePtr, imageInfo,
1024 xOffsetValues[j],
1025 yOffsetValues[j],
1026 zOffsetValues[j],
1027 norm_offset_x,
1028 norm_offset_y,
1029 norm_offset_z, imageSampler,
1030 expected, 0, &hasDenormals,
1031 lod);
1032
1033 float err1 = ABS_ERROR(resultPtr[0],
1034 expected[0]);
1035 float err2 = ABS_ERROR(resultPtr[1],
1036 expected[1]);
1037 float err3 = ABS_ERROR(resultPtr[2],
1038 expected[2]);
1039 float err4 = ABS_ERROR(resultPtr[3],
1040 expected[3]);
1041 float maxErr1 =
1042 MAX(maxErr * maxPixel.p[0],
1043 FLT_MIN);
1044 float maxErr2 =
1045 MAX(maxErr * maxPixel.p[1],
1046 FLT_MIN);
1047 float maxErr3 =
1048 MAX(maxErr * maxPixel.p[2],
1049 FLT_MIN);
1050 float maxErr4 =
1051 MAX(maxErr * maxPixel.p[3],
1052 FLT_MIN);
1053
1054
1055 if (!(err1 <= maxErr1)
1056 || !(err2 <= maxErr2)
1057 || !(err3 <= maxErr3)
1058 || !(err4 <= maxErr4))
1059 {
1060 // Try flushing the denormals
1061 if (hasDenormals)
1062 {
1063 maxErr1 += 4 * FLT_MIN;
1064 maxErr2 += 4 * FLT_MIN;
1065 maxErr3 += 4 * FLT_MIN;
1066 maxErr4 += 4 * FLT_MIN;
1067
1068 maxPixel =
1069 sample_image_pixel_float(
1070 imagePtr, imageInfo,
1071 xOffsetValues[j],
1072 yOffsetValues[j],
1073 zOffsetValues[j],
1074 imageSampler,
1075 expected, 0, NULL,
1076 lod);
1077
1078 err1 =
1079 ABS_ERROR(resultPtr[0],
1080 expected[0]);
1081 err2 =
1082 ABS_ERROR(resultPtr[1],
1083 expected[1]);
1084 err3 =
1085 ABS_ERROR(resultPtr[2],
1086 expected[2]);
1087 err4 =
1088 ABS_ERROR(resultPtr[3],
1089 expected[3]);
1090 }
1091 }
1092
1093 if (!(err1 <= maxErr1)
1094 || !(err2 <= maxErr2)
1095 || !(err3 <= maxErr3)
1096 || !(err4 <= maxErr4))
1097 {
1098 log_error(
1099 "FAILED norm_offsets: %g , "
1100 "%g , %g:\n",
1101 norm_offset_x,
1102 norm_offset_y,
1103 norm_offset_z);
1104
1105 float tempOut[4];
1106 shouldReturn |=
1107 determine_validation_error_offset<
1108 float>(
1109 imagePtr, imageInfo,
1110 imageSampler, resultPtr,
1111 expected, error,
1112 xOffsetValues[j],
1113 yOffsetValues[j],
1114 zOffsetValues[j],
1115 norm_offset_x,
1116 norm_offset_y,
1117 norm_offset_z, j,
1118 numTries, numClamped,
1119 true, lod);
1120 log_error("Step by step:\n");
1121 FloatPixel temp =
1122 sample_image_pixel_float_offset(
1123 imagePtr, imageInfo,
1124 xOffsetValues[j],
1125 yOffsetValues[j],
1126 zOffsetValues[j],
1127 norm_offset_x,
1128 norm_offset_y,
1129 norm_offset_z,
1130 imageSampler, tempOut,
1131 1 /*verbose*/,
1132 &hasDenormals, lod);
1133 log_error(
1134 "\tulps: %2.2f, %2.2f, "
1135 "%2.2f, %2.2f (max "
1136 "allowed: %2.2f)\n\n",
1137 Ulp_Error(resultPtr[0],
1138 expected[0]),
1139 Ulp_Error(resultPtr[1],
1140 expected[1]),
1141 Ulp_Error(resultPtr[2],
1142 expected[2]),
1143 Ulp_Error(resultPtr[3],
1144 expected[3]),
1145 Ulp_Error(
1146 MAKE_HEX_FLOAT(
1147 0x1.000002p0f,
1148 0x1000002L, -24)
1149 + maxErr,
1150 MAKE_HEX_FLOAT(
1151 0x1.000002p0f,
1152 0x1000002L, -24)));
1153 }
1154 else
1155 {
1156 log_error(
1157 "Test error: we should "
1158 "have detected this "
1159 "passing above.\n");
1160 }
1161 } // norm_offset_z
1162 } // norm_offset_y
1163 } // norm_offset_x
1164 if (shouldReturn) return 1;
1165 } // if (!found_pixel)
1166
1167 resultPtr += 4;
1168 }
1169 }
1170 }
1171 }
1172 /*
1173 * UINT output type
1174 */
1175 else if (outputType == kUInt)
1176 {
1177 // Validate unsigned integer results
1178 unsigned int *resultPtr = (unsigned int *)(char *)resultValues;
1179 unsigned int expected[4];
1180 float error;
1181 for (size_t z = 0, j = 0; z < depth_lod; z++)
1182 {
1183 for (size_t y = 0; y < height_lod; y++)
1184 {
1185 for (size_t x = 0; x < width_lod; x++, j++)
1186 {
1187 // Step 1: go through and see if the results verify
1188 // for the pixel For the normalized case on a GPU we
1189 // put in offsets to the X, Y and Z to see if we
1190 // land on the right pixel. This addresses the
1191 // significant inaccuracy in GPU normalization in
1192 // OpenCL 1.0.
1193 int checkOnlyOnePixel = 0;
1194 int found_pixel = 0;
1195 for (float norm_offset_x = -NORM_OFFSET;
1196 norm_offset_x <= NORM_OFFSET && !found_pixel
1197 && !checkOnlyOnePixel;
1198 norm_offset_x += NORM_OFFSET)
1199 {
1200 for (float norm_offset_y = -NORM_OFFSET;
1201 norm_offset_y <= NORM_OFFSET
1202 && !found_pixel && !checkOnlyOnePixel;
1203 norm_offset_y += NORM_OFFSET)
1204 {
1205 for (float norm_offset_z = -NORM_OFFSET;
1206 norm_offset_z <= NORM_OFFSET
1207 && !found_pixel && !checkOnlyOnePixel;
1208 norm_offset_z += NORM_OFFSET)
1209 {
1210
1211 // If we are not on a GPU, or we are not
1212 // normalized, then only test with
1213 // offsets (0.0, 0.0) E.g., test one
1214 // pixel.
1215 if (!imageSampler->normalized_coords
1216 || gDeviceType != CL_DEVICE_TYPE_GPU
1217 || NORM_OFFSET == 0)
1218 {
1219 norm_offset_x = 0.0f;
1220 norm_offset_y = 0.0f;
1221 norm_offset_z = 0.0f;
1222 checkOnlyOnePixel = 1;
1223 }
1224
1225 sample_image_pixel_offset<unsigned int>(
1226 imagePtr, imageInfo,
1227 xOffsetValues[j], yOffsetValues[j],
1228 zOffsetValues[j], norm_offset_x,
1229 norm_offset_y, norm_offset_z,
1230 imageSampler, expected, lod);
1231
1232 error = errMax(
1233 errMax(abs_diff_uint(expected[0],
1234 resultPtr[0]),
1235 abs_diff_uint(expected[1],
1236 resultPtr[1])),
1237 errMax(
1238 abs_diff_uint(expected[2],
1239 resultPtr[2]),
1240 abs_diff_uint(expected[3],
1241 resultPtr[3])));
1242
1243 if (error < MAX_ERR) found_pixel = 1;
1244 } // norm_offset_z
1245 } // norm_offset_y
1246 } // norm_offset_x
1247
1248 // Step 2: If we did not find a match, then print
1249 // out debugging info.
1250 if (!found_pixel)
1251 {
1252 // For the normalized case on a GPU we put in
1253 // offsets to the X and Y to see if we land on
1254 // the right pixel. This addresses the
1255 // significant inaccuracy in GPU normalization
1256 // in OpenCL 1.0.
1257 checkOnlyOnePixel = 0;
1258 int shouldReturn = 0;
1259 for (float norm_offset_x = -NORM_OFFSET;
1260 norm_offset_x <= NORM_OFFSET
1261 && !checkOnlyOnePixel;
1262 norm_offset_x += NORM_OFFSET)
1263 {
1264 for (float norm_offset_y = -NORM_OFFSET;
1265 norm_offset_y <= NORM_OFFSET
1266 && !checkOnlyOnePixel;
1267 norm_offset_y += NORM_OFFSET)
1268 {
1269 for (float norm_offset_z = -NORM_OFFSET;
1270 norm_offset_z <= NORM_OFFSET
1271 && !checkOnlyOnePixel;
1272 norm_offset_z += NORM_OFFSET)
1273 {
1274
1275 // If we are not on a GPU, or we are
1276 // not normalized, then only test
1277 // with offsets (0.0, 0.0) E.g.,
1278 // test one pixel.
1279 if (!imageSampler->normalized_coords
1280 || gDeviceType
1281 != CL_DEVICE_TYPE_GPU
1282 || NORM_OFFSET == 0)
1283 {
1284 norm_offset_x = 0.0f;
1285 norm_offset_y = 0.0f;
1286 norm_offset_z = 0.0f;
1287 checkOnlyOnePixel = 1;
1288 }
1289
1290 sample_image_pixel_offset<
1291 unsigned int>(
1292 imagePtr, imageInfo,
1293 xOffsetValues[j],
1294 yOffsetValues[j],
1295 zOffsetValues[j], norm_offset_x,
1296 norm_offset_y, norm_offset_z,
1297 imageSampler, expected, lod);
1298
1299 error = errMax(
1300 errMax(
1301 abs_diff_uint(expected[0],
1302 resultPtr[0]),
1303 abs_diff_uint(
1304 expected[1],
1305 resultPtr[1])),
1306 errMax(
1307 abs_diff_uint(expected[2],
1308 resultPtr[2]),
1309 abs_diff_uint(
1310 expected[3],
1311 resultPtr[3])));
1312
1313 if (error > MAX_ERR)
1314 {
1315 log_error(
1316 "FAILED norm_offsets: %g , "
1317 "%g , %g:\n",
1318 norm_offset_x,
1319 norm_offset_y,
1320 norm_offset_z);
1321 shouldReturn |=
1322 determine_validation_error_offset<
1323 unsigned int>(
1324 imagePtr, imageInfo,
1325 imageSampler, resultPtr,
1326 expected, error,
1327 xOffsetValues[j],
1328 yOffsetValues[j],
1329 zOffsetValues[j],
1330 norm_offset_x,
1331 norm_offset_y,
1332 norm_offset_z, j,
1333 numTries, numClamped,
1334 false, lod);
1335 }
1336 else
1337 {
1338 log_error(
1339 "Test error: we should "
1340 "have detected this "
1341 "passing above.\n");
1342 }
1343 } // norm_offset_z
1344 } // norm_offset_y
1345 } // norm_offset_x
1346 if (shouldReturn) return 1;
1347 } // if (!found_pixel)
1348
1349 resultPtr += 4;
1350 }
1351 }
1352 }
1353 }
1354 else
1355 /*
1356 * INT output type
1357 */
1358 {
1359 // Validate integer results
1360 int *resultPtr = (int *)(char *)resultValues;
1361 int expected[4];
1362 float error;
1363 for (size_t z = 0, j = 0; z < depth_lod; z++)
1364 {
1365 for (size_t y = 0; y < height_lod; y++)
1366 {
1367 for (size_t x = 0; x < width_lod; x++, j++)
1368 {
1369 // Step 1: go through and see if the results verify
1370 // for the pixel For the normalized case on a GPU we
1371 // put in offsets to the X, Y and Z to see if we
1372 // land on the right pixel. This addresses the
1373 // significant inaccuracy in GPU normalization in
1374 // OpenCL 1.0.
1375 int checkOnlyOnePixel = 0;
1376 int found_pixel = 0;
1377 for (float norm_offset_x = -NORM_OFFSET;
1378 norm_offset_x <= NORM_OFFSET && !found_pixel
1379 && !checkOnlyOnePixel;
1380 norm_offset_x += NORM_OFFSET)
1381 {
1382 for (float norm_offset_y = -NORM_OFFSET;
1383 norm_offset_y <= NORM_OFFSET
1384 && !found_pixel && !checkOnlyOnePixel;
1385 norm_offset_y += NORM_OFFSET)
1386 {
1387 for (float norm_offset_z = -NORM_OFFSET;
1388 norm_offset_z <= NORM_OFFSET
1389 && !found_pixel && !checkOnlyOnePixel;
1390 norm_offset_z += NORM_OFFSET)
1391 {
1392
1393 // If we are not on a GPU, or we are not
1394 // normalized, then only test with
1395 // offsets (0.0, 0.0) E.g., test one
1396 // pixel.
1397 if (!imageSampler->normalized_coords
1398 || gDeviceType != CL_DEVICE_TYPE_GPU
1399 || NORM_OFFSET == 0)
1400 {
1401 norm_offset_x = 0.0f;
1402 norm_offset_y = 0.0f;
1403 norm_offset_z = 0.0f;
1404 checkOnlyOnePixel = 1;
1405 }
1406
1407 sample_image_pixel_offset<int>(
1408 imagePtr, imageInfo,
1409 xOffsetValues[j], yOffsetValues[j],
1410 zOffsetValues[j], norm_offset_x,
1411 norm_offset_y, norm_offset_z,
1412 imageSampler, expected, lod);
1413
1414 error = errMax(
1415 errMax(abs_diff_int(expected[0],
1416 resultPtr[0]),
1417 abs_diff_int(expected[1],
1418 resultPtr[1])),
1419 errMax(abs_diff_int(expected[2],
1420 resultPtr[2]),
1421 abs_diff_int(expected[3],
1422 resultPtr[3])));
1423
1424 if (error < MAX_ERR) found_pixel = 1;
1425 } // norm_offset_z
1426 } // norm_offset_y
1427 } // norm_offset_x
1428
1429 // Step 2: If we did not find a match, then print
1430 // out debugging info.
1431 if (!found_pixel)
1432 {
1433 // For the normalized case on a GPU we put in
1434 // offsets to the X and Y to see if we land on
1435 // the right pixel. This addresses the
1436 // significant inaccuracy in GPU normalization
1437 // in OpenCL 1.0.
1438 checkOnlyOnePixel = 0;
1439 int shouldReturn = 0;
1440 for (float norm_offset_x = -NORM_OFFSET;
1441 norm_offset_x <= NORM_OFFSET
1442 && !checkOnlyOnePixel;
1443 norm_offset_x += NORM_OFFSET)
1444 {
1445 for (float norm_offset_y = -NORM_OFFSET;
1446 norm_offset_y <= NORM_OFFSET
1447 && !checkOnlyOnePixel;
1448 norm_offset_y += NORM_OFFSET)
1449 {
1450 for (float norm_offset_z = -NORM_OFFSET;
1451 norm_offset_z <= NORM_OFFSET
1452 && !checkOnlyOnePixel;
1453 norm_offset_z += NORM_OFFSET)
1454 {
1455
1456 // If we are not on a GPU, or we are
1457 // not normalized, then only test
1458 // with offsets (0.0, 0.0) E.g.,
1459 // test one pixel.
1460 if (!imageSampler->normalized_coords
1461 || gDeviceType
1462 != CL_DEVICE_TYPE_GPU
1463 || NORM_OFFSET == 0
1464 || NORM_OFFSET == 0
1465 || NORM_OFFSET == 0)
1466 {
1467 norm_offset_x = 0.0f;
1468 norm_offset_y = 0.0f;
1469 norm_offset_z = 0.0f;
1470 checkOnlyOnePixel = 1;
1471 }
1472
1473 sample_image_pixel_offset<int>(
1474 imagePtr, imageInfo,
1475 xOffsetValues[j],
1476 yOffsetValues[j],
1477 zOffsetValues[j], norm_offset_x,
1478 norm_offset_y, norm_offset_z,
1479 imageSampler, expected, lod);
1480
1481 error = errMax(
1482 errMax(
1483 abs_diff_int(expected[0],
1484 resultPtr[0]),
1485 abs_diff_int(expected[1],
1486 resultPtr[1])),
1487 errMax(
1488 abs_diff_int(expected[2],
1489 resultPtr[2]),
1490 abs_diff_int(
1491 expected[3],
1492 resultPtr[3])));
1493
1494 if (error > MAX_ERR)
1495 {
1496 log_error(
1497 "FAILED norm_offsets: %g , "
1498 "%g , %g:\n",
1499 norm_offset_x,
1500 norm_offset_y,
1501 norm_offset_z);
1502 shouldReturn |=
1503 determine_validation_error_offset<
1504 int>(
1505 imagePtr, imageInfo,
1506 imageSampler, resultPtr,
1507 expected, error,
1508 xOffsetValues[j],
1509 yOffsetValues[j],
1510 zOffsetValues[j],
1511 norm_offset_x,
1512 norm_offset_y,
1513 norm_offset_z, j,
1514 numTries, numClamped,
1515 false, lod);
1516 }
1517 else
1518 {
1519 log_error(
1520 "Test error: we should "
1521 "have detected this "
1522 "passing above.\n");
1523 }
1524 } // norm_offset_z
1525 } // norm_offset_y
1526 } // norm_offset_x
1527 if (shouldReturn) return 1;
1528 } // if (!found_pixel)
1529
1530 resultPtr += 4;
1531 }
1532 }
1533 }
1534 }
1535 }
1536 {
1537 nextLevelOffset += width_lod * height_lod * depth_lod
1538 * get_pixel_size(imageInfo->format);
1539 width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1;
1540 height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1;
1541 depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1;
1542 }
1543 }
1544
1545 return numTries != MAX_TRIES || numClamped != MAX_CLAMPED;
1546 }