1 //
2 // Copyright (c) 2017,2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "imageHelpers.h"
17 #include <limits.h>
18 #include <assert.h>
19 #if defined(__APPLE__)
20 #include <sys/mman.h>
21 #endif
22 #if !defined(_WIN32) && !defined(__APPLE__)
23 #include <malloc.h>
24 #endif
25 #include <algorithm>
26 #include <cinttypes>
27 #include <iterator>
28 #if !defined(_WIN32)
29 #include <cmath>
30 #endif
31
32 RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
33
34 cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
35 bool gTestRounding = false;
sRGBmap(float fc)36 double sRGBmap(float fc)
37 {
38 double c = (double)fc;
39
40 #if !defined(_WIN32)
41 if (std::isnan(c)) c = 0.0;
42 #else
43 if (_isnan(c)) c = 0.0;
44 #endif
45
46 if (c > 1.0)
47 c = 1.0;
48 else if (c < 0.0)
49 c = 0.0;
50 else if (c < 0.0031308)
51 c = 12.92 * c;
52 else
53 c = (1055.0 / 1000.0) * pow(c, 5.0 / 12.0) - (55.0 / 1000.0);
54
55 return c * 255.0;
56 }
57
sRGBunmap(float fc)58 double sRGBunmap(float fc)
59 {
60 double c = (double)fc;
61 double result;
62
63 if (c <= 0.04045)
64 result = c / 12.92;
65 else
66 result = pow((c + 0.055) / 1.055, 2.4);
67
68 return result;
69 }
70
71
get_format_type_size(const cl_image_format * format)72 uint32_t get_format_type_size(const cl_image_format *format)
73 {
74 return get_channel_data_type_size(format->image_channel_data_type);
75 }
76
get_channel_data_type_size(cl_channel_type channelType)77 uint32_t get_channel_data_type_size(cl_channel_type channelType)
78 {
79 switch (channelType)
80 {
81 case CL_SNORM_INT8:
82 case CL_UNORM_INT8:
83 case CL_SIGNED_INT8:
84 case CL_UNSIGNED_INT8: return 1;
85
86 case CL_SNORM_INT16:
87 case CL_UNORM_INT16:
88 case CL_SIGNED_INT16:
89 case CL_UNSIGNED_INT16:
90 case CL_HALF_FLOAT:
91 #ifdef CL_SFIXED14_APPLE
92 case CL_SFIXED14_APPLE:
93 #endif
94 return sizeof(cl_short);
95
96 case CL_SIGNED_INT32:
97 case CL_UNSIGNED_INT32: return sizeof(cl_int);
98
99 case CL_UNORM_SHORT_565:
100 case CL_UNORM_SHORT_555:
101 #ifdef OBSOLETE_FORAMT
102 case CL_UNORM_SHORT_565_REV:
103 case CL_UNORM_SHORT_555_REV:
104 #endif
105 return 2;
106
107 #ifdef OBSOLETE_FORAMT
108 case CL_UNORM_INT_8888:
109 case CL_UNORM_INT_8888_REV: return 4;
110 #endif
111
112 case CL_UNORM_INT_101010:
113 #ifdef OBSOLETE_FORAMT
114 case CL_UNORM_INT_101010_REV:
115 #endif
116 return 4;
117
118 case CL_FLOAT: return sizeof(cl_float);
119
120 default: return 0;
121 }
122 }
123
get_format_channel_count(const cl_image_format * format)124 uint32_t get_format_channel_count(const cl_image_format *format)
125 {
126 return get_channel_order_channel_count(format->image_channel_order);
127 }
128
get_channel_order_channel_count(cl_channel_order order)129 uint32_t get_channel_order_channel_count(cl_channel_order order)
130 {
131 switch (order)
132 {
133 case CL_R:
134 case CL_A:
135 case CL_Rx:
136 case CL_INTENSITY:
137 case CL_LUMINANCE:
138 case CL_DEPTH:
139 case CL_DEPTH_STENCIL: return 1;
140
141 case CL_RG:
142 case CL_RA:
143 case CL_RGx: return 2;
144
145 case CL_RGB:
146 case CL_RGBx:
147 case CL_sRGB:
148 case CL_sRGBx: return 3;
149
150 case CL_RGBA:
151 case CL_ARGB:
152 case CL_BGRA:
153 case CL_sRGBA:
154 case CL_sBGRA:
155 case CL_ABGR:
156 #ifdef CL_1RGB_APPLE
157 case CL_1RGB_APPLE:
158 #endif
159 #ifdef CL_BGR1_APPLE
160 case CL_BGR1_APPLE:
161 #endif
162 #ifdef CL_ABGR_APPLE
163 case CL_ABGR_APPLE:
164 #endif
165 return 4;
166
167 default:
168 log_error("%s does not support 0x%x\n", __FUNCTION__, order);
169 return 0;
170 }
171 }
172
get_channel_type_from_name(const char * name)173 cl_channel_type get_channel_type_from_name(const char *name)
174 {
175 struct
176 {
177 cl_channel_type type;
178 const char *name;
179 } typeNames[] = { { CL_SNORM_INT8, "CL_SNORM_INT8" },
180 { CL_SNORM_INT16, "CL_SNORM_INT16" },
181 { CL_UNORM_INT8, "CL_UNORM_INT8" },
182 { CL_UNORM_INT16, "CL_UNORM_INT16" },
183 { CL_UNORM_INT24, "CL_UNORM_INT24" },
184 { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
185 { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
186 { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
187 { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
188 { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
189 { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
190 { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
191 { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
192 { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
193 { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
194 { CL_FLOAT, "CL_FLOAT" },
195 #ifdef CL_SFIXED14_APPLE
196 { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
197 #endif
198 };
199 for (size_t i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); i++)
200 {
201 if (strcmp(typeNames[i].name, name) == 0
202 || strcmp(typeNames[i].name + 3, name) == 0)
203 return typeNames[i].type;
204 }
205 return (cl_channel_type)-1;
206 }
207
get_channel_order_from_name(const char * name)208 cl_channel_order get_channel_order_from_name(const char *name)
209 {
210 const struct
211 {
212 cl_channel_order order;
213 const char *name;
214 } orderNames[] = {
215 { CL_R, "CL_R" },
216 { CL_A, "CL_A" },
217 { CL_Rx, "CL_Rx" },
218 { CL_RG, "CL_RG" },
219 { CL_RA, "CL_RA" },
220 { CL_RGx, "CL_RGx" },
221 { CL_RGB, "CL_RGB" },
222 { CL_RGBx, "CL_RGBx" },
223 { CL_RGBA, "CL_RGBA" },
224 { CL_BGRA, "CL_BGRA" },
225 { CL_ARGB, "CL_ARGB" },
226 { CL_INTENSITY, "CL_INTENSITY" },
227 { CL_LUMINANCE, "CL_LUMINANCE" },
228 { CL_DEPTH, "CL_DEPTH" },
229 { CL_DEPTH_STENCIL, "CL_DEPTH_STENCIL" },
230 { CL_sRGB, "CL_sRGB" },
231 { CL_sRGBx, "CL_sRGBx" },
232 { CL_sRGBA, "CL_sRGBA" },
233 { CL_sBGRA, "CL_sBGRA" },
234 { CL_ABGR, "CL_ABGR" },
235 #ifdef CL_1RGB_APPLE
236 { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
237 #endif
238 #ifdef CL_BGR1_APPLE
239 { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
240 #endif
241 };
242
243 for (size_t i = 0; i < sizeof(orderNames) / sizeof(orderNames[0]); i++)
244 {
245 if (strcmp(orderNames[i].name, name) == 0
246 || strcmp(orderNames[i].name + 3, name) == 0)
247 return orderNames[i].order;
248 }
249 return (cl_channel_order)-1;
250 }
251
252
is_format_signed(const cl_image_format * format)253 int is_format_signed(const cl_image_format *format)
254 {
255 switch (format->image_channel_data_type)
256 {
257 case CL_SNORM_INT8:
258 case CL_SIGNED_INT8:
259 case CL_SNORM_INT16:
260 case CL_SIGNED_INT16:
261 case CL_SIGNED_INT32:
262 case CL_HALF_FLOAT:
263 case CL_FLOAT:
264 #ifdef CL_SFIXED14_APPLE
265 case CL_SFIXED14_APPLE:
266 #endif
267 return 1;
268
269 default: return 0;
270 }
271 }
272
get_pixel_size(const cl_image_format * format)273 uint32_t get_pixel_size(const cl_image_format *format)
274 {
275 switch (format->image_channel_data_type)
276 {
277 case CL_SNORM_INT8:
278 case CL_UNORM_INT8:
279 case CL_SIGNED_INT8:
280 case CL_UNSIGNED_INT8: return get_format_channel_count(format);
281
282 case CL_SNORM_INT16:
283 case CL_UNORM_INT16:
284 case CL_SIGNED_INT16:
285 case CL_UNSIGNED_INT16:
286 case CL_HALF_FLOAT:
287 #ifdef CL_SFIXED14_APPLE
288 case CL_SFIXED14_APPLE:
289 #endif
290 return get_format_channel_count(format) * sizeof(cl_ushort);
291
292 case CL_SIGNED_INT32:
293 case CL_UNSIGNED_INT32:
294 return get_format_channel_count(format) * sizeof(cl_int);
295
296 case CL_UNORM_SHORT_565:
297 case CL_UNORM_SHORT_555:
298 #ifdef OBSOLETE_FORAMT
299 case CL_UNORM_SHORT_565_REV:
300 case CL_UNORM_SHORT_555_REV:
301 #endif
302 return 2;
303
304 #ifdef OBSOLETE_FORAMT
305 case CL_UNORM_INT_8888:
306 case CL_UNORM_INT_8888_REV: return 4;
307 #endif
308
309 case CL_UNORM_INT_101010:
310 #ifdef OBSOLETE_FORAMT
311 case CL_UNORM_INT_101010_REV:
312 #endif
313 return 4;
314
315 case CL_FLOAT:
316 return get_format_channel_count(format) * sizeof(cl_float);
317
318 default: return 0;
319 }
320 }
321
next_power_of_two(uint32_t v)322 uint32_t next_power_of_two(uint32_t v)
323 {
324 v--;
325 v |= v >> 1;
326 v |= v >> 2;
327 v |= v >> 4;
328 v |= v >> 8;
329 v |= v >> 16;
330 v++;
331 return v;
332 }
333
get_pixel_alignment(const cl_image_format * format)334 uint32_t get_pixel_alignment(const cl_image_format *format)
335 {
336 return next_power_of_two(get_pixel_size(format));
337 }
338
get_8_bit_image_format(cl_context context,cl_mem_object_type objType,cl_mem_flags flags,size_t channelCount,cl_image_format * outFormat)339 int get_8_bit_image_format(cl_context context, cl_mem_object_type objType,
340 cl_mem_flags flags, size_t channelCount,
341 cl_image_format *outFormat)
342 {
343 cl_image_format formatList[128];
344 unsigned int outFormatCount, i;
345 int error;
346
347
348 /* Make sure each image format is supported */
349 if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
350 formatList, &outFormatCount)))
351 return error;
352
353
354 /* Look for one that is an 8-bit format */
355 for (i = 0; i < outFormatCount; i++)
356 {
357 if (formatList[i].image_channel_data_type == CL_SNORM_INT8
358 || formatList[i].image_channel_data_type == CL_UNORM_INT8
359 || formatList[i].image_channel_data_type == CL_SIGNED_INT8
360 || formatList[i].image_channel_data_type == CL_UNSIGNED_INT8)
361 {
362 if (!channelCount
363 || (channelCount
364 && (get_format_channel_count(&formatList[i])
365 == channelCount)))
366 {
367 *outFormat = formatList[i];
368 return 0;
369 }
370 }
371 }
372
373 return -1;
374 }
375
get_32_bit_image_format(cl_context context,cl_mem_object_type objType,cl_mem_flags flags,size_t channelCount,cl_image_format * outFormat)376 int get_32_bit_image_format(cl_context context, cl_mem_object_type objType,
377 cl_mem_flags flags, size_t channelCount,
378 cl_image_format *outFormat)
379 {
380 cl_image_format formatList[128];
381 unsigned int outFormatCount, i;
382 int error;
383
384
385 /* Make sure each image format is supported */
386 if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
387 formatList, &outFormatCount)))
388 return error;
389
390 /* Look for one that is an 8-bit format */
391 for (i = 0; i < outFormatCount; i++)
392 {
393 if (formatList[i].image_channel_data_type == CL_UNORM_INT_101010
394 || formatList[i].image_channel_data_type == CL_FLOAT
395 || formatList[i].image_channel_data_type == CL_SIGNED_INT32
396 || formatList[i].image_channel_data_type == CL_UNSIGNED_INT32)
397 {
398 if (!channelCount
399 || (channelCount
400 && (get_format_channel_count(&formatList[i])
401 == channelCount)))
402 {
403 *outFormat = formatList[i];
404 return 0;
405 }
406 }
407 }
408
409 return -1;
410 }
411
print_first_pixel_difference_error(size_t where,const char * sourcePixel,const char * destPixel,image_descriptor * imageInfo,size_t y,size_t thirdDim)412 void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
413 const char *destPixel,
414 image_descriptor *imageInfo, size_t y,
415 size_t thirdDim)
416 {
417 size_t pixel_size = get_pixel_size(imageInfo->format);
418
419 log_error("ERROR: Scanline %d did not verify for image size %d,%d,%d "
420 "pitch %d (extra %d bytes)\n",
421 (int)y, (int)imageInfo->width, (int)imageInfo->height,
422 (int)thirdDim, (int)imageInfo->rowPitch,
423 (int)imageInfo->rowPitch
424 - (int)imageInfo->width * (int)pixel_size);
425 log_error("Failed at column: %zu ", where);
426
427 switch (pixel_size)
428 {
429 case 1:
430 log_error("*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar *)sourcePixel)[0],
431 ((cl_uchar *)destPixel)[0]);
432 break;
433 case 2:
434 log_error("*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort *)sourcePixel)[0],
435 ((cl_ushort *)destPixel)[0]);
436 break;
437 case 3:
438 log_error("*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. "
439 "{0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
440 ((cl_uchar *)sourcePixel)[0],
441 ((cl_uchar *)sourcePixel)[1],
442 ((cl_uchar *)sourcePixel)[2], ((cl_uchar *)destPixel)[0],
443 ((cl_uchar *)destPixel)[1], ((cl_uchar *)destPixel)[2]);
444 break;
445 case 4:
446 log_error("*0x%8.8x vs. 0x%8.8x\n", ((cl_uint *)sourcePixel)[0],
447 ((cl_uint *)destPixel)[0]);
448 break;
449 case 6:
450 log_error(
451 "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. "
452 "{0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
453 ((cl_ushort *)sourcePixel)[0], ((cl_ushort *)sourcePixel)[1],
454 ((cl_ushort *)sourcePixel)[2], ((cl_ushort *)destPixel)[0],
455 ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
456 break;
457 case 8:
458 log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n",
459 ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
460 break;
461 case 12:
462 log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
463 "{0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
464 ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
465 ((cl_uint *)sourcePixel)[2], ((cl_uint *)destPixel)[0],
466 ((cl_uint *)destPixel)[1], ((cl_uint *)destPixel)[2]);
467 break;
468 case 16:
469 log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
470 "{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
471 ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
472 ((cl_uint *)sourcePixel)[2], ((cl_uint *)sourcePixel)[3],
473 ((cl_uint *)destPixel)[0], ((cl_uint *)destPixel)[1],
474 ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
475 break;
476 default:
477 log_error("Don't know how to print pixel size of %zu\n",
478 pixel_size);
479 break;
480 }
481 }
482
compare_scanlines(const image_descriptor * imageInfo,const char * aPtr,const char * bPtr)483 size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr,
484 const char *bPtr)
485 {
486 size_t pixel_size = get_pixel_size(imageInfo->format);
487 size_t column;
488
489 for (column = 0; column < imageInfo->width; column++)
490 {
491 switch (imageInfo->format->image_channel_data_type)
492 {
493 // If the data type is 101010, then ignore bits 31 and 32 when
494 // comparing the row
495 case CL_UNORM_INT_101010: {
496 cl_uint aPixel = *(cl_uint *)aPtr;
497 cl_uint bPixel = *(cl_uint *)bPtr;
498 if ((aPixel & 0x3fffffff) != (bPixel & 0x3fffffff))
499 return column;
500 }
501 break;
502
503 // If the data type is 555, ignore bit 15 when comparing the row
504 case CL_UNORM_SHORT_555: {
505 cl_ushort aPixel = *(cl_ushort *)aPtr;
506 cl_ushort bPixel = *(cl_ushort *)bPtr;
507 if ((aPixel & 0x7fff) != (bPixel & 0x7fff)) return column;
508 }
509 break;
510
511 default:
512 if (memcmp(aPtr, bPtr, pixel_size) != 0) return column;
513 break;
514 }
515
516 aPtr += pixel_size;
517 bPtr += pixel_size;
518 }
519
520 // If we didn't find a difference, return the width of the image
521 return column;
522 }
523
random_log_in_range(int minV,int maxV,MTdata d)524 int random_log_in_range(int minV, int maxV, MTdata d)
525 {
526 double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1);
527 int iv = (int)((float)(maxV - minV) * v);
528 return iv + minV;
529 }
530
531
532 // Define the addressing functions
533 typedef int (*AddressFn)(int value, size_t maxValue);
534
NoAddressFn(int value,size_t maxValue)535 int NoAddressFn(int value, size_t maxValue) { return value; }
RepeatAddressFn(int value,size_t maxValue)536 int RepeatAddressFn(int value, size_t maxValue)
537 {
538 if (value < 0)
539 value += (int)maxValue;
540 else if (value >= (int)maxValue)
541 value -= (int)maxValue;
542 return value;
543 }
MirroredRepeatAddressFn(int value,size_t maxValue)544 int MirroredRepeatAddressFn(int value, size_t maxValue)
545 {
546 if (value < 0)
547 value = 0;
548 else if ((size_t)value >= maxValue)
549 value = (int)(maxValue - 1);
550 return value;
551 }
ClampAddressFn(int value,size_t maxValue)552 int ClampAddressFn(int value, size_t maxValue)
553 {
554 return (value < -1) ? -1
555 : ((value > (cl_long)maxValue) ? (int)maxValue : value);
556 }
ClampToEdgeNearestFn(int value,size_t maxValue)557 int ClampToEdgeNearestFn(int value, size_t maxValue)
558 {
559 return (value < 0)
560 ? 0
561 : (((size_t)value > maxValue - 1) ? (int)maxValue - 1 : value);
562 }
563 AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;
564
565 // Note: normalized coords get repeated in normalized space, not unnormalized
566 // space! hence the special case here
567 volatile float gFloatHome;
RepeatNormalizedAddressFn(float fValue,size_t maxValue)568 float RepeatNormalizedAddressFn(float fValue, size_t maxValue)
569 {
570 #ifndef _MSC_VER // Use original if not the VS compiler.
571 // General computation for repeat
572 return (fValue - floorf(fValue)) * (float)maxValue; // Reduce to [0, 1.f]
573 #else // Otherwise, use this instead:
574 // Home the subtraction to a float to break up the sequence of x87
575 // instructions emitted by the VS compiler.
576 gFloatHome = fValue - floorf(fValue);
577 return gFloatHome * (float)maxValue;
578 #endif
579 }
580
MirroredRepeatNormalizedAddressFn(float fValue,size_t maxValue)581 float MirroredRepeatNormalizedAddressFn(float fValue, size_t maxValue)
582 {
583 // Round to nearest multiple of two.
584 // Note halfway values flip flop here due to rte, but they both end up
585 // pointing the same place at the end of the day.
586 float s_prime = 2.0f * rintf(fValue * 0.5f);
587
588 // Reduce to [-1, 1], Apply mirroring -> [0, 1]
589 s_prime = fabsf(fValue - s_prime);
590
591 // un-normalize
592 return s_prime * (float)maxValue;
593 }
594
595 struct AddressingTable
596 {
AddressingTableAddressingTable597 AddressingTable()
598 {
599 static_assert(CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6, "");
600 static_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2, "");
601
602 mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
603 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn;
604 mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
605 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = NoAddressFn;
606 mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
607 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = RepeatAddressFn;
608 mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
609 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = RepeatAddressFn;
610 mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
611 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampToEdgeNearestFn;
612 mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
613 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampToEdgeLinearFn;
614 mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
615 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampAddressFn;
616 mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
617 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampAddressFn;
618 mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
619 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
620 mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
621 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
622 }
623
operator []AddressingTable624 AddressFn operator[](image_sampler_data *sampler)
625 {
626 return mTable[(int)sampler->addressing_mode - CL_ADDRESS_NONE]
627 [(int)sampler->filter_mode - CL_FILTER_NEAREST];
628 }
629
630 AddressFn mTable[6][2];
631 };
632
633 static AddressingTable sAddressingTable;
634
is_sRGBA_order(cl_channel_order image_channel_order)635 bool is_sRGBA_order(cl_channel_order image_channel_order)
636 {
637 switch (image_channel_order)
638 {
639 case CL_sRGB:
640 case CL_sRGBx:
641 case CL_sRGBA:
642 case CL_sBGRA: return true;
643 default: return false;
644 }
645 }
646
647 // Format helpers
648
has_alpha(const cl_image_format * format)649 int has_alpha(const cl_image_format *format)
650 {
651 switch (format->image_channel_order)
652 {
653 case CL_R: return 0;
654 case CL_A: return 1;
655 case CL_Rx: return 0;
656 case CL_RG: return 0;
657 case CL_RA: return 1;
658 case CL_RGx: return 0;
659 case CL_RGB:
660 case CL_sRGB: return 0;
661 case CL_RGBx:
662 case CL_sRGBx: return 0;
663 case CL_RGBA: return 1;
664 case CL_BGRA: return 1;
665 case CL_ARGB: return 1;
666 case CL_ABGR: return 1;
667 case CL_INTENSITY: return 1;
668 case CL_LUMINANCE: return 0;
669 #ifdef CL_BGR1_APPLE
670 case CL_BGR1_APPLE: return 1;
671 #endif
672 #ifdef CL_1RGB_APPLE
673 case CL_1RGB_APPLE: return 1;
674 #endif
675 case CL_sRGBA:
676 case CL_sBGRA: return 1;
677 case CL_DEPTH: return 0;
678 default:
679 log_error("Invalid image channel order: %d\n",
680 format->image_channel_order);
681 return 0;
682 }
683 }
684
685 #define PRINT_MAX_SIZE_LOGIC 0
686
687 #define SWAP(_a, _b) \
688 do \
689 { \
690 _a ^= _b; \
691 _b ^= _a; \
692 _a ^= _b; \
693 } while (0)
694
get_max_sizes(size_t * numberOfSizes,const int maxNumberOfSizes,size_t sizes[][3],size_t maxWidth,size_t maxHeight,size_t maxDepth,size_t maxArraySize,const cl_ulong maxIndividualAllocSize,const cl_ulong maxTotalAllocSize,cl_mem_object_type image_type,const cl_image_format * format,int usingMaxPixelSizeBuffer)695 void get_max_sizes(
696 size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3],
697 size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
698 const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE
699 const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE
700 cl_mem_object_type image_type, const cl_image_format *format,
701 int usingMaxPixelSizeBuffer)
702 {
703
704 bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
705 bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
706 || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
707
708 // Validate we have a reasonable max depth for 3D
709 if (is3D && maxDepth < 2)
710 {
711 log_error("ERROR: Requesting max image sizes for 3D images when max "
712 "depth is < 2.\n");
713 *numberOfSizes = 0;
714 return;
715 }
716 // Validate we have a reasonable max array size for 1D & 2D image arrays
717 if (isArray && maxArraySize < 2)
718 {
719 log_error("ERROR: Requesting max image sizes for an image array when "
720 "max array size is < 1.\n");
721 *numberOfSizes = 0;
722 return;
723 }
724
725 // Reduce the maximum because we are trying to test the max image
726 // dimensions, not the memory allocation
727 cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
728 cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
729 log_info("Note: max individual allocation adjusted down from %gMB to %gMB "
730 "and max total allocation adjusted down from %gMB to %gMB.\n",
731 maxIndividualAllocSize / (1024.0 * 1024.0),
732 adjustedMaxIndividualAllocSize / (1024.0 * 1024.0),
733 maxTotalAllocSize / (1024.0 * 1024.0),
734 adjustedMaxTotalAllocSize / (1024.0 * 1024.0));
735
736 // Cap our max allocation to 1.0GB.
737 // FIXME -- why? In the interest of not taking a long time? We should
738 // still test this stuff...
739 if (adjustedMaxTotalAllocSize > (cl_ulong)1024 * 1024 * 1024)
740 {
741 adjustedMaxTotalAllocSize = (cl_ulong)1024 * 1024 * 1024;
742 log_info("Limiting max total allocation size to %gMB (down from %gMB) "
743 "for test.\n",
744 adjustedMaxTotalAllocSize / (1024.0 * 1024.0),
745 maxTotalAllocSize / (1024.0 * 1024.0));
746 }
747
748 cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
749 if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize * 2)
750 maxAllocSize = adjustedMaxTotalAllocSize / 2;
751
752 size_t raw_pixel_size = get_pixel_size(format);
753 // If the test will be creating input (src) buffer of type int4 or float4,
754 // number of pixels will be governed by sizeof(int4 or float4) and not
755 // sizeof(dest fomat) Also if pixel size is 12 bytes i.e. RGB or RGBx, we
756 // adjust it to 16 bytes as GPUs has no concept of 3 channel images. GPUs
757 // expand these to four channel RGBA.
758 if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16;
759 size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
760
761 log_info("Maximums: [%zu x %zu x %zu], raw pixel size %zu bytes, "
762 "per-allocation limit %gMB.\n",
763 maxWidth, maxHeight, isArray ? maxArraySize : maxDepth,
764 raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0)));
765
766 // Keep track of the maximum sizes for each dimension
767 size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
768
769 switch (image_type)
770 {
771 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
772 maximum_sizes[1] = maxArraySize;
773 maximum_sizes[2] = 1;
774 break;
775 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
776 maximum_sizes[2] = maxArraySize;
777 break;
778 }
779
780
781 // Given one fixed sized dimension, this code finds one or two other
782 // dimensions, both with very small size, such that the size does not
783 // exceed the maximum passed to this function
784
785 #if defined(__x86_64) || defined(__arm64__) || defined(__ppc64__)
786 size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15 };
787 #else
788 size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13 };
789 #endif
790
791 static size_t other_size = 0;
792 enum
793 {
794 num_other_sizes = sizeof(other_sizes) / sizeof(size_t)
795 };
796
797 (*numberOfSizes) = 0;
798
799 if (image_type == CL_MEM_OBJECT_IMAGE1D)
800 {
801
802 size_t M = maximum_sizes[0];
803
804 // Store the size
805 sizes[(*numberOfSizes)][0] = M;
806 sizes[(*numberOfSizes)][1] = 1;
807 sizes[(*numberOfSizes)][2] = 1;
808 ++(*numberOfSizes);
809 }
810
811 else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
812 || image_type == CL_MEM_OBJECT_IMAGE2D)
813 {
814
815 for (int fixed_dim = 0; fixed_dim < 2; ++fixed_dim)
816 {
817
818 // Determine the size of the fixed dimension
819 size_t M = maximum_sizes[fixed_dim];
820 size_t A = max_pixels;
821
822 int x0_dim = !fixed_dim;
823 size_t x0 = static_cast<size_t>(
824 fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M),
825 maximum_sizes[x0_dim]));
826
827 // Store the size
828 sizes[(*numberOfSizes)][fixed_dim] = M;
829 sizes[(*numberOfSizes)][x0_dim] = x0;
830 sizes[(*numberOfSizes)][2] = 1;
831 ++(*numberOfSizes);
832 }
833 }
834
835 else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY
836 || image_type == CL_MEM_OBJECT_IMAGE3D)
837 {
838
839 // Iterate over dimensions, finding sizes for the non-fixed dimension
840 for (int fixed_dim = 0; fixed_dim < 3; ++fixed_dim)
841 {
842
843 // Determine the size of the fixed dimension
844 size_t M = maximum_sizes[fixed_dim];
845 size_t A = max_pixels;
846
847 // Find two other dimensions, x0 and x1
848 int x0_dim = (fixed_dim == 0) ? 1 : 0;
849 int x1_dim = (fixed_dim == 2) ? 1 : 2;
850
851 // Choose two other sizes for these dimensions
852 size_t x0 = static_cast<size_t>(
853 fmin(fmin(A / M, maximum_sizes[x0_dim]),
854 other_sizes[(other_size++) % num_other_sizes]));
855 // GPUs have certain restrictions on minimum width (row alignment)
856 // of images which has given us issues testing small widths in this
857 // test (say we set width to 3 for testing, and compute size based
858 // on this width and decide it fits within vram ... but GPU driver
859 // decides that, due to row alignment requirements, it has to use
860 // width of 16 which doesnt fit in vram). For this purpose we are
861 // not testing width < 16 for this test.
862 if (x0_dim == 0 && x0 < 16) x0 = 16;
863 size_t x1 = static_cast<size_t>(
864 fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
865 other_sizes[(other_size++) % num_other_sizes]));
866
867 // Valid image sizes cannot be below 1. Due to the workaround for
868 // the xo_dim where x0 is overidden to 16 there might not be enough
869 // space left for x1 dimension. This could be a fractional 0.x size
870 // that when cast to integer would result in a value 0. In these
871 // cases we clamp the size to a minimum of 1.
872 if (x1 < 1) x1 = 1;
873
874 // M and x0 cannot be '0' as they derive from clDeviceInfo calls
875 assert(x0 > 0 && M > 0);
876
877 // Store the size
878 sizes[(*numberOfSizes)][fixed_dim] = M;
879 sizes[(*numberOfSizes)][x0_dim] = x0;
880 sizes[(*numberOfSizes)][x1_dim] = x1;
881 ++(*numberOfSizes);
882 }
883 }
884
885 // Log the results
886 for (int j = 0; j < (int)(*numberOfSizes); j++)
887 {
888 switch (image_type)
889 {
890 case CL_MEM_OBJECT_IMAGE1D:
891 log_info(" size[%d] = [%zu] (%g MB image)\n", j, sizes[j][0],
892 raw_pixel_size * sizes[j][0] * sizes[j][1]
893 * sizes[j][2] / (1024.0 * 1024.0));
894 break;
895 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
896 case CL_MEM_OBJECT_IMAGE2D:
897 log_info(" size[%d] = [%zu %zu] (%g MB image)\n", j,
898 sizes[j][0], sizes[j][1],
899 raw_pixel_size * sizes[j][0] * sizes[j][1]
900 * sizes[j][2] / (1024.0 * 1024.0));
901 break;
902 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
903 case CL_MEM_OBJECT_IMAGE3D:
904 log_info(" size[%d] = [%zu %zu %zu] (%g MB image)\n", j,
905 sizes[j][0], sizes[j][1], sizes[j][2],
906 raw_pixel_size * sizes[j][0] * sizes[j][1]
907 * sizes[j][2] / (1024.0 * 1024.0));
908 break;
909 }
910 }
911 }
912
get_max_absolute_error(const cl_image_format * format,image_sampler_data * sampler)913 float get_max_absolute_error(const cl_image_format *format,
914 image_sampler_data *sampler)
915 {
916 if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f;
917
918 switch (format->image_channel_data_type)
919 {
920 case CL_SNORM_INT8: return 1.0f / 127.0f;
921 case CL_UNORM_INT8: return 1.0f / 255.0f;
922 case CL_UNORM_INT16: return 1.0f / 65535.0f;
923 case CL_SNORM_INT16: return 1.0f / 32767.0f;
924 case CL_FLOAT: return CL_FLT_MIN;
925 #ifdef CL_SFIXED14_APPLE
926 case CL_SFIXED14_APPLE: return 0x1.0p-14f;
927 #endif
928 case CL_UNORM_SHORT_555:
929 case CL_UNORM_SHORT_565: return 1.0f / 31.0f;
930 default: return 0.0f;
931 }
932 }
933
get_max_relative_error(const cl_image_format * format,image_sampler_data * sampler,int is3D,int isLinearFilter)934 float get_max_relative_error(const cl_image_format *format,
935 image_sampler_data *sampler, int is3D,
936 int isLinearFilter)
937 {
938 float maxError = 0.0f;
939 float sampleCount = 1.0f;
940 if (isLinearFilter) sampleCount = is3D ? 8.0f : 4.0f;
941
942 // Note that the ULP is defined here as the unit in the last place of the
943 // maximum magnitude sample used for filtering.
944
945 // Section 8.3
946 switch (format->image_channel_data_type)
947 {
948 // The spec allows 2 ulps of error for normalized formats
949 case CL_SNORM_INT8:
950 case CL_UNORM_INT8:
951 case CL_SNORM_INT16:
952 case CL_UNORM_INT16:
953 case CL_UNORM_SHORT_565:
954 case CL_UNORM_SHORT_555:
955 case CL_UNORM_INT_101010:
956 // Maximum sampling error for round to zero normalization based on
957 // multiplication by reciprocal (using reciprocal generated in
958 // round to +inf mode, so that 1.0 matches spec)
959 maxError = 2 * FLT_EPSILON * sampleCount;
960 break;
961
962 // If the implementation supports these formats then it will have to
963 // allow rounding error here too, because not all 32-bit ints are
964 // exactly representable in float
965 case CL_SIGNED_INT32:
966 case CL_UNSIGNED_INT32: maxError = 1 * FLT_EPSILON; break;
967 }
968
969
970 // Section 8.2
971 if (sampler->addressing_mode == CL_ADDRESS_REPEAT
972 || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT
973 || sampler->filter_mode != CL_FILTER_NEAREST
974 || sampler->normalized_coords)
975 #if defined(__APPLE__)
976 {
977 if (sampler->filter_mode != CL_FILTER_NEAREST)
978 {
979 // The maximum
980 if (gDeviceType == CL_DEVICE_TYPE_GPU)
981 // Some GPUs ain't so accurate
982 maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);
983 else
984 // The standard method of 2d linear filtering delivers 4.0 ulps
985 // of error in round to nearest (8 in rtz).
986 maxError += 4.0f * FLT_EPSILON;
987 }
988 else
989 // normalized coordinates will introduce some error into the
990 // fractional part of the address, affecting results
991 maxError += 4.0f * FLT_EPSILON;
992 }
993 #else
994 {
995 #if !defined(_WIN32)
996 #warning Implementations will likely wish to pick a max allowable sampling error policy here that is better than the spec
997 #endif
998 // The spec allows linear filters to return any result most of the time.
999 // That's fine for implementations but a problem for testing. After all
1000 // users aren't going to like garbage images. We have "picked a number"
1001 // here that we are going to attempt to conform to. Implementations are
1002 // free to pick another number, like infinity, if they like.
1003 // We picked a number for you, to provide /some/ sanity
1004 maxError = MAKE_HEX_FLOAT(0x1.0p-7f, 0x1L, -7);
1005 // ...but this is what the spec allows:
1006 // maxError = INFINITY;
1007 // Please feel free to pick any positive number. (NaN wont work.)
1008 }
1009 #endif
1010
1011 // The error calculation itself can introduce error
1012 maxError += FLT_EPSILON * 2;
1013
1014 return maxError;
1015 }
1016
get_format_max_int(const cl_image_format * format)1017 size_t get_format_max_int(const cl_image_format *format)
1018 {
1019 switch (format->image_channel_data_type)
1020 {
1021 case CL_SNORM_INT8:
1022 case CL_SIGNED_INT8: return 127;
1023 case CL_UNORM_INT8:
1024 case CL_UNSIGNED_INT8: return 255;
1025
1026 case CL_SNORM_INT16:
1027 case CL_SIGNED_INT16: return 32767;
1028
1029 case CL_UNORM_INT16:
1030 case CL_UNSIGNED_INT16: return 65535;
1031
1032 case CL_SIGNED_INT32: return 2147483647L;
1033
1034 case CL_UNSIGNED_INT32: return 4294967295LL;
1035
1036 case CL_UNORM_SHORT_565:
1037 case CL_UNORM_SHORT_555: return 31;
1038
1039 case CL_UNORM_INT_101010: return 1023;
1040
1041 case CL_HALF_FLOAT: return 1 << 10;
1042
1043 #ifdef CL_SFIXED14_APPLE
1044 case CL_SFIXED14_APPLE: return 16384;
1045 #endif
1046 default: return 0;
1047 }
1048 }
1049
get_format_min_int(const cl_image_format * format)1050 int get_format_min_int(const cl_image_format *format)
1051 {
1052 switch (format->image_channel_data_type)
1053 {
1054 case CL_SNORM_INT8:
1055 case CL_SIGNED_INT8: return -128;
1056 case CL_UNORM_INT8:
1057 case CL_UNSIGNED_INT8: return 0;
1058
1059 case CL_SNORM_INT16:
1060 case CL_SIGNED_INT16: return -32768;
1061
1062 case CL_UNORM_INT16:
1063 case CL_UNSIGNED_INT16: return 0;
1064
1065 case CL_SIGNED_INT32: return -2147483648LL;
1066
1067 case CL_UNSIGNED_INT32: return 0;
1068
1069 case CL_UNORM_SHORT_565:
1070 case CL_UNORM_SHORT_555:
1071 case CL_UNORM_INT_101010: return 0;
1072
1073 case CL_HALF_FLOAT: return -(1 << 10);
1074
1075 #ifdef CL_SFIXED14_APPLE
1076 case CL_SFIXED14_APPLE: return -16384;
1077 #endif
1078
1079 default: return 0;
1080 }
1081 }
1082
convert_float_to_half(float f)1083 cl_half convert_float_to_half(float f)
1084 {
1085 switch (gFloatToHalfRoundingMode)
1086 {
1087 case kRoundToNearestEven: return cl_half_from_float(f, CL_HALF_RTE);
1088 case kRoundTowardZero: return cl_half_from_float(f, CL_HALF_RTZ);
1089 default:
1090 log_error("ERROR: Test internal error -- unhandled or unknown "
1091 "float->half rounding mode.\n");
1092 exit(-1);
1093 return 0xffff;
1094 }
1095 }
1096
get_image_size(image_descriptor const * imageInfo)1097 cl_ulong get_image_size(image_descriptor const *imageInfo)
1098 {
1099 cl_ulong imageSize;
1100
1101 // Assumes rowPitch and slicePitch are always correctly defined
1102 if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
1103 {
1104 imageSize = (size_t)compute_mipmapped_image_size(*imageInfo);
1105 }
1106 else
1107 {
1108 switch (imageInfo->type)
1109 {
1110 case CL_MEM_OBJECT_IMAGE1D: imageSize = imageInfo->rowPitch; break;
1111 case CL_MEM_OBJECT_IMAGE2D:
1112 imageSize = imageInfo->height * imageInfo->rowPitch;
1113 break;
1114 case CL_MEM_OBJECT_IMAGE3D:
1115 imageSize = imageInfo->depth * imageInfo->slicePitch;
1116 break;
1117 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1118 imageSize = imageInfo->arraySize * imageInfo->slicePitch;
1119 break;
1120 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1121 imageSize = imageInfo->arraySize * imageInfo->slicePitch;
1122 break;
1123 default:
1124 log_error("ERROR: Cannot identify image type %x\n",
1125 imageInfo->type);
1126 abort();
1127 }
1128 }
1129 return imageSize;
1130 }
1131
1132 // Calculate image size in megabytes (strictly, mebibytes). Result is rounded
1133 // up.
get_image_size_mb(image_descriptor const * imageInfo)1134 cl_ulong get_image_size_mb(image_descriptor const *imageInfo)
1135 {
1136 cl_ulong imageSize = get_image_size(imageInfo);
1137 cl_ulong mb = imageSize / (1024 * 1024);
1138 if (imageSize % (1024 * 1024) > 0)
1139 {
1140 mb += 1;
1141 }
1142 return mb;
1143 }
1144
1145
1146 uint64_t gRoundingStartValue = 0;
1147
1148
escape_inf_nan_values(char * data,size_t allocSize)1149 void escape_inf_nan_values(char *data, size_t allocSize)
1150 {
1151 // filter values with 8 not-quite-highest bits
1152 unsigned int *intPtr = (unsigned int *)data;
1153 for (size_t i = 0; i<allocSize>> 2; i++)
1154 {
1155 if ((intPtr[i] & 0x7F800000) == 0x7F800000) intPtr[i] ^= 0x40000000;
1156 }
1157
1158 // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits
1159 // = 0x7C00 are special)
1160 unsigned short *shortPtr = (unsigned short *)data;
1161 for (size_t i = 0; i<allocSize>> 1; i++)
1162 {
1163 if ((shortPtr[i] & 0x7C00) == 0x7C00) shortPtr[i] ^= 0x4000;
1164 }
1165 }
1166
generate_random_image_data(image_descriptor * imageInfo,BufferOwningPtr<char> & P,MTdata d)1167 char *generate_random_image_data(image_descriptor *imageInfo,
1168 BufferOwningPtr<char> &P, MTdata d)
1169 {
1170 size_t allocSize = static_cast<size_t>(get_image_size(imageInfo));
1171 size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format);
1172 size_t i;
1173
1174 if (imageInfo->num_mip_levels > 1)
1175 allocSize =
1176 static_cast<size_t>(compute_mipmapped_image_size(*imageInfo));
1177
1178 #if defined(__APPLE__)
1179 char *data = NULL;
1180 if (gDeviceType == CL_DEVICE_TYPE_CPU)
1181 {
1182 size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;
1183
1184 void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
1185 MAP_ANON | MAP_PRIVATE, 0, 0);
1186 intptr_t data_end = (intptr_t)map + mapSize - 4096;
1187 data = (char *)(data_end - (intptr_t)allocSize);
1188
1189 mprotect(map, 4096, PROT_NONE);
1190 mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
1191 P.reset(data, map, mapSize, allocSize);
1192 }
1193 else
1194 {
1195 data = (char *)malloc(allocSize);
1196 P.reset(data, NULL, 0, allocSize);
1197 }
1198 #else
1199 P.reset(NULL); // Free already allocated memory first, then try to allocate
1200 // new block.
1201 char *data =
1202 (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
1203 P.reset(data, NULL, 0, allocSize, true);
1204 #endif
1205
1206 if (data == NULL)
1207 {
1208 log_error("ERROR: Unable to malloc %zu bytes for "
1209 "generate_random_image_data\n",
1210 allocSize);
1211 return 0;
1212 }
1213
1214 if (gTestRounding)
1215 {
1216 // Special case: fill with a ramp from 0 to the size of the type
1217 size_t typeSize = get_format_type_size(imageInfo->format);
1218 switch (typeSize)
1219 {
1220 case 1: {
1221 char *ptr = data;
1222 for (i = 0; i < allocSize; i++)
1223 ptr[i] = (cl_char)(i + gRoundingStartValue);
1224 }
1225 break;
1226 case 2: {
1227 cl_short *ptr = (cl_short *)data;
1228 for (i = 0; i < allocSize / 2; i++)
1229 ptr[i] = (cl_short)(i + gRoundingStartValue);
1230 }
1231 break;
1232 case 4: {
1233 cl_int *ptr = (cl_int *)data;
1234 for (i = 0; i < allocSize / 4; i++)
1235 ptr[i] = (cl_int)(i + gRoundingStartValue);
1236 }
1237 break;
1238 }
1239
1240 // Note: inf or nan float values would cause problems, although we don't
1241 // know this will actually be a float, so we just know what to look for
1242 escape_inf_nan_values(data, allocSize);
1243 return data;
1244 }
1245
1246 // Otherwise, we should be able to just fill with random bits no matter what
1247 cl_uint *p = (cl_uint *)data;
1248 for (i = 0; i + 4 <= allocSize; i += 4) p[i / 4] = genrand_int32(d);
1249
1250 for (; i < allocSize; i++) data[i] = genrand_int32(d);
1251
1252 // Note: inf or nan float values would cause problems, although we don't
1253 // know this will actually be a float, so we just know what to look for
1254 escape_inf_nan_values(data, allocSize);
1255
1256 if (/*!gTestMipmaps*/ imageInfo->num_mip_levels < 2)
1257 {
1258 // Fill unused edges with -1, NaN for float
1259 if (imageInfo->rowPitch > pixelRowBytes)
1260 {
1261 size_t height = 0;
1262
1263 switch (imageInfo->type)
1264 {
1265 case CL_MEM_OBJECT_IMAGE2D:
1266 case CL_MEM_OBJECT_IMAGE3D:
1267 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1268 height = imageInfo->height;
1269 break;
1270 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1271 height = imageInfo->arraySize;
1272 break;
1273 }
1274
1275 // Fill in the row padding regions
1276 for (i = 0; i < height; i++)
1277 {
1278 size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
1279 size_t length = imageInfo->rowPitch - pixelRowBytes;
1280 memset(data + offset, 0xff, length);
1281 }
1282 }
1283
1284 // Fill in the slice padding regions, if necessary:
1285
1286 size_t slice_dimension = imageInfo->height;
1287 if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1288 {
1289 slice_dimension = imageInfo->arraySize;
1290 }
1291
1292 if (imageInfo->slicePitch > slice_dimension * imageInfo->rowPitch)
1293 {
1294 size_t depth = 0;
1295 switch (imageInfo->type)
1296 {
1297 case CL_MEM_OBJECT_IMAGE2D:
1298 case CL_MEM_OBJECT_IMAGE3D: depth = imageInfo->depth; break;
1299 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1300 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1301 depth = imageInfo->arraySize;
1302 break;
1303 }
1304
1305 for (i = 0; i < depth; i++)
1306 {
1307 size_t offset = i * imageInfo->slicePitch
1308 + slice_dimension * imageInfo->rowPitch;
1309 size_t length = imageInfo->slicePitch
1310 - slice_dimension * imageInfo->rowPitch;
1311 memset(data + offset, 0xff, length);
1312 }
1313 }
1314 }
1315
1316 return data;
1317 }
1318
1319 #define CLAMP_FLOAT(v) (fmaxf(fminf(v, 1.f), -1.f))
1320
1321
read_image_pixel_float(void * imageData,image_descriptor * imageInfo,int x,int y,int z,float * outData,int lod)1322 void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
1323 int y, int z, float *outData, int lod)
1324 {
1325 size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
1326 depth_lod = imageInfo->depth;
1327 size_t slice_pitch_lod = 0, row_pitch_lod = 0;
1328
1329 if (imageInfo->num_mip_levels > 1)
1330 {
1331 switch (imageInfo->type)
1332 {
1333 case CL_MEM_OBJECT_IMAGE3D:
1334 depth_lod =
1335 (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
1336 case CL_MEM_OBJECT_IMAGE2D:
1337 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1338 height_lod =
1339 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
1340 default:
1341 width_lod =
1342 (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
1343 }
1344 row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
1345 if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1346 slice_pitch_lod = row_pitch_lod;
1347 else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
1348 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1349 slice_pitch_lod = row_pitch_lod * height_lod;
1350 }
1351 else
1352 {
1353 row_pitch_lod = imageInfo->rowPitch;
1354 slice_pitch_lod = imageInfo->slicePitch;
1355 }
1356 if (x < 0 || y < 0 || z < 0 || x >= (int)width_lod
1357 || (height_lod != 0 && y >= (int)height_lod)
1358 || (depth_lod != 0 && z >= (int)depth_lod)
1359 || (imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize))
1360 {
1361 outData[0] = outData[1] = outData[2] = outData[3] = 0;
1362 if (!has_alpha(imageInfo->format)) outData[3] = 1;
1363 return;
1364 }
1365
1366 const cl_image_format *format = imageInfo->format;
1367
1368 unsigned int i;
1369 float tempData[4];
1370
1371 // Advance to the right spot
1372 char *ptr = (char *)imageData;
1373 size_t pixelSize = get_pixel_size(format);
1374
1375 ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
1376
1377 // OpenCL only supports reading floats from certain formats
1378 size_t channelCount = get_format_channel_count(format);
1379 switch (format->image_channel_data_type)
1380 {
1381 case CL_SNORM_INT8: {
1382 cl_char *dPtr = (cl_char *)ptr;
1383 for (i = 0; i < channelCount; i++)
1384 tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 127.0f);
1385 break;
1386 }
1387
1388 case CL_UNORM_INT8: {
1389 unsigned char *dPtr = (unsigned char *)ptr;
1390 for (i = 0; i < channelCount; i++)
1391 {
1392 if ((is_sRGBA_order(imageInfo->format->image_channel_order))
1393 && i < 3) // only RGB need to be converted for sRGBA
1394 tempData[i] = (float)sRGBunmap((float)dPtr[i] / 255.0f);
1395 else
1396 tempData[i] = (float)dPtr[i] / 255.0f;
1397 }
1398 break;
1399 }
1400
1401 case CL_SIGNED_INT8: {
1402 cl_char *dPtr = (cl_char *)ptr;
1403 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1404 break;
1405 }
1406
1407 case CL_UNSIGNED_INT8: {
1408 cl_uchar *dPtr = (cl_uchar *)ptr;
1409 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1410 break;
1411 }
1412
1413 case CL_SNORM_INT16: {
1414 cl_short *dPtr = (cl_short *)ptr;
1415 for (i = 0; i < channelCount; i++)
1416 tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 32767.0f);
1417 break;
1418 }
1419
1420 case CL_UNORM_INT16: {
1421 cl_ushort *dPtr = (cl_ushort *)ptr;
1422 for (i = 0; i < channelCount; i++)
1423 tempData[i] = (float)dPtr[i] / 65535.0f;
1424 break;
1425 }
1426
1427 case CL_SIGNED_INT16: {
1428 cl_short *dPtr = (cl_short *)ptr;
1429 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1430 break;
1431 }
1432
1433 case CL_UNSIGNED_INT16: {
1434 cl_ushort *dPtr = (cl_ushort *)ptr;
1435 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1436 break;
1437 }
1438
1439 case CL_HALF_FLOAT: {
1440 cl_half *dPtr = (cl_half *)ptr;
1441 for (i = 0; i < channelCount; i++)
1442 tempData[i] = cl_half_to_float(dPtr[i]);
1443 break;
1444 }
1445
1446 case CL_SIGNED_INT32: {
1447 cl_int *dPtr = (cl_int *)ptr;
1448 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1449 break;
1450 }
1451
1452 case CL_UNSIGNED_INT32: {
1453 cl_uint *dPtr = (cl_uint *)ptr;
1454 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1455 break;
1456 }
1457
1458 case CL_UNORM_SHORT_565: {
1459 cl_ushort *dPtr = (cl_ushort *)ptr;
1460 tempData[0] = (float)(dPtr[0] >> 11) / (float)31;
1461 tempData[1] = (float)((dPtr[0] >> 5) & 63) / (float)63;
1462 tempData[2] = (float)(dPtr[0] & 31) / (float)31;
1463 break;
1464 }
1465
1466 case CL_UNORM_SHORT_555: {
1467 cl_ushort *dPtr = (cl_ushort *)ptr;
1468 tempData[0] = (float)((dPtr[0] >> 10) & 31) / (float)31;
1469 tempData[1] = (float)((dPtr[0] >> 5) & 31) / (float)31;
1470 tempData[2] = (float)(dPtr[0] & 31) / (float)31;
1471 break;
1472 }
1473
1474 case CL_UNORM_INT_101010: {
1475 cl_uint *dPtr = (cl_uint *)ptr;
1476 tempData[0] = (float)((dPtr[0] >> 20) & 0x3ff) / (float)1023;
1477 tempData[1] = (float)((dPtr[0] >> 10) & 0x3ff) / (float)1023;
1478 tempData[2] = (float)(dPtr[0] & 0x3ff) / (float)1023;
1479 break;
1480 }
1481
1482 case CL_FLOAT: {
1483 float *dPtr = (float *)ptr;
1484 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1485 break;
1486 }
1487 #ifdef CL_SFIXED14_APPLE
1488 case CL_SFIXED14_APPLE: {
1489 cl_ushort *dPtr = (cl_ushort *)ptr;
1490 for (i = 0; i < channelCount; i++)
1491 tempData[i] = ((int)dPtr[i] - 16384) * 0x1.0p-14f;
1492 break;
1493 }
1494 #endif
1495 }
1496
1497
1498 outData[0] = outData[1] = outData[2] = 0;
1499 outData[3] = 1;
1500
1501 switch (format->image_channel_order)
1502 {
1503 case CL_A: outData[3] = tempData[0]; break;
1504 case CL_R:
1505 case CL_Rx: outData[0] = tempData[0]; break;
1506 case CL_RA:
1507 outData[0] = tempData[0];
1508 outData[3] = tempData[1];
1509 break;
1510 case CL_RG:
1511 case CL_RGx:
1512 outData[0] = tempData[0];
1513 outData[1] = tempData[1];
1514 break;
1515 case CL_RGB:
1516 case CL_RGBx:
1517 case CL_sRGB:
1518 case CL_sRGBx:
1519 outData[0] = tempData[0];
1520 outData[1] = tempData[1];
1521 outData[2] = tempData[2];
1522 break;
1523 case CL_RGBA:
1524 outData[0] = tempData[0];
1525 outData[1] = tempData[1];
1526 outData[2] = tempData[2];
1527 outData[3] = tempData[3];
1528 break;
1529 case CL_ARGB:
1530 outData[0] = tempData[1];
1531 outData[1] = tempData[2];
1532 outData[2] = tempData[3];
1533 outData[3] = tempData[0];
1534 break;
1535 case CL_ABGR:
1536 outData[0] = tempData[3];
1537 outData[1] = tempData[2];
1538 outData[2] = tempData[1];
1539 outData[3] = tempData[0];
1540 break;
1541 case CL_BGRA:
1542 case CL_sBGRA:
1543 outData[0] = tempData[2];
1544 outData[1] = tempData[1];
1545 outData[2] = tempData[0];
1546 outData[3] = tempData[3];
1547 break;
1548 case CL_INTENSITY:
1549 outData[0] = tempData[0];
1550 outData[1] = tempData[0];
1551 outData[2] = tempData[0];
1552 outData[3] = tempData[0];
1553 break;
1554 case CL_LUMINANCE:
1555 outData[0] = tempData[0];
1556 outData[1] = tempData[0];
1557 outData[2] = tempData[0];
1558 break;
1559 #ifdef CL_1RGB_APPLE
1560 case CL_1RGB_APPLE:
1561 outData[0] = tempData[1];
1562 outData[1] = tempData[2];
1563 outData[2] = tempData[3];
1564 outData[3] = 1.0f;
1565 break;
1566 #endif
1567 #ifdef CL_BGR1_APPLE
1568 case CL_BGR1_APPLE:
1569 outData[0] = tempData[2];
1570 outData[1] = tempData[1];
1571 outData[2] = tempData[0];
1572 outData[3] = 1.0f;
1573 break;
1574 #endif
1575 case CL_sRGBA:
1576 outData[0] = tempData[0];
1577 outData[1] = tempData[1];
1578 outData[2] = tempData[2];
1579 outData[3] = tempData[3];
1580 break;
1581 case CL_DEPTH: outData[0] = tempData[0]; break;
1582 default:
1583 log_error("Invalid format:");
1584 print_header(format, true);
1585 break;
1586 }
1587 }
1588
read_image_pixel_float(void * imageData,image_descriptor * imageInfo,int x,int y,int z,float * outData)1589 void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
1590 int y, int z, float *outData)
1591 {
1592 read_image_pixel_float(imageData, imageInfo, x, y, z, outData, 0);
1593 }
1594
get_integer_coords(float x,float y,float z,size_t width,size_t height,size_t depth,image_sampler_data * imageSampler,image_descriptor * imageInfo,int & outX,int & outY,int & outZ)1595 bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
1596 size_t depth, image_sampler_data *imageSampler,
1597 image_descriptor *imageInfo, int &outX, int &outY,
1598 int &outZ)
1599 {
1600 return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height,
1601 depth, imageSampler, imageInfo, outX, outY,
1602 outZ);
1603 }
1604
get_integer_coords_offset(float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,size_t width,size_t height,size_t depth,image_sampler_data * imageSampler,image_descriptor * imageInfo,int & outX,int & outY,int & outZ)1605 bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
1606 float yAddressOffset, float zAddressOffset,
1607 size_t width, size_t height, size_t depth,
1608 image_sampler_data *imageSampler,
1609 image_descriptor *imageInfo, int &outX,
1610 int &outY, int &outZ)
1611 {
1612 AddressFn adFn = sAddressingTable[imageSampler];
1613
1614 float refX = floorf(x), refY = floorf(y), refZ = floorf(z);
1615
1616 // Handle sampler-directed coordinate normalization + clamping. Note that
1617 // the array coordinate for image array types is expected to be
1618 // unnormalized, and is clamped to 0..arraySize-1.
1619 if (imageSampler->normalized_coords)
1620 {
1621 switch (imageSampler->addressing_mode)
1622 {
1623 case CL_ADDRESS_REPEAT:
1624 x = RepeatNormalizedAddressFn(x, width);
1625 if (height != 0)
1626 {
1627 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1628 y = RepeatNormalizedAddressFn(y, height);
1629 }
1630 if (depth != 0)
1631 {
1632 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1633 z = RepeatNormalizedAddressFn(z, depth);
1634 }
1635
1636 if (xAddressOffset != 0.0)
1637 {
1638 // Add in the offset
1639 x += xAddressOffset;
1640 // Handle wrapping
1641 if (x > width) x -= (float)width;
1642 if (x < 0) x += (float)width;
1643 }
1644 if ((yAddressOffset != 0.0)
1645 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
1646 {
1647 // Add in the offset
1648 y += yAddressOffset;
1649 // Handle wrapping
1650 if (y > height) y -= (float)height;
1651 if (y < 0) y += (float)height;
1652 }
1653 if ((zAddressOffset != 0.0)
1654 && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
1655 {
1656 // Add in the offset
1657 z += zAddressOffset;
1658 // Handle wrapping
1659 if (z > depth) z -= (float)depth;
1660 if (z < 0) z += (float)depth;
1661 }
1662 break;
1663
1664 case CL_ADDRESS_MIRRORED_REPEAT:
1665 x = MirroredRepeatNormalizedAddressFn(x, width);
1666 if (height != 0)
1667 {
1668 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1669 y = MirroredRepeatNormalizedAddressFn(y, height);
1670 }
1671 if (depth != 0)
1672 {
1673 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1674 z = MirroredRepeatNormalizedAddressFn(z, depth);
1675 }
1676
1677 if (xAddressOffset != 0.0)
1678 {
1679 float temp = x + xAddressOffset;
1680 if (temp > (float)width)
1681 temp = (float)width - (temp - (float)width);
1682 x = fabsf(temp);
1683 }
1684 if ((yAddressOffset != 0.0)
1685 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
1686 {
1687 float temp = y + yAddressOffset;
1688 if (temp > (float)height)
1689 temp = (float)height - (temp - (float)height);
1690 y = fabsf(temp);
1691 }
1692 if ((zAddressOffset != 0.0)
1693 && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
1694 {
1695 float temp = z + zAddressOffset;
1696 if (temp > (float)depth)
1697 temp = (float)depth - (temp - (float)depth);
1698 z = fabsf(temp);
1699 }
1700 break;
1701
1702 default:
1703 // Also, remultiply to the original coords. This simulates any
1704 // truncation in the pass to OpenCL
1705 x *= (float)width;
1706 x += xAddressOffset;
1707
1708 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1709 {
1710 y *= (float)height;
1711 y += yAddressOffset;
1712 }
1713
1714 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1715 {
1716 z *= (float)depth;
1717 z += zAddressOffset;
1718 }
1719 break;
1720 }
1721 }
1722
1723 // At this point, we're dealing with non-normalized coordinates.
1724
1725 outX = adFn(static_cast<int>(floorf(x)), width);
1726
1727 // 1D and 2D arrays require special care for the index coordinate:
1728
1729 switch (imageInfo->type)
1730 {
1731 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1732 outY = static_cast<int>(
1733 calculate_array_index(y, (float)imageInfo->arraySize - 1.0f));
1734 outZ = 0; /* don't care! */
1735 break;
1736 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1737 outY = adFn(static_cast<int>(floorf(y)), height);
1738 outZ = static_cast<int>(
1739 calculate_array_index(z, (float)imageInfo->arraySize - 1.0f));
1740 break;
1741 default:
1742 // legacy path:
1743 if (height != 0) outY = adFn(static_cast<int>(floorf(y)), height);
1744 if (depth != 0) outZ = adFn(static_cast<int>(floorf(z)), depth);
1745 }
1746
1747 return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ);
1748 }
1749
frac(float a)1750 static float frac(float a) { return a - floorf(a); }
1751
1752 static inline void pixelMax(const float a[4], const float b[4], float *results);
pixelMax(const float a[4],const float b[4],float * results)1753 static inline void pixelMax(const float a[4], const float b[4], float *results)
1754 {
1755 for (int i = 0; i < 4; i++) results[i] = errMax(fabsf(a[i]), fabsf(b[i]));
1756 }
1757
1758 // If containsDenorms is NULL, flush denorms to zero
1759 // if containsDenorms is not NULL, record whether there are any denorms
1760 static inline void check_for_denorms(float a[4], int *containsDenorms);
check_for_denorms(float a[4],int * containsDenorms)1761 static inline void check_for_denorms(float a[4], int *containsDenorms)
1762 {
1763 if (NULL == containsDenorms)
1764 {
1765 for (int i = 0; i < 4; i++)
1766 {
1767 if (IsFloatSubnormal(a[i])) a[i] = copysignf(0.0f, a[i]);
1768 }
1769 }
1770 else
1771 {
1772 for (int i = 0; i < 4; i++)
1773 {
1774 if (IsFloatSubnormal(a[i]))
1775 {
1776 *containsDenorms = 1;
1777 break;
1778 }
1779 }
1780 }
1781 }
1782
calculate_array_index(float coord,float extent)1783 inline float calculate_array_index(float coord, float extent)
1784 {
1785 // from Section 8.4 of the 1.2 Spec 'Selecting an Image from an Image Array'
1786 //
1787 // given coordinate 'w' that represents an index:
1788 // layer_index = clamp( rint(w), 0, image_array_size - 1)
1789
1790 float ret = rintf(coord);
1791 ret = ret > extent ? extent : ret;
1792 ret = ret < 0.0f ? 0.0f : ret;
1793
1794 return ret;
1795 }
1796
1797 /*
1798 * Utility function to unnormalized a coordinate given a particular sampler.
1799 *
1800 * name - the name of the coordinate, used for verbose debugging only
1801 * coord - the coordinate requiring unnormalization
1802 * offset - an addressing offset to be added to the coordinate
1803 * extent - the max value for this coordinate (e.g. width for x)
1804 */
unnormalize_coordinate(const char * name,float coord,float offset,float extent,cl_addressing_mode addressing_mode,int verbose)1805 static float unnormalize_coordinate(const char *name, float coord, float offset,
1806 float extent,
1807 cl_addressing_mode addressing_mode,
1808 int verbose)
1809 {
1810 float ret = 0.0f;
1811
1812 switch (addressing_mode)
1813 {
1814 case CL_ADDRESS_REPEAT:
1815 ret = RepeatNormalizedAddressFn(coord, static_cast<size_t>(extent));
1816
1817 if (verbose)
1818 {
1819 log_info("\tRepeat filter denormalizes %s (%f) to %f\n", name,
1820 coord, ret);
1821 }
1822
1823 if (offset != 0.0)
1824 {
1825 // Add in the offset, and handle wrapping.
1826 ret += offset;
1827 if (ret > extent) ret -= extent;
1828 if (ret < 0.0) ret += extent;
1829 }
1830
1831 if (verbose && offset != 0.0f)
1832 {
1833 log_info("\tAddress offset of %f added to get %f\n", offset,
1834 ret);
1835 }
1836 break;
1837
1838 case CL_ADDRESS_MIRRORED_REPEAT:
1839 ret = MirroredRepeatNormalizedAddressFn(
1840 coord, static_cast<size_t>(extent));
1841
1842 if (verbose)
1843 {
1844 log_info(
1845 "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
1846 name, coord, ret);
1847 }
1848
1849 if (offset != 0.0)
1850 {
1851 float temp = ret + offset;
1852 if (temp > extent) temp = extent - (temp - extent);
1853 ret = fabsf(temp);
1854 }
1855
1856 if (verbose && offset != 0.0f)
1857 {
1858 log_info("\tAddress offset of %f added to get %f\n", offset,
1859 ret);
1860 }
1861 break;
1862
1863 default:
1864
1865 ret = coord * extent;
1866
1867 if (verbose)
1868 {
1869 log_info("\tFilter denormalizes %s to %f (%f * %f)\n", name,
1870 ret, coord, extent);
1871 }
1872
1873 ret += offset;
1874
1875 if (verbose && offset != 0.0f)
1876 {
1877 log_info("\tAddress offset of %f added to get %f\n", offset,
1878 ret);
1879 }
1880 }
1881
1882 return ret;
1883 }
1884
1885 FloatPixel
sample_image_pixel_float(void * imageData,image_descriptor * imageInfo,float x,float y,float z,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms)1886 sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
1887 float y, float z, image_sampler_data *imageSampler,
1888 float *outData, int verbose, int *containsDenorms)
1889 {
1890 return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
1891 0.0f, 0.0f, imageSampler, outData,
1892 verbose, containsDenorms);
1893 }
1894
1895 // returns max pixel value of the pixels touched
sample_image_pixel_float(void * imageData,image_descriptor * imageInfo,float x,float y,float z,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms,int lod)1896 FloatPixel sample_image_pixel_float(void *imageData,
1897 image_descriptor *imageInfo, float x,
1898 float y, float z,
1899 image_sampler_data *imageSampler,
1900 float *outData, int verbose,
1901 int *containsDenorms, int lod)
1902 {
1903 return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
1904 0.0f, 0.0f, imageSampler, outData,
1905 verbose, containsDenorms, lod);
1906 }
sample_image_pixel_float_offset(void * imageData,image_descriptor * imageInfo,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms,int lod)1907 FloatPixel sample_image_pixel_float_offset(
1908 void *imageData, image_descriptor *imageInfo, float x, float y, float z,
1909 float xAddressOffset, float yAddressOffset, float zAddressOffset,
1910 image_sampler_data *imageSampler, float *outData, int verbose,
1911 int *containsDenorms, int lod)
1912 {
1913 AddressFn adFn = sAddressingTable[imageSampler];
1914 FloatPixel returnVal;
1915 size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
1916 depth_lod = imageInfo->depth;
1917 size_t slice_pitch_lod = 0, row_pitch_lod = 0;
1918
1919 if (imageInfo->num_mip_levels > 1)
1920 {
1921 switch (imageInfo->type)
1922 {
1923 case CL_MEM_OBJECT_IMAGE3D:
1924 depth_lod =
1925 (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
1926 case CL_MEM_OBJECT_IMAGE2D:
1927 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1928 height_lod =
1929 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
1930 default:
1931 width_lod =
1932 (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
1933 }
1934 row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
1935 if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1936 slice_pitch_lod = row_pitch_lod;
1937 else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
1938 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1939 slice_pitch_lod = row_pitch_lod * height_lod;
1940 }
1941 else
1942 {
1943 slice_pitch_lod = imageInfo->slicePitch;
1944 row_pitch_lod = imageInfo->rowPitch;
1945 }
1946
1947 if (containsDenorms) *containsDenorms = 0;
1948
1949 if (imageSampler->normalized_coords)
1950 {
1951
1952 // We need to unnormalize our coordinates differently depending on
1953 // the image type, but 'x' is always processed the same way.
1954
1955 x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod,
1956 imageSampler->addressing_mode, verbose);
1957
1958 switch (imageInfo->type)
1959 {
1960
1961 // The image array types require special care:
1962
1963 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1964 z = 0; // don't care -- unused for 1D arrays
1965 break;
1966
1967 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1968 y = unnormalize_coordinate(
1969 "y", y, yAddressOffset, (float)height_lod,
1970 imageSampler->addressing_mode, verbose);
1971 break;
1972
1973 // Everybody else:
1974
1975 default:
1976 y = unnormalize_coordinate(
1977 "y", y, yAddressOffset, (float)height_lod,
1978 imageSampler->addressing_mode, verbose);
1979 z = unnormalize_coordinate(
1980 "z", z, zAddressOffset, (float)depth_lod,
1981 imageSampler->addressing_mode, verbose);
1982 }
1983 }
1984 else if (verbose)
1985 {
1986
1987 switch (imageInfo->type)
1988 {
1989 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1990 log_info("Starting coordinate: %f, array index %f\n", x, y);
1991 break;
1992 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1993 log_info("Starting coordinate: %f, %f, array index %f\n", x, y,
1994 z);
1995 break;
1996 case CL_MEM_OBJECT_IMAGE1D:
1997 case CL_MEM_OBJECT_IMAGE1D_BUFFER:
1998 log_info("Starting coordinate: %f\n", x);
1999 break;
2000 case CL_MEM_OBJECT_IMAGE2D:
2001 log_info("Starting coordinate: %f, %f\n", x, y);
2002 break;
2003 case CL_MEM_OBJECT_IMAGE3D:
2004 default: log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
2005 }
2006 }
2007
2008 // At this point, we have unnormalized coordinates.
2009
2010 if (imageSampler->filter_mode == CL_FILTER_NEAREST)
2011 {
2012 int ix, iy, iz;
2013
2014 // We apply the addressing function to the now-unnormalized
2015 // coordinates. Note that the array cases again require special
2016 // care, per section 8.4 in the OpenCL 1.2 Specification.
2017
2018 ix = adFn(static_cast<int>(floorf(x)), width_lod);
2019
2020 switch (imageInfo->type)
2021 {
2022 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
2023 iy = static_cast<int>(calculate_array_index(
2024 y, (float)(imageInfo->arraySize - 1)));
2025 iz = 0;
2026 if (verbose)
2027 {
2028 log_info("\tArray index %f evaluates to %d\n", y, iy);
2029 }
2030 break;
2031 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
2032 iy = adFn(static_cast<int>(floorf(y)), height_lod);
2033 iz = static_cast<int>(calculate_array_index(
2034 z, (float)(imageInfo->arraySize - 1)));
2035 if (verbose)
2036 {
2037 log_info("\tArray index %f evaluates to %d\n", z, iz);
2038 }
2039 break;
2040 default:
2041 iy = adFn(static_cast<int>(floorf(y)), height_lod);
2042 if (depth_lod != 0)
2043 iz = adFn(static_cast<int>(floorf(z)), depth_lod);
2044 else
2045 iz = 0;
2046 }
2047
2048 if (verbose)
2049 {
2050 if (iz)
2051 log_info(
2052 "\tReference integer coords calculated: { %d, %d, %d }\n",
2053 ix, iy, iz);
2054 else
2055 log_info("\tReference integer coords calculated: { %d, %d }\n",
2056 ix, iy);
2057 }
2058
2059 read_image_pixel_float(imageData, imageInfo, ix, iy, iz, outData, lod);
2060 check_for_denorms(outData, containsDenorms);
2061 for (int i = 0; i < 4; i++) returnVal.p[i] = fabsf(outData[i]);
2062 return returnVal;
2063 }
2064 else
2065 {
2066 // Linear filtering cases.
2067
2068 size_t width = width_lod, height = height_lod, depth = depth_lod;
2069
2070 // Image arrays can use 2D filtering, but require us to walk into the
2071 // image a certain number of slices before reading.
2072
2073 if (depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY
2074 || imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
2075 {
2076 float array_index = 0;
2077
2078 size_t layer_offset = 0;
2079
2080 if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
2081 {
2082 array_index =
2083 calculate_array_index(z, (float)(imageInfo->arraySize - 1));
2084 layer_offset = slice_pitch_lod * (size_t)array_index;
2085 }
2086 else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
2087 {
2088 array_index =
2089 calculate_array_index(y, (float)(imageInfo->arraySize - 1));
2090 layer_offset = slice_pitch_lod * (size_t)array_index;
2091
2092 // Set up y and height so that the filtering below is correct
2093 // 1D filtering on a single slice.
2094 height = 1;
2095 }
2096
2097 int x1 = adFn(static_cast<int>(floorf(x - 0.5f)), width);
2098 int y1 = 0;
2099 int x2 = adFn(static_cast<int>(floorf(x - 0.5f) + 1), width);
2100 int y2 = 0;
2101 if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
2102 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
2103 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER))
2104 {
2105 y1 = adFn(static_cast<int>(floorf(y - 0.5f)), height);
2106 y2 = adFn(static_cast<int>(floorf(y - 0.5f) + 1), height);
2107 }
2108 else
2109 {
2110 y = 0.5f;
2111 }
2112
2113 if (verbose)
2114 {
2115 log_info("\tActual integer coords used (i = floor(x-.5)): i0:{ "
2116 "%d, %d } and i1:{ %d, %d }\n",
2117 x1, y1, x2, y2);
2118 log_info("\tArray coordinate is %f\n", array_index);
2119 }
2120
2121 // Walk to beginning of the 'correct' slice, if needed.
2122 char *imgPtr = ((char *)imageData) + layer_offset;
2123
2124 float upLeft[4], upRight[4], lowLeft[4], lowRight[4];
2125 float maxUp[4], maxLow[4];
2126 read_image_pixel_float(imgPtr, imageInfo, x1, y1, 0, upLeft, lod);
2127 read_image_pixel_float(imgPtr, imageInfo, x2, y1, 0, upRight, lod);
2128 check_for_denorms(upLeft, containsDenorms);
2129 check_for_denorms(upRight, containsDenorms);
2130 pixelMax(upLeft, upRight, maxUp);
2131 read_image_pixel_float(imgPtr, imageInfo, x1, y2, 0, lowLeft, lod);
2132 read_image_pixel_float(imgPtr, imageInfo, x2, y2, 0, lowRight, lod);
2133 check_for_denorms(lowLeft, containsDenorms);
2134 check_for_denorms(lowRight, containsDenorms);
2135 pixelMax(lowLeft, lowRight, maxLow);
2136 pixelMax(maxUp, maxLow, returnVal.p);
2137
2138 if (verbose)
2139 {
2140 if (NULL == containsDenorms)
2141 log_info("\tSampled pixels (rgba order, denorms flushed to "
2142 "zero):\n");
2143 else
2144 log_info("\tSampled pixels (rgba order):\n");
2145 log_info("\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1],
2146 upLeft[2], upLeft[3]);
2147 log_info("\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1],
2148 upRight[2], upRight[3]);
2149 log_info("\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1],
2150 lowLeft[2], lowLeft[3]);
2151 log_info("\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1],
2152 lowRight[2], lowRight[3]);
2153 }
2154
2155 bool printMe = false;
2156 if (x1 <= 0 || x2 <= 0 || x1 >= (int)width - 1
2157 || x2 >= (int)width - 1)
2158 printMe = true;
2159 if (y1 <= 0 || y2 <= 0 || y1 >= (int)height - 1
2160 || y2 >= (int)height - 1)
2161 printMe = true;
2162
2163 double weights[2][2];
2164
2165 weights[0][0] = weights[0][1] = 1.0 - frac(x - 0.5f);
2166 weights[1][0] = weights[1][1] = frac(x - 0.5f);
2167 weights[0][0] *= 1.0 - frac(y - 0.5f);
2168 weights[1][0] *= 1.0 - frac(y - 0.5f);
2169 weights[0][1] *= frac(y - 0.5f);
2170 weights[1][1] *= frac(y - 0.5f);
2171
2172 if (verbose)
2173 log_info("\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f\n",
2174 frac(x - 0.5f), frac(y - 0.5f));
2175
2176 for (int i = 0; i < 3; i++)
2177 {
2178 outData[i] = (float)((upLeft[i] * weights[0][0])
2179 + (upRight[i] * weights[1][0])
2180 + (lowLeft[i] * weights[0][1])
2181 + (lowRight[i] * weights[1][1]));
2182 // flush subnormal results to zero if necessary
2183 if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
2184 outData[i] = copysignf(0.0f, outData[i]);
2185 }
2186 outData[3] = (float)((upLeft[3] * weights[0][0])
2187 + (upRight[3] * weights[1][0])
2188 + (lowLeft[3] * weights[0][1])
2189 + (lowRight[3] * weights[1][1]));
2190 // flush subnormal results to zero if necessary
2191 if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
2192 outData[3] = copysignf(0.0f, outData[3]);
2193 }
2194 else
2195 {
2196 // 3D linear filtering
2197 int x1 = adFn(static_cast<int>(floorf(x - 0.5f)), width_lod);
2198 int y1 = adFn(static_cast<int>(floorf(y - 0.5f)), height_lod);
2199 int z1 = adFn(static_cast<int>(floorf(z - 0.5f)), depth_lod);
2200 int x2 = adFn(static_cast<int>(floorf(x - 0.5f) + 1), width_lod);
2201 int y2 = adFn(static_cast<int>(floorf(y - 0.5f) + 1), height_lod);
2202 int z2 = adFn(static_cast<int>(floorf(z - 0.5f) + 1), depth_lod);
2203
2204 if (verbose)
2205 log_info("\tActual integer coords used (i = floor(x-.5)): "
2206 "i0:{%d, %d, %d} and i1:{%d, %d, %d}\n",
2207 x1, y1, z1, x2, y2, z2);
2208
2209 float upLeftA[4], upRightA[4], lowLeftA[4], lowRightA[4];
2210 float upLeftB[4], upRightB[4], lowLeftB[4], lowRightB[4];
2211 float pixelMaxA[4], pixelMaxB[4];
2212 read_image_pixel_float(imageData, imageInfo, x1, y1, z1, upLeftA,
2213 lod);
2214 read_image_pixel_float(imageData, imageInfo, x2, y1, z1, upRightA,
2215 lod);
2216 check_for_denorms(upLeftA, containsDenorms);
2217 check_for_denorms(upRightA, containsDenorms);
2218 pixelMax(upLeftA, upRightA, pixelMaxA);
2219 read_image_pixel_float(imageData, imageInfo, x1, y2, z1, lowLeftA,
2220 lod);
2221 read_image_pixel_float(imageData, imageInfo, x2, y2, z1, lowRightA,
2222 lod);
2223 check_for_denorms(lowLeftA, containsDenorms);
2224 check_for_denorms(lowRightA, containsDenorms);
2225 pixelMax(lowLeftA, lowRightA, pixelMaxB);
2226 pixelMax(pixelMaxA, pixelMaxB, returnVal.p);
2227 read_image_pixel_float(imageData, imageInfo, x1, y1, z2, upLeftB,
2228 lod);
2229 read_image_pixel_float(imageData, imageInfo, x2, y1, z2, upRightB,
2230 lod);
2231 check_for_denorms(upLeftB, containsDenorms);
2232 check_for_denorms(upRightB, containsDenorms);
2233 pixelMax(upLeftB, upRightB, pixelMaxA);
2234 read_image_pixel_float(imageData, imageInfo, x1, y2, z2, lowLeftB,
2235 lod);
2236 read_image_pixel_float(imageData, imageInfo, x2, y2, z2, lowRightB,
2237 lod);
2238 check_for_denorms(lowLeftB, containsDenorms);
2239 check_for_denorms(lowRightB, containsDenorms);
2240 pixelMax(lowLeftB, lowRightB, pixelMaxB);
2241 pixelMax(pixelMaxA, pixelMaxB, pixelMaxA);
2242 pixelMax(pixelMaxA, returnVal.p, returnVal.p);
2243
2244 if (verbose)
2245 {
2246 if (NULL == containsDenorms)
2247 log_info("\tSampled pixels (rgba order, denorms flushed to "
2248 "zero):\n");
2249 else
2250 log_info("\tSampled pixels (rgba order):\n");
2251 log_info("\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1],
2252 upLeftA[2], upLeftA[3]);
2253 log_info("\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1],
2254 upRightA[2], upRightA[3]);
2255 log_info("\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1],
2256 lowLeftA[2], lowLeftA[3]);
2257 log_info("\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0],
2258 lowRightA[1], lowRightA[2], lowRightA[3]);
2259 log_info("\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1],
2260 upLeftB[2], upLeftB[3]);
2261 log_info("\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1],
2262 upRightB[2], upRightB[3]);
2263 log_info("\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1],
2264 lowLeftB[2], lowLeftB[3]);
2265 log_info("\t\tp111: %f, %f, %f, %f\n", lowRightB[0],
2266 lowRightB[1], lowRightB[2], lowRightB[3]);
2267 }
2268
2269 double weights[2][2][2];
2270
2271 float a = frac(x - 0.5f), b = frac(y - 0.5f), c = frac(z - 0.5f);
2272 weights[0][0][0] = weights[0][1][0] = weights[0][0][1] =
2273 weights[0][1][1] = 1.f - a;
2274 weights[1][0][0] = weights[1][1][0] = weights[1][0][1] =
2275 weights[1][1][1] = a;
2276 weights[0][0][0] *= 1.f - b;
2277 weights[1][0][0] *= 1.f - b;
2278 weights[0][0][1] *= 1.f - b;
2279 weights[1][0][1] *= 1.f - b;
2280 weights[0][1][0] *= b;
2281 weights[1][1][0] *= b;
2282 weights[0][1][1] *= b;
2283 weights[1][1][1] *= b;
2284 weights[0][0][0] *= 1.f - c;
2285 weights[0][1][0] *= 1.f - c;
2286 weights[1][0][0] *= 1.f - c;
2287 weights[1][1][0] *= 1.f - c;
2288 weights[0][0][1] *= c;
2289 weights[0][1][1] *= c;
2290 weights[1][0][1] *= c;
2291 weights[1][1][1] *= c;
2292
2293 if (verbose)
2294 log_info("\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f, "
2295 "frac( z - 0.5f ) = %f\n",
2296 frac(x - 0.5f), frac(y - 0.5f), frac(z - 0.5f));
2297
2298 for (int i = 0; i < 3; i++)
2299 {
2300 outData[i] = (float)((upLeftA[i] * weights[0][0][0])
2301 + (upRightA[i] * weights[1][0][0])
2302 + (lowLeftA[i] * weights[0][1][0])
2303 + (lowRightA[i] * weights[1][1][0])
2304 + (upLeftB[i] * weights[0][0][1])
2305 + (upRightB[i] * weights[1][0][1])
2306 + (lowLeftB[i] * weights[0][1][1])
2307 + (lowRightB[i] * weights[1][1][1]));
2308 // flush subnormal results to zero if necessary
2309 if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
2310 outData[i] = copysignf(0.0f, outData[i]);
2311 }
2312 outData[3] = (float)((upLeftA[3] * weights[0][0][0])
2313 + (upRightA[3] * weights[1][0][0])
2314 + (lowLeftA[3] * weights[0][1][0])
2315 + (lowRightA[3] * weights[1][1][0])
2316 + (upLeftB[3] * weights[0][0][1])
2317 + (upRightB[3] * weights[1][0][1])
2318 + (lowLeftB[3] * weights[0][1][1])
2319 + (lowRightB[3] * weights[1][1][1]));
2320 // flush subnormal results to zero if necessary
2321 if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
2322 outData[3] = copysignf(0.0f, outData[3]);
2323 }
2324
2325 return returnVal;
2326 }
2327 }
2328
sample_image_pixel_float_offset(void * imageData,image_descriptor * imageInfo,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms)2329 FloatPixel sample_image_pixel_float_offset(
2330 void *imageData, image_descriptor *imageInfo, float x, float y, float z,
2331 float xAddressOffset, float yAddressOffset, float zAddressOffset,
2332 image_sampler_data *imageSampler, float *outData, int verbose,
2333 int *containsDenorms)
2334 {
2335 return sample_image_pixel_float_offset(
2336 imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset,
2337 zAddressOffset, imageSampler, outData, verbose, containsDenorms, 0);
2338 }
2339
2340
debug_find_vector_in_image(void * imagePtr,image_descriptor * imageInfo,void * vectorToFind,size_t vectorSize,int * outX,int * outY,int * outZ,size_t lod)2341 int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
2342 void *vectorToFind, size_t vectorSize, int *outX,
2343 int *outY, int *outZ, size_t lod)
2344 {
2345 int foundCount = 0;
2346 char *iPtr = (char *)imagePtr;
2347 size_t width;
2348 size_t depth;
2349 size_t height;
2350 size_t row_pitch;
2351 size_t slice_pitch;
2352
2353 switch (imageInfo->type)
2354 {
2355 case CL_MEM_OBJECT_IMAGE1D:
2356 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2357 height = 1;
2358 depth = 1;
2359 break;
2360 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
2361 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2362 height = 1;
2363 depth = imageInfo->arraySize;
2364 break;
2365 case CL_MEM_OBJECT_IMAGE2D:
2366 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2367 height =
2368 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2369 depth = 1;
2370 break;
2371 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
2372 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2373 height =
2374 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2375 depth = imageInfo->arraySize;
2376 break;
2377 case CL_MEM_OBJECT_IMAGE3D:
2378 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2379 height =
2380 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2381 depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
2382 break;
2383 }
2384
2385 row_pitch = width * get_pixel_size(imageInfo->format);
2386 slice_pitch = row_pitch * height;
2387
2388 for (size_t z = 0; z < depth; z++)
2389 {
2390 for (size_t y = 0; y < height; y++)
2391 {
2392 for (size_t x = 0; x < width; x++)
2393 {
2394 if (memcmp(iPtr, vectorToFind, vectorSize) == 0)
2395 {
2396 if (foundCount == 0)
2397 {
2398 *outX = (int)x;
2399 if (outY != NULL) *outY = (int)y;
2400 if (outZ != NULL) *outZ = (int)z;
2401 }
2402 foundCount++;
2403 }
2404 iPtr += vectorSize;
2405 }
2406 iPtr += row_pitch - (width * vectorSize);
2407 }
2408 iPtr += slice_pitch - (height * row_pitch);
2409 }
2410 return foundCount;
2411 }
2412
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,unsigned int * valuesToFind,int * outX,int * outY,int * outZ,int lod)2413 int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2414 unsigned int *valuesToFind, int *outX, int *outY,
2415 int *outZ, int lod)
2416 {
2417 char vectorToFind[4 * 4];
2418 size_t vectorSize = get_format_channel_count(imageInfo->format);
2419
2420
2421 if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8)
2422 {
2423 unsigned char *p = (unsigned char *)vectorToFind;
2424 for (unsigned int i = 0; i < vectorSize; i++)
2425 p[i] = (unsigned char)valuesToFind[i];
2426 }
2427 else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16)
2428 {
2429 unsigned short *p = (unsigned short *)vectorToFind;
2430 for (unsigned int i = 0; i < vectorSize; i++)
2431 p[i] = (unsigned short)valuesToFind[i];
2432 vectorSize *= 2;
2433 }
2434 else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32)
2435 {
2436 unsigned int *p = (unsigned int *)vectorToFind;
2437 for (unsigned int i = 0; i < vectorSize; i++)
2438 p[i] = (unsigned int)valuesToFind[i];
2439 vectorSize *= 4;
2440 }
2441 else
2442 {
2443 log_info("WARNING: Unable to search for debug pixel: invalid image "
2444 "format\n");
2445 return false;
2446 }
2447 return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2448 vectorSize, outX, outY, outZ, lod);
2449 }
2450
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,int * valuesToFind,int * outX,int * outY,int * outZ,int lod)2451 int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2452 int *valuesToFind, int *outX, int *outY,
2453 int *outZ, int lod)
2454 {
2455 char vectorToFind[4 * 4];
2456 size_t vectorSize = get_format_channel_count(imageInfo->format);
2457
2458 if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT8)
2459 {
2460 char *p = (char *)vectorToFind;
2461 for (unsigned int i = 0; i < vectorSize; i++)
2462 p[i] = (char)valuesToFind[i];
2463 }
2464 else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT16)
2465 {
2466 short *p = (short *)vectorToFind;
2467 for (unsigned int i = 0; i < vectorSize; i++)
2468 p[i] = (short)valuesToFind[i];
2469 vectorSize *= 2;
2470 }
2471 else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT32)
2472 {
2473 int *p = (int *)vectorToFind;
2474 for (unsigned int i = 0; i < vectorSize; i++)
2475 p[i] = (int)valuesToFind[i];
2476 vectorSize *= 4;
2477 }
2478 else
2479 {
2480 log_info("WARNING: Unable to search for debug pixel: invalid image "
2481 "format\n");
2482 return false;
2483 }
2484 return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2485 vectorSize, outX, outY, outZ, lod);
2486 }
2487
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,float * valuesToFind,int * outX,int * outY,int * outZ,int lod)2488 int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2489 float *valuesToFind, int *outX, int *outY,
2490 int *outZ, int lod)
2491 {
2492 char vectorToFind[4 * 4];
2493 float swizzled[4];
2494 memcpy(swizzled, valuesToFind, sizeof(swizzled));
2495 size_t vectorSize = get_pixel_size(imageInfo->format);
2496 pack_image_pixel(swizzled, imageInfo->format, vectorToFind);
2497 return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2498 vectorSize, outX, outY, outZ, lod);
2499 }
2500
2501 template <class T>
swizzle_vector_for_image(T * srcVector,const cl_image_format * imageFormat)2502 void swizzle_vector_for_image(T *srcVector, const cl_image_format *imageFormat)
2503 {
2504 T temp;
2505 switch (imageFormat->image_channel_order)
2506 {
2507 case CL_A: srcVector[0] = srcVector[3]; break;
2508 case CL_R:
2509 case CL_Rx:
2510 case CL_RG:
2511 case CL_RGx:
2512 case CL_RGB:
2513 case CL_RGBx:
2514 case CL_RGBA:
2515 case CL_sRGB:
2516 case CL_sRGBx:
2517 case CL_sRGBA: break;
2518 case CL_RA: srcVector[1] = srcVector[3]; break;
2519 case CL_ARGB:
2520 temp = srcVector[3];
2521 srcVector[3] = srcVector[2];
2522 srcVector[2] = srcVector[1];
2523 srcVector[1] = srcVector[0];
2524 srcVector[0] = temp;
2525 break;
2526 case CL_ABGR:
2527 temp = srcVector[3];
2528 srcVector[3] = srcVector[0];
2529 srcVector[0] = temp;
2530 temp = srcVector[2];
2531 srcVector[2] = srcVector[1];
2532 srcVector[1] = temp;
2533 break;
2534 case CL_BGRA:
2535 case CL_sBGRA:
2536 temp = srcVector[0];
2537 srcVector[0] = srcVector[2];
2538 srcVector[2] = temp;
2539 break;
2540 case CL_INTENSITY:
2541 srcVector[3] = srcVector[0];
2542 srcVector[2] = srcVector[0];
2543 srcVector[1] = srcVector[0];
2544 break;
2545 case CL_LUMINANCE:
2546 srcVector[2] = srcVector[0];
2547 srcVector[1] = srcVector[0];
2548 break;
2549 #ifdef CL_1RGB_APPLE
2550 case CL_1RGB_APPLE:
2551 temp = srcVector[3];
2552 srcVector[3] = srcVector[2];
2553 srcVector[2] = srcVector[1];
2554 srcVector[1] = srcVector[0];
2555 srcVector[0] = temp;
2556 break;
2557 #endif
2558 #ifdef CL_BGR1_APPLE
2559 case CL_BGR1_APPLE:
2560 temp = srcVector[0];
2561 srcVector[0] = srcVector[2];
2562 srcVector[2] = temp;
2563 break;
2564 #endif
2565 }
2566 }
2567
2568 #define SATURATE(v, min, max) (v < min ? min : (v > max ? max : v))
2569
pack_image_pixel(unsigned int * srcVector,const cl_image_format * imageFormat,void * outData)2570 void pack_image_pixel(unsigned int *srcVector,
2571 const cl_image_format *imageFormat, void *outData)
2572 {
2573 swizzle_vector_for_image<unsigned int>(srcVector, imageFormat);
2574 size_t channelCount = get_format_channel_count(imageFormat);
2575
2576 switch (imageFormat->image_channel_data_type)
2577 {
2578 case CL_UNSIGNED_INT8: {
2579 unsigned char *ptr = (unsigned char *)outData;
2580 for (unsigned int i = 0; i < channelCount; i++)
2581 ptr[i] = (unsigned char)SATURATE(srcVector[i], 0, 255);
2582 break;
2583 }
2584 case CL_UNSIGNED_INT16: {
2585 unsigned short *ptr = (unsigned short *)outData;
2586 for (unsigned int i = 0; i < channelCount; i++)
2587 ptr[i] = (unsigned short)SATURATE(srcVector[i], 0, 65535);
2588 break;
2589 }
2590 case CL_UNSIGNED_INT32: {
2591 unsigned int *ptr = (unsigned int *)outData;
2592 for (unsigned int i = 0; i < channelCount; i++)
2593 ptr[i] = (unsigned int)srcVector[i];
2594 break;
2595 }
2596 default: break;
2597 }
2598 }
2599
pack_image_pixel(int * srcVector,const cl_image_format * imageFormat,void * outData)2600 void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
2601 void *outData)
2602 {
2603 swizzle_vector_for_image<int>(srcVector, imageFormat);
2604 size_t chanelCount = get_format_channel_count(imageFormat);
2605
2606 switch (imageFormat->image_channel_data_type)
2607 {
2608 case CL_SIGNED_INT8: {
2609 char *ptr = (char *)outData;
2610 for (unsigned int i = 0; i < chanelCount; i++)
2611 ptr[i] = (char)SATURATE(srcVector[i], -128, 127);
2612 break;
2613 }
2614 case CL_SIGNED_INT16: {
2615 short *ptr = (short *)outData;
2616 for (unsigned int i = 0; i < chanelCount; i++)
2617 ptr[i] = (short)SATURATE(srcVector[i], -32768, 32767);
2618 break;
2619 }
2620 case CL_SIGNED_INT32: {
2621 int *ptr = (int *)outData;
2622 for (unsigned int i = 0; i < chanelCount; i++)
2623 ptr[i] = (int)srcVector[i];
2624 break;
2625 }
2626 default: break;
2627 }
2628 }
2629
round_to_even(float v)2630 cl_int round_to_even(float v)
2631 {
2632 // clamp overflow
2633 if (v >= -(float)CL_INT_MIN) return CL_INT_MAX;
2634 if (v <= (float)CL_INT_MIN) return CL_INT_MIN;
2635
2636 // round fractional values to integer value
2637 if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23))
2638 {
2639 static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23),
2640 MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
2641 float magicVal = magic[v < 0.0f];
2642 v += magicVal;
2643 v -= magicVal;
2644 }
2645
2646 return (cl_int)v;
2647 }
2648
pack_image_pixel(float * srcVector,const cl_image_format * imageFormat,void * outData)2649 void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
2650 void *outData)
2651 {
2652 swizzle_vector_for_image<float>(srcVector, imageFormat);
2653 size_t channelCount = get_format_channel_count(imageFormat);
2654 switch (imageFormat->image_channel_data_type)
2655 {
2656 case CL_HALF_FLOAT: {
2657 cl_half *ptr = (cl_half *)outData;
2658
2659 switch (gFloatToHalfRoundingMode)
2660 {
2661 case kRoundToNearestEven:
2662 for (unsigned int i = 0; i < channelCount; i++)
2663 ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTE);
2664 break;
2665 case kRoundTowardZero:
2666 for (unsigned int i = 0; i < channelCount; i++)
2667 ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTZ);
2668 break;
2669 default:
2670 log_error("ERROR: Test internal error -- unhandled or "
2671 "unknown float->half rounding mode.\n");
2672 exit(-1);
2673 break;
2674 }
2675 break;
2676 }
2677
2678 case CL_FLOAT: {
2679 cl_float *ptr = (cl_float *)outData;
2680 for (unsigned int i = 0; i < channelCount; i++)
2681 ptr[i] = srcVector[i];
2682 break;
2683 }
2684
2685 case CL_SNORM_INT8: {
2686 cl_char *ptr = (cl_char *)outData;
2687 for (unsigned int i = 0; i < channelCount; i++)
2688 ptr[i] =
2689 (cl_char)NORMALIZE_SIGNED(srcVector[i], -127.0f, 127.f);
2690 break;
2691 }
2692 case CL_SNORM_INT16: {
2693 cl_short *ptr = (cl_short *)outData;
2694 for (unsigned int i = 0; i < channelCount; i++)
2695 ptr[i] =
2696 (short)NORMALIZE_SIGNED(srcVector[i], -32767.f, 32767.f);
2697 break;
2698 }
2699 case CL_UNORM_INT8: {
2700 cl_uchar *ptr = (cl_uchar *)outData;
2701 if (is_sRGBA_order(imageFormat->image_channel_order))
2702 {
2703 ptr[0] = (unsigned char)(sRGBmap(srcVector[0]) + 0.5);
2704 ptr[1] = (unsigned char)(sRGBmap(srcVector[1]) + 0.5);
2705 ptr[2] = (unsigned char)(sRGBmap(srcVector[2]) + 0.5);
2706 if (channelCount == 4)
2707 ptr[3] = (unsigned char)NORMALIZE(srcVector[3], 255.f);
2708 }
2709 else
2710 {
2711 for (unsigned int i = 0; i < channelCount; i++)
2712 ptr[i] = (unsigned char)NORMALIZE(srcVector[i], 255.f);
2713 }
2714 #ifdef CL_1RGB_APPLE
2715 if (imageFormat->image_channel_order == CL_1RGB_APPLE)
2716 ptr[0] = 255.0f;
2717 #endif
2718 #ifdef CL_BGR1_APPLE
2719 if (imageFormat->image_channel_order == CL_BGR1_APPLE)
2720 ptr[3] = 255.0f;
2721 #endif
2722 break;
2723 }
2724 case CL_UNORM_INT16: {
2725 cl_ushort *ptr = (cl_ushort *)outData;
2726 for (unsigned int i = 0; i < channelCount; i++)
2727 ptr[i] = (unsigned short)NORMALIZE(srcVector[i], 65535.f);
2728 break;
2729 }
2730 case CL_UNORM_SHORT_555: {
2731 cl_ushort *ptr = (cl_ushort *)outData;
2732 ptr[0] =
2733 (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 10)
2734 | (((unsigned short)NORMALIZE(srcVector[1], 31.f) & 31) << 5)
2735 | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
2736 break;
2737 }
2738 case CL_UNORM_SHORT_565: {
2739 cl_ushort *ptr = (cl_ushort *)outData;
2740 ptr[0] =
2741 (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 11)
2742 | (((unsigned short)NORMALIZE(srcVector[1], 63.f) & 63) << 5)
2743 | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
2744 break;
2745 }
2746 case CL_UNORM_INT_101010: {
2747 cl_uint *ptr = (cl_uint *)outData;
2748 ptr[0] =
2749 (((unsigned int)NORMALIZE(srcVector[0], 1023.f) & 1023) << 20)
2750 | (((unsigned int)NORMALIZE(srcVector[1], 1023.f) & 1023) << 10)
2751 | (((unsigned int)NORMALIZE(srcVector[2], 1023.f) & 1023) << 0);
2752 break;
2753 }
2754 case CL_SIGNED_INT8: {
2755 cl_char *ptr = (cl_char *)outData;
2756 for (unsigned int i = 0; i < channelCount; i++)
2757 ptr[i] =
2758 (cl_char)CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
2759 break;
2760 }
2761 case CL_SIGNED_INT16: {
2762 cl_short *ptr = (cl_short *)outData;
2763 for (unsigned int i = 0; i < channelCount; i++)
2764 ptr[i] =
2765 (short)CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
2766 break;
2767 }
2768 case CL_SIGNED_INT32: {
2769 cl_int *ptr = (cl_int *)outData;
2770 for (unsigned int i = 0; i < channelCount; i++)
2771 ptr[i] = round_to_even(srcVector[i]);
2772 break;
2773 }
2774 case CL_UNSIGNED_INT8: {
2775 cl_uchar *ptr = (cl_uchar *)outData;
2776 for (unsigned int i = 0; i < channelCount; i++)
2777 ptr[i] =
2778 (cl_uchar)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
2779 break;
2780 }
2781 case CL_UNSIGNED_INT16: {
2782 cl_ushort *ptr = (cl_ushort *)outData;
2783 for (unsigned int i = 0; i < channelCount; i++)
2784 ptr[i] = (cl_ushort)CONVERT_UINT(srcVector[i], 32767.f,
2785 CL_USHRT_MAX);
2786 break;
2787 }
2788 case CL_UNSIGNED_INT32: {
2789 cl_uint *ptr = (cl_uint *)outData;
2790 for (unsigned int i = 0; i < channelCount; i++)
2791 ptr[i] = (cl_uint)CONVERT_UINT(
2792 srcVector[i],
2793 MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
2794 CL_UINT_MAX);
2795 break;
2796 }
2797 #ifdef CL_SFIXED14_APPLE
2798 case CL_SFIXED14_APPLE: {
2799 cl_ushort *ptr = (cl_ushort *)outData;
2800 for (unsigned int i = 0; i < channelCount; i++)
2801 {
2802 cl_float f = fmaxf(srcVector[i], -1.0f);
2803 f = fminf(f, 3.0f);
2804 cl_int d = rintf(f * 0x1.0p14f);
2805 d += 16384;
2806 if (d > CL_USHRT_MAX) d = CL_USHRT_MAX;
2807 ptr[i] = d;
2808 }
2809 break;
2810 }
2811 #endif
2812 default:
2813 log_error("INTERNAL ERROR: unknown format (%d)\n",
2814 imageFormat->image_channel_data_type);
2815 exit(-1);
2816 break;
2817 }
2818 }
2819
pack_image_pixel_error(const float * srcVector,const cl_image_format * imageFormat,const void * results,float * errors)2820 void pack_image_pixel_error(const float *srcVector,
2821 const cl_image_format *imageFormat,
2822 const void *results, float *errors)
2823 {
2824 size_t channelCount = get_format_channel_count(imageFormat);
2825 switch (imageFormat->image_channel_data_type)
2826 {
2827 case CL_HALF_FLOAT: {
2828 const cl_half *ptr = (const cl_half *)results;
2829
2830 for (unsigned int i = 0; i < channelCount; i++)
2831 errors[i] = Ulp_Error_Half(ptr[i], srcVector[i]);
2832
2833 break;
2834 }
2835
2836 case CL_FLOAT: {
2837 const cl_ushort *ptr = (const cl_ushort *)results;
2838
2839 for (unsigned int i = 0; i < channelCount; i++)
2840 errors[i] = Ulp_Error(ptr[i], srcVector[i]);
2841
2842 break;
2843 }
2844
2845 case CL_SNORM_INT8: {
2846 const cl_char *ptr = (const cl_char *)results;
2847
2848 for (unsigned int i = 0; i < channelCount; i++)
2849 errors[i] = ptr[i]
2850 - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -127.0f, 127.f);
2851
2852 break;
2853 }
2854 case CL_SNORM_INT16: {
2855 const cl_short *ptr = (const cl_short *)results;
2856
2857 for (unsigned int i = 0; i < channelCount; i++)
2858 errors[i] = ptr[i]
2859 - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -32767.f,
2860 32767.f);
2861
2862 break;
2863 }
2864 case CL_UNORM_INT8: {
2865 const cl_uchar *ptr = (const cl_uchar *)results;
2866
2867 for (unsigned int i = 0; i < channelCount; i++)
2868 errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 255.f);
2869
2870 break;
2871 }
2872 case CL_UNORM_INT16: {
2873 const cl_ushort *ptr = (const cl_ushort *)results;
2874
2875 for (unsigned int i = 0; i < channelCount; i++)
2876 errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 65535.f);
2877
2878 break;
2879 }
2880 case CL_UNORM_SHORT_555: {
2881 const cl_ushort *ptr = (const cl_ushort *)results;
2882
2883 errors[0] =
2884 ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
2885 errors[1] =
2886 ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED(srcVector[1], 31.f);
2887 errors[2] =
2888 ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
2889
2890 break;
2891 }
2892 case CL_UNORM_SHORT_565: {
2893 const cl_ushort *ptr = (const cl_ushort *)results;
2894
2895 errors[0] =
2896 ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
2897 errors[1] =
2898 ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED(srcVector[1], 63.f);
2899 errors[2] =
2900 ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
2901
2902 break;
2903 }
2904 case CL_UNORM_INT_101010: {
2905 const cl_uint *ptr = (const cl_uint *)results;
2906
2907 errors[0] = ((ptr[0] >> 20) & 1023)
2908 - NORMALIZE_UNROUNDED(srcVector[0], 1023.f);
2909 errors[1] = ((ptr[0] >> 10) & 1023)
2910 - NORMALIZE_UNROUNDED(srcVector[1], 1023.f);
2911 errors[2] = ((ptr[0] >> 0) & 1023)
2912 - NORMALIZE_UNROUNDED(srcVector[2], 1023.f);
2913
2914 break;
2915 }
2916 case CL_SIGNED_INT8: {
2917 const cl_char *ptr = (const cl_char *)results;
2918
2919 for (unsigned int i = 0; i < channelCount; i++)
2920 errors[i] =
2921 ptr[i] - CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
2922
2923 break;
2924 }
2925 case CL_SIGNED_INT16: {
2926 const cl_short *ptr = (const cl_short *)results;
2927 for (unsigned int i = 0; i < channelCount; i++)
2928 errors[i] = ptr[i]
2929 - CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
2930 break;
2931 }
2932 case CL_SIGNED_INT32: {
2933 const cl_int *ptr = (const cl_int *)results;
2934 for (unsigned int i = 0; i < channelCount; i++)
2935 errors[i] = (cl_float)((cl_long)ptr[i]
2936 - (cl_long)round_to_even(srcVector[i]));
2937 break;
2938 }
2939 case CL_UNSIGNED_INT8: {
2940 const cl_uchar *ptr = (const cl_uchar *)results;
2941 for (unsigned int i = 0; i < channelCount; i++)
2942 errors[i] = static_cast<float>(
2943 (cl_int)ptr[i]
2944 - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX));
2945 break;
2946 }
2947 case CL_UNSIGNED_INT16: {
2948 const cl_ushort *ptr = (const cl_ushort *)results;
2949 for (unsigned int i = 0; i < channelCount; i++)
2950 errors[i] = static_cast<float>(
2951 (cl_int)ptr[i]
2952 - (cl_int)CONVERT_UINT(srcVector[i], 32767.f,
2953 CL_USHRT_MAX));
2954 break;
2955 }
2956 case CL_UNSIGNED_INT32: {
2957 const cl_uint *ptr = (const cl_uint *)results;
2958 for (unsigned int i = 0; i < channelCount; i++)
2959 errors[i] = (cl_float)(
2960 (cl_long)ptr[i]
2961 - (cl_long)CONVERT_UINT(
2962 srcVector[i],
2963 MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
2964 CL_UINT_MAX));
2965 break;
2966 }
2967 #ifdef CL_SFIXED14_APPLE
2968 case CL_SFIXED14_APPLE: {
2969 const cl_ushort *ptr = (const cl_ushort *)results;
2970
2971 for (unsigned int i = 0; i < channelCount; i++)
2972 errors[i] = ptr[i]
2973 - NORMALIZE_SIGNED_UNROUNDED(((int)srcVector[i] - 16384),
2974 -16384.f, 49151.f);
2975
2976 break;
2977 }
2978 #endif
2979 default:
2980 log_error("INTERNAL ERROR: unknown format (%d)\n",
2981 imageFormat->image_channel_data_type);
2982 exit(-1);
2983 break;
2984 }
2985 }
2986
2987
2988 //
2989 // Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
2990 // This should be called lazily before attempting to verify image writes,
2991 // otherwise an error will occur.
2992 //
DetectFloatToHalfRoundingMode(cl_command_queue q)2993 int DetectFloatToHalfRoundingMode(
2994 cl_command_queue q) // Returns CL_SUCCESS on success
2995 {
2996 cl_int err = CL_SUCCESS;
2997
2998 if (gFloatToHalfRoundingMode == kDefaultRoundingMode)
2999 {
3000 // Some numbers near 0.5f, that we look at to see how the values are
3001 // rounded.
3002 static const cl_uint inData[4 * 4] = {
3003 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U,
3004 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
3005 0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U,
3006 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U
3007 };
3008 static const size_t count = sizeof(inData) / (4 * sizeof(inData[0]));
3009 const float *inp = (const float *)inData;
3010 cl_context context = NULL;
3011
3012 // Create an input buffer
3013 err = clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(context),
3014 &context, NULL);
3015 if (err)
3016 {
3017 log_error("Error: could not get context from command queue in "
3018 "DetectFloatToHalfRoundingMode (%d)",
3019 err);
3020 return err;
3021 }
3022
3023 cl_mem inBuf = clCreateBuffer(context,
3024 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR
3025 | CL_MEM_ALLOC_HOST_PTR,
3026 sizeof(inData), (void *)inData, &err);
3027 if (NULL == inBuf || err)
3028 {
3029 log_error("Error: could not create input buffer in "
3030 "DetectFloatToHalfRoundingMode (err: %d)",
3031 err);
3032 return err;
3033 }
3034
3035 // Create a small output image
3036 cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
3037 cl_mem outImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &fmt,
3038 count, 1, 0, NULL, &err);
3039 if (NULL == outImage || err)
3040 {
3041 log_error("Error: could not create half float out image in "
3042 "DetectFloatToHalfRoundingMode (err: %d)",
3043 err);
3044 clReleaseMemObject(inBuf);
3045 return err;
3046 }
3047
3048 // Create our program, and a kernel
3049 const char *kernelSource[1] = {
3050 "kernel void detect_round( global float4 *in, write_only image2d_t "
3051 "out )\n"
3052 "{\n"
3053 " write_imagef( out, (int2)(get_global_id(0),0), "
3054 "in[get_global_id(0)] );\n"
3055 "}\n"
3056 };
3057
3058 clProgramWrapper program;
3059 clKernelWrapper kernel;
3060 err = create_single_kernel_helper(context, &program, &kernel, 1,
3061 kernelSource, "detect_round");
3062
3063 if (NULL == program || err)
3064 {
3065 log_error("Error: could not create program in "
3066 "DetectFloatToHalfRoundingMode (err: %d)",
3067 err);
3068 clReleaseMemObject(inBuf);
3069 clReleaseMemObject(outImage);
3070 return err;
3071 }
3072
3073 cl_device_id device = NULL;
3074 err = clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(device), &device,
3075 NULL);
3076 if (err)
3077 {
3078 log_error("Error: could not get device from command queue in "
3079 "DetectFloatToHalfRoundingMode (%d)",
3080 err);
3081 clReleaseMemObject(inBuf);
3082 clReleaseMemObject(outImage);
3083 return err;
3084 }
3085
3086 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf);
3087 if (err)
3088 {
3089 log_error("Error: could not set argument 0 of kernel in "
3090 "DetectFloatToHalfRoundingMode (%d)",
3091 err);
3092 clReleaseMemObject(inBuf);
3093 clReleaseMemObject(outImage);
3094 return err;
3095 }
3096
3097 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outImage);
3098 if (err)
3099 {
3100 log_error("Error: could not set argument 1 of kernel in "
3101 "DetectFloatToHalfRoundingMode (%d)",
3102 err);
3103 clReleaseMemObject(inBuf);
3104 clReleaseMemObject(outImage);
3105 return err;
3106 }
3107
3108 // Run the kernel
3109 size_t global_work_size = count;
3110 err = clEnqueueNDRangeKernel(q, kernel, 1, NULL, &global_work_size,
3111 NULL, 0, NULL, NULL);
3112 if (err)
3113 {
3114 log_error("Error: could not enqueue kernel in "
3115 "DetectFloatToHalfRoundingMode (%d)",
3116 err);
3117 clReleaseMemObject(inBuf);
3118 clReleaseMemObject(outImage);
3119 return err;
3120 }
3121
3122 // read the results
3123 cl_half outBuf[count * 4];
3124 memset(outBuf, -1, sizeof(outBuf));
3125 size_t origin[3] = { 0, 0, 0 };
3126 size_t region[3] = { count, 1, 1 };
3127 err = clEnqueueReadImage(q, outImage, CL_TRUE, origin, region, 0, 0,
3128 outBuf, 0, NULL, NULL);
3129 if (err)
3130 {
3131 log_error("Error: could not read output image in "
3132 "DetectFloatToHalfRoundingMode (%d)",
3133 err);
3134 clReleaseMemObject(inBuf);
3135 clReleaseMemObject(outImage);
3136 return err;
3137 }
3138
3139 // Generate our list of reference results
3140 cl_half rte_ref[count * 4];
3141 cl_half rtz_ref[count * 4];
3142 for (size_t i = 0; i < 4 * count; i++)
3143 {
3144 rte_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTE);
3145 rtz_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTZ);
3146 }
3147
3148 // Verify that we got something in either rtz or rte mode
3149 if (0 == memcmp(rte_ref, outBuf, sizeof(rte_ref)))
3150 {
3151 log_info("Autodetected float->half rounding mode to be rte\n");
3152 gFloatToHalfRoundingMode = kRoundToNearestEven;
3153 }
3154 else if (0 == memcmp(rtz_ref, outBuf, sizeof(rtz_ref)))
3155 {
3156 log_info("Autodetected float->half rounding mode to be rtz\n");
3157 gFloatToHalfRoundingMode = kRoundTowardZero;
3158 }
3159 else
3160 {
3161 log_error("ERROR: float to half conversions proceed with invalid "
3162 "rounding mode!\n");
3163 log_info("\nfor:");
3164 for (size_t i = 0; i < count; i++)
3165 log_info(" {%a, %a, %a, %a},", inp[4 * i], inp[4 * i + 1],
3166 inp[4 * i + 2], inp[4 * i + 3]);
3167 log_info("\ngot:");
3168 for (size_t i = 0; i < count; i++)
3169 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3170 outBuf[4 * i], outBuf[4 * i + 1], outBuf[4 * i + 2],
3171 outBuf[4 * i + 3]);
3172 log_info("\nrte:");
3173 for (size_t i = 0; i < count; i++)
3174 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3175 rte_ref[4 * i], rte_ref[4 * i + 1], rte_ref[4 * i + 2],
3176 rte_ref[4 * i + 3]);
3177 log_info("\nrtz:");
3178 for (size_t i = 0; i < count; i++)
3179 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3180 rtz_ref[4 * i], rtz_ref[4 * i + 1], rtz_ref[4 * i + 2],
3181 rtz_ref[4 * i + 3]);
3182 log_info("\n");
3183 err = -1;
3184 gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
3185 }
3186
3187 // clean up
3188 clReleaseMemObject(inBuf);
3189 clReleaseMemObject(outImage);
3190 return err;
3191 }
3192
3193 // Make sure that the rounding mode was successfully detected, if we checked
3194 // earlier
3195 if (gFloatToHalfRoundingMode != kRoundToNearestEven
3196 && gFloatToHalfRoundingMode != kRoundTowardZero)
3197 return -2;
3198
3199 return err;
3200 }
3201
create_random_image_data(ExplicitType dataType,image_descriptor * imageInfo,BufferOwningPtr<char> & P,MTdata d,bool image2DFromBuffer)3202 char *create_random_image_data(ExplicitType dataType,
3203 image_descriptor *imageInfo,
3204 BufferOwningPtr<char> &P, MTdata d,
3205 bool image2DFromBuffer)
3206 {
3207 size_t allocSize, numPixels;
3208 if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
3209 {
3210 allocSize = (size_t)(compute_mipmapped_image_size(*imageInfo) * 4
3211 * get_explicit_type_size(dataType))
3212 / get_pixel_size(imageInfo->format);
3213 numPixels = allocSize / (get_explicit_type_size(dataType) * 4);
3214 }
3215 else
3216 {
3217 numPixels = (image2DFromBuffer ? imageInfo->rowPitch : imageInfo->width)
3218 * imageInfo->height * (imageInfo->depth ? imageInfo->depth : 1)
3219 * (imageInfo->arraySize ? imageInfo->arraySize : 1);
3220 allocSize = numPixels * 4 * get_explicit_type_size(dataType);
3221 }
3222
3223 #if 0 // DEBUG
3224 {
3225 fprintf(stderr,"--- create_random_image_data:\n");
3226 fprintf(stderr,"allocSize = %zu\n",allocSize);
3227 fprintf(stderr,"numPixels = %zu\n",numPixels);
3228 fprintf(stderr,"width = %zu\n",imageInfo->width);
3229 fprintf(stderr,"height = %zu\n",imageInfo->height);
3230 fprintf(stderr,"depth = %zu\n",imageInfo->depth);
3231 fprintf(stderr,"rowPitch = %zu\n",imageInfo->rowPitch);
3232 fprintf(stderr,"slicePitch = %zu\n",imageInfo->slicePitch);
3233 fprintf(stderr,"arraySize = %zu\n",imageInfo->arraySize);
3234 fprintf(stderr,"explicit_type_size = %zu\n",get_explicit_type_size(dataType));
3235 }
3236 #endif
3237
3238 #if defined(__APPLE__)
3239 char *data = NULL;
3240 if (gDeviceType == CL_DEVICE_TYPE_CPU)
3241 {
3242 size_t mapSize =
3243 ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
3244
3245 void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
3246 MAP_ANON | MAP_PRIVATE, 0, 0);
3247 if (map == MAP_FAILED)
3248 {
3249 perror("create_random_image_data: mmap");
3250 log_error("%s:%d: mmap failed, mapSize = %zu\n", __FILE__, __LINE__,
3251 mapSize);
3252 }
3253 intptr_t data_end = (intptr_t)map + mapSize - 4096;
3254 data = (char *)(data_end - (intptr_t)allocSize);
3255
3256 mprotect(map, 4096, PROT_NONE);
3257 mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
3258 P.reset(data, map, mapSize);
3259 }
3260 else
3261 {
3262 data = (char *)malloc(allocSize);
3263 P.reset(data);
3264 }
3265 #else
3266 char *data =
3267 (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
3268 P.reset(data, NULL, 0, allocSize, true);
3269 #endif
3270
3271 if (data == NULL)
3272 {
3273 log_error(
3274 "ERROR: Unable to malloc %zu bytes for create_random_image_data\n",
3275 allocSize);
3276 return NULL;
3277 }
3278
3279 switch (dataType)
3280 {
3281 case kFloat: {
3282 float *inputValues = (float *)data;
3283 switch (imageInfo->format->image_channel_data_type)
3284 {
3285 case CL_HALF_FLOAT: {
3286 // Generate data that is (mostly) inside the range of a half
3287 // float const float HALF_MIN = 5.96046448e-08f;
3288 const float HALF_MAX = 65504.0f;
3289
3290 size_t i = 0;
3291 inputValues[i++] = 0.f;
3292 inputValues[i++] = 1.f;
3293 inputValues[i++] = -1.f;
3294 inputValues[i++] = 2.f;
3295 for (; i < numPixels * 4; i++)
3296 inputValues[i] = get_random_float(-HALF_MAX - 2.f,
3297 HALF_MAX + 2.f, d);
3298 }
3299 break;
3300 #ifdef CL_SFIXED14_APPLE
3301 case CL_SFIXED14_APPLE: {
3302 size_t i = 0;
3303 if (numPixels * 4 >= 8)
3304 {
3305 inputValues[i++] = INFINITY;
3306 inputValues[i++] = 0x1.0p14f;
3307 inputValues[i++] = 0x1.0p31f;
3308 inputValues[i++] = 0x1.0p32f;
3309 inputValues[i++] = -INFINITY;
3310 inputValues[i++] = -0x1.0p14f;
3311 inputValues[i++] = -0x1.0p31f;
3312 inputValues[i++] = -0x1.1p31f;
3313 }
3314 for (; i < numPixels * 4; i++)
3315 inputValues[i] = get_random_float(-1.1f, 3.1f, d);
3316 }
3317 break;
3318 #endif
3319 case CL_FLOAT: {
3320 size_t i = 0;
3321 inputValues[i++] = INFINITY;
3322 inputValues[i++] = -INFINITY;
3323 inputValues[i++] = 0.0f;
3324 inputValues[i++] = 0.0f;
3325 cl_uint *p = (cl_uint *)data;
3326 for (; i < numPixels * 4; i++) p[i] = genrand_int32(d);
3327 }
3328 break;
3329
3330 default:
3331 size_t i = 0;
3332 if (numPixels * 4 >= 36)
3333 {
3334 inputValues[i++] = 0.0f;
3335 inputValues[i++] = 0.5f;
3336 inputValues[i++] = 31.5f;
3337 inputValues[i++] = 32.0f;
3338 inputValues[i++] = 127.5f;
3339 inputValues[i++] = 128.0f;
3340 inputValues[i++] = 255.5f;
3341 inputValues[i++] = 256.0f;
3342 inputValues[i++] = 1023.5f;
3343 inputValues[i++] = 1024.0f;
3344 inputValues[i++] = 32767.5f;
3345 inputValues[i++] = 32768.0f;
3346 inputValues[i++] = 65535.5f;
3347 inputValues[i++] = 65536.0f;
3348 inputValues[i++] = 2147483648.0f;
3349 inputValues[i++] = 4294967296.0f;
3350 inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
3351 inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
3352 inputValues[i++] = -0.0f;
3353 inputValues[i++] = -0.5f;
3354 inputValues[i++] = -31.5f;
3355 inputValues[i++] = -32.0f;
3356 inputValues[i++] = -127.5f;
3357 inputValues[i++] = -128.0f;
3358 inputValues[i++] = -255.5f;
3359 inputValues[i++] = -256.0f;
3360 inputValues[i++] = -1023.5f;
3361 inputValues[i++] = -1024.0f;
3362 inputValues[i++] = -32767.5f;
3363 inputValues[i++] = -32768.0f;
3364 inputValues[i++] = -65535.5f;
3365 inputValues[i++] = -65536.0f;
3366 inputValues[i++] = -2147483648.0f;
3367 inputValues[i++] = -4294967296.0f;
3368 inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
3369 inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
3370 }
3371 if (is_format_signed(imageInfo->format))
3372 {
3373 for (; i < numPixels * 4; i++)
3374 inputValues[i] = get_random_float(-1.1f, 1.1f, d);
3375 }
3376 else
3377 {
3378 for (; i < numPixels * 4; i++)
3379 inputValues[i] = get_random_float(-0.1f, 1.1f, d);
3380 }
3381 break;
3382 }
3383 break;
3384 }
3385
3386 case kInt: {
3387 int *imageData = (int *)data;
3388
3389 // We want to generate ints (mostly) in range of the target format
3390 int formatMin = get_format_min_int(imageInfo->format);
3391 size_t formatMax = get_format_max_int(imageInfo->format);
3392 if (formatMin == 0)
3393 {
3394 // Unsigned values, but we are only an int, so cap the actual
3395 // max at the max of signed ints
3396 if (formatMax > 2147483647L) formatMax = 2147483647L;
3397 }
3398 // If the final format is small enough, give us a bit of room for
3399 // out-of-range values to test
3400 if (formatMax < 2147483647L) formatMax += 2;
3401 if (formatMin > -2147483648LL) formatMin -= 2;
3402
3403 // Now gen
3404 for (size_t i = 0; i < numPixels * 4; i++)
3405 {
3406 imageData[i] = random_in_range(formatMin, (int)formatMax, d);
3407 }
3408 break;
3409 }
3410
3411 case kUInt:
3412 case kUnsignedInt: {
3413 unsigned int *imageData = (unsigned int *)data;
3414
3415 // We want to generate ints (mostly) in range of the target format
3416 int formatMin = get_format_min_int(imageInfo->format);
3417 size_t formatMax = get_format_max_int(imageInfo->format);
3418 if (formatMin < 0) formatMin = 0;
3419 // If the final format is small enough, give us a bit of room for
3420 // out-of-range values to test
3421 if (formatMax < 4294967295LL) formatMax += 2;
3422
3423 // Now gen
3424 for (size_t i = 0; i < numPixels * 4; i++)
3425 {
3426 imageData[i] = random_in_range(formatMin, (int)formatMax, d);
3427 }
3428 break;
3429 }
3430 default:
3431 // Unsupported source format
3432 delete[] data;
3433 return NULL;
3434 }
3435
3436 return data;
3437 }
3438
3439 /*
3440 deprecated
3441 bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t
3442 max, int &outValue )
3443 {
3444 int v = (int)value;
3445
3446 switch(imageSampler->addressing_mode)
3447 {
3448 case CL_ADDRESS_REPEAT:
3449 outValue = v;
3450 while( v < 0 )
3451 v += (int)max;
3452 while( v >= (int)max )
3453 v -= (int)max;
3454 if( v != outValue )
3455 {
3456 outValue = v;
3457 return true;
3458 }
3459 return false;
3460
3461 case CL_ADDRESS_MIRRORED_REPEAT:
3462 log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do
3463 we ever use this? exit(-1);
3464
3465 default:
3466 if( v < 0 )
3467 {
3468 outValue = 0;
3469 return true;
3470 }
3471 if( v >= (int)max )
3472 {
3473 outValue = (int)max - 1;
3474 return true;
3475 }
3476 outValue = v;
3477 return false;
3478 }
3479
3480 }
3481 */
3482
get_sampler_kernel_code(image_sampler_data * imageSampler,char * outLine)3483 void get_sampler_kernel_code(image_sampler_data *imageSampler, char *outLine)
3484 {
3485 const char *normalized;
3486 const char *addressMode;
3487 const char *filterMode;
3488
3489 if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP)
3490 addressMode = "CLK_ADDRESS_CLAMP";
3491 else if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
3492 addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
3493 else if (imageSampler->addressing_mode == CL_ADDRESS_REPEAT)
3494 addressMode = "CLK_ADDRESS_REPEAT";
3495 else if (imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
3496 addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
3497 else if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
3498 addressMode = "CLK_ADDRESS_NONE";
3499 else
3500 {
3501 log_error("**Error: Unknown addressing mode! Aborting...\n");
3502 abort();
3503 }
3504
3505 if (imageSampler->normalized_coords)
3506 normalized = "CLK_NORMALIZED_COORDS_TRUE";
3507 else
3508 normalized = "CLK_NORMALIZED_COORDS_FALSE";
3509
3510 if (imageSampler->filter_mode == CL_FILTER_LINEAR)
3511 filterMode = "CLK_FILTER_LINEAR";
3512 else
3513 filterMode = "CLK_FILTER_NEAREST";
3514
3515 sprintf(outLine, " const sampler_t imageSampler = %s | %s | %s;\n",
3516 addressMode, filterMode, normalized);
3517 }
3518
copy_image_data(image_descriptor * srcImageInfo,image_descriptor * dstImageInfo,void * imageValues,void * destImageValues,const size_t sourcePos[],const size_t destPos[],const size_t regionSize[])3519 void copy_image_data(image_descriptor *srcImageInfo,
3520 image_descriptor *dstImageInfo, void *imageValues,
3521 void *destImageValues, const size_t sourcePos[],
3522 const size_t destPos[], const size_t regionSize[])
3523 {
3524 // assert( srcImageInfo->format == dstImageInfo->format );
3525
3526 size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
3527 size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
3528 size_t src_row_pitch_lod, src_slice_pitch_lod;
3529 size_t dst_row_pitch_lod, dst_slice_pitch_lod;
3530
3531 size_t pixelSize = get_pixel_size(srcImageInfo->format);
3532
3533 sourcePos_lod[0] = sourcePos[0];
3534 sourcePos_lod[1] = sourcePos[1];
3535 sourcePos_lod[2] = sourcePos[2];
3536 destPos_lod[0] = destPos[0];
3537 destPos_lod[1] = destPos[1];
3538 destPos_lod[2] = destPos[2];
3539 src_row_pitch_lod = srcImageInfo->rowPitch;
3540 dst_row_pitch_lod = dstImageInfo->rowPitch;
3541 src_slice_pitch_lod = srcImageInfo->slicePitch;
3542 dst_slice_pitch_lod = dstImageInfo->slicePitch;
3543
3544 if (srcImageInfo->num_mip_levels > 1)
3545 {
3546 size_t src_width_lod = 1 /*srcImageInfo->width*/;
3547 size_t src_height_lod = 1 /*srcImageInfo->height*/;
3548 size_t src_depth_lod = 1 /*srcImageInfo->depth*/;
3549
3550 switch (srcImageInfo->type)
3551 {
3552 case CL_MEM_OBJECT_IMAGE1D:
3553 src_lod = sourcePos[1];
3554 sourcePos_lod[1] = sourcePos_lod[2] = 0;
3555 src_width_lod = (srcImageInfo->width >> src_lod)
3556 ? (srcImageInfo->width >> src_lod)
3557 : 1;
3558 break;
3559 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
3560 case CL_MEM_OBJECT_IMAGE2D:
3561 src_lod = sourcePos[2];
3562 sourcePos_lod[1] = sourcePos[1];
3563 sourcePos_lod[2] = 0;
3564 src_width_lod = (srcImageInfo->width >> src_lod)
3565 ? (srcImageInfo->width >> src_lod)
3566 : 1;
3567 if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
3568 src_height_lod = (srcImageInfo->height >> src_lod)
3569 ? (srcImageInfo->height >> src_lod)
3570 : 1;
3571 break;
3572 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
3573 case CL_MEM_OBJECT_IMAGE3D:
3574 src_lod = sourcePos[3];
3575 sourcePos_lod[1] = sourcePos[1];
3576 sourcePos_lod[2] = sourcePos[2];
3577 src_width_lod = (srcImageInfo->width >> src_lod)
3578 ? (srcImageInfo->width >> src_lod)
3579 : 1;
3580 src_height_lod = (srcImageInfo->height >> src_lod)
3581 ? (srcImageInfo->height >> src_lod)
3582 : 1;
3583 if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
3584 src_depth_lod = (srcImageInfo->depth >> src_lod)
3585 ? (srcImageInfo->depth >> src_lod)
3586 : 1;
3587 break;
3588 }
3589 src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
3590 src_row_pitch_lod =
3591 src_width_lod * get_pixel_size(srcImageInfo->format);
3592 src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
3593 }
3594
3595 if (dstImageInfo->num_mip_levels > 1)
3596 {
3597 size_t dst_width_lod = 1 /*dstImageInfo->width*/;
3598 size_t dst_height_lod = 1 /*dstImageInfo->height*/;
3599 size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
3600 switch (dstImageInfo->type)
3601 {
3602 case CL_MEM_OBJECT_IMAGE1D:
3603 dst_lod = destPos[1];
3604 destPos_lod[1] = destPos_lod[2] = 0;
3605 dst_width_lod = (dstImageInfo->width >> dst_lod)
3606 ? (dstImageInfo->width >> dst_lod)
3607 : 1;
3608 break;
3609 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
3610 case CL_MEM_OBJECT_IMAGE2D:
3611 dst_lod = destPos[2];
3612 destPos_lod[1] = destPos[1];
3613 destPos_lod[2] = 0;
3614 dst_width_lod = (dstImageInfo->width >> dst_lod)
3615 ? (dstImageInfo->width >> dst_lod)
3616 : 1;
3617 if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
3618 dst_height_lod = (dstImageInfo->height >> dst_lod)
3619 ? (dstImageInfo->height >> dst_lod)
3620 : 1;
3621 break;
3622 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
3623 case CL_MEM_OBJECT_IMAGE3D:
3624 dst_lod = destPos[3];
3625 destPos_lod[1] = destPos[1];
3626 destPos_lod[2] = destPos[2];
3627 dst_width_lod = (dstImageInfo->width >> dst_lod)
3628 ? (dstImageInfo->width >> dst_lod)
3629 : 1;
3630 dst_height_lod = (dstImageInfo->height >> dst_lod)
3631 ? (dstImageInfo->height >> dst_lod)
3632 : 1;
3633 if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
3634 dst_depth_lod = (dstImageInfo->depth >> dst_lod)
3635 ? (dstImageInfo->depth >> dst_lod)
3636 : 1;
3637 break;
3638 }
3639 dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
3640 dst_row_pitch_lod =
3641 dst_width_lod * get_pixel_size(dstImageInfo->format);
3642 dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
3643 }
3644
3645 // Get initial pointers
3646 char *sourcePtr = (char *)imageValues
3647 + sourcePos_lod[2] * src_slice_pitch_lod
3648 + sourcePos_lod[1] * src_row_pitch_lod + pixelSize * sourcePos_lod[0]
3649 + src_mip_level_offset;
3650 char *destPtr = (char *)destImageValues
3651 + destPos_lod[2] * dst_slice_pitch_lod
3652 + destPos_lod[1] * dst_row_pitch_lod + pixelSize * destPos_lod[0]
3653 + dst_mip_level_offset;
3654
3655 for (size_t z = 0; z < (regionSize[2] > 0 ? regionSize[2] : 1); z++)
3656 {
3657 char *rowSourcePtr = sourcePtr;
3658 char *rowDestPtr = destPtr;
3659 for (size_t y = 0; y < regionSize[1]; y++)
3660 {
3661 memcpy(rowDestPtr, rowSourcePtr, pixelSize * regionSize[0]);
3662 rowSourcePtr += src_row_pitch_lod;
3663 rowDestPtr += dst_row_pitch_lod;
3664 }
3665
3666 sourcePtr += src_slice_pitch_lod;
3667 destPtr += dst_slice_pitch_lod;
3668 }
3669 }
3670
random_float(float low,float high,MTdata d)3671 float random_float(float low, float high, MTdata d)
3672 {
3673 float t = (float)genrand_real1(d);
3674 return (1.0f - t) * low + t * high;
3675 }
3676
CoordWalker(void * coords,bool useFloats,size_t vecSize)3677 CoordWalker::CoordWalker(void *coords, bool useFloats, size_t vecSize)
3678 {
3679 if (useFloats)
3680 {
3681 mFloatCoords = (cl_float *)coords;
3682 mIntCoords = NULL;
3683 }
3684 else
3685 {
3686 mFloatCoords = NULL;
3687 mIntCoords = (cl_int *)coords;
3688 }
3689 mVecSize = vecSize;
3690 }
3691
~CoordWalker()3692 CoordWalker::~CoordWalker() {}
3693
Get(size_t idx,size_t el)3694 cl_float CoordWalker::Get(size_t idx, size_t el)
3695 {
3696 if (mIntCoords != NULL)
3697 return (cl_float)mIntCoords[idx * mVecSize + el];
3698 else
3699 return mFloatCoords[idx * mVecSize + el];
3700 }
3701
3702
print_read_header(const cl_image_format * format,image_sampler_data * sampler,bool err,int t)3703 void print_read_header(const cl_image_format *format,
3704 image_sampler_data *sampler, bool err, int t)
3705 {
3706 const char *addressMode = NULL;
3707 const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" };
3708
3709 if (sampler->addressing_mode == CL_ADDRESS_CLAMP)
3710 addressMode = "CL_ADDRESS_CLAMP";
3711 else if (sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
3712 addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
3713 else if (sampler->addressing_mode == CL_ADDRESS_REPEAT)
3714 addressMode = "CL_ADDRESS_REPEAT";
3715 else if (sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
3716 addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
3717 else
3718 addressMode = "CL_ADDRESS_NONE";
3719
3720 if (t)
3721 {
3722 if (err)
3723 log_error("[%-7s %-24s %d] - %s - %s - %s - %s\n",
3724 GetChannelOrderName(format->image_channel_order),
3725 GetChannelTypeName(format->image_channel_data_type),
3726 (int)get_format_channel_count(format),
3727 sampler->filter_mode == CL_FILTER_NEAREST
3728 ? "CL_FILTER_NEAREST"
3729 : "CL_FILTER_LINEAR",
3730 addressMode,
3731 normalizedNames[sampler->normalized_coords ? 1 : 0],
3732 t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
3733 else
3734 log_info("[%-7s %-24s %d] - %s - %s - %s - %s\n",
3735 GetChannelOrderName(format->image_channel_order),
3736 GetChannelTypeName(format->image_channel_data_type),
3737 (int)get_format_channel_count(format),
3738 sampler->filter_mode == CL_FILTER_NEAREST
3739 ? "CL_FILTER_NEAREST"
3740 : "CL_FILTER_LINEAR",
3741 addressMode,
3742 normalizedNames[sampler->normalized_coords ? 1 : 0],
3743 t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
3744 }
3745 else
3746 {
3747 if (err)
3748 log_error("[%-7s %-24s %d] - %s - %s - %s\n",
3749 GetChannelOrderName(format->image_channel_order),
3750 GetChannelTypeName(format->image_channel_data_type),
3751 (int)get_format_channel_count(format),
3752 sampler->filter_mode == CL_FILTER_NEAREST
3753 ? "CL_FILTER_NEAREST"
3754 : "CL_FILTER_LINEAR",
3755 addressMode,
3756 normalizedNames[sampler->normalized_coords ? 1 : 0]);
3757 else
3758 log_info("[%-7s %-24s %d] - %s - %s - %s\n",
3759 GetChannelOrderName(format->image_channel_order),
3760 GetChannelTypeName(format->image_channel_data_type),
3761 (int)get_format_channel_count(format),
3762 sampler->filter_mode == CL_FILTER_NEAREST
3763 ? "CL_FILTER_NEAREST"
3764 : "CL_FILTER_LINEAR",
3765 addressMode,
3766 normalizedNames[sampler->normalized_coords ? 1 : 0]);
3767 }
3768 }
3769
print_write_header(const cl_image_format * format,bool err=false)3770 void print_write_header(const cl_image_format *format, bool err = false)
3771 {
3772 if (err)
3773 log_error("[%-7s %-24s %d]\n",
3774 GetChannelOrderName(format->image_channel_order),
3775 GetChannelTypeName(format->image_channel_data_type),
3776 (int)get_format_channel_count(format));
3777 else
3778 log_info("[%-7s %-24s %d]\n",
3779 GetChannelOrderName(format->image_channel_order),
3780 GetChannelTypeName(format->image_channel_data_type),
3781 (int)get_format_channel_count(format));
3782 }
3783
3784
print_header(const cl_image_format * format,bool err=false)3785 void print_header(const cl_image_format *format, bool err = false)
3786 {
3787 if (err)
3788 {
3789 log_error("[%-7s %-24s %d]\n",
3790 GetChannelOrderName(format->image_channel_order),
3791 GetChannelTypeName(format->image_channel_data_type),
3792 (int)get_format_channel_count(format));
3793 }
3794 else
3795 {
3796 log_info("[%-7s %-24s %d]\n",
3797 GetChannelOrderName(format->image_channel_order),
3798 GetChannelTypeName(format->image_channel_data_type),
3799 (int)get_format_channel_count(format));
3800 }
3801 }
3802
find_format(cl_image_format * formatList,unsigned int numFormats,cl_image_format * formatToFind)3803 bool find_format(cl_image_format *formatList, unsigned int numFormats,
3804 cl_image_format *formatToFind)
3805 {
3806 for (unsigned int i = 0; i < numFormats; i++)
3807 {
3808 if (formatList[i].image_channel_order
3809 == formatToFind->image_channel_order
3810 && formatList[i].image_channel_data_type
3811 == formatToFind->image_channel_data_type)
3812 return true;
3813 }
3814 return false;
3815 }
3816
build_required_image_formats(cl_mem_flags flags,cl_mem_object_type image_type,cl_device_id device,std::vector<cl_image_format> & formatsToSupport)3817 void build_required_image_formats(
3818 cl_mem_flags flags, cl_mem_object_type image_type, cl_device_id device,
3819 std::vector<cl_image_format> &formatsToSupport)
3820 {
3821 formatsToSupport.clear();
3822
3823 // Minimum list of supported image formats for reading or writing (embedded
3824 // profile)
3825 static std::vector<cl_image_format> embeddedProfile_readOrWrite{
3826 // clang-format off
3827 { CL_RGBA, CL_UNORM_INT8 },
3828 { CL_RGBA, CL_UNORM_INT16 },
3829 { CL_RGBA, CL_SIGNED_INT8 },
3830 { CL_RGBA, CL_SIGNED_INT16 },
3831 { CL_RGBA, CL_SIGNED_INT32 },
3832 { CL_RGBA, CL_UNSIGNED_INT8 },
3833 { CL_RGBA, CL_UNSIGNED_INT16 },
3834 { CL_RGBA, CL_UNSIGNED_INT32 },
3835 { CL_RGBA, CL_HALF_FLOAT },
3836 { CL_RGBA, CL_FLOAT },
3837 // clang-format on
3838 };
3839
3840 // Minimum list of required image formats for reading or writing
3841 // num_channels, for all image types.
3842 static std::vector<cl_image_format> fullProfile_readOrWrite{
3843 // clang-format off
3844 { CL_RGBA, CL_UNORM_INT8 },
3845 { CL_RGBA, CL_UNORM_INT16 },
3846 { CL_RGBA, CL_SIGNED_INT8 },
3847 { CL_RGBA, CL_SIGNED_INT16 },
3848 { CL_RGBA, CL_SIGNED_INT32 },
3849 { CL_RGBA, CL_UNSIGNED_INT8 },
3850 { CL_RGBA, CL_UNSIGNED_INT16 },
3851 { CL_RGBA, CL_UNSIGNED_INT32 },
3852 { CL_RGBA, CL_HALF_FLOAT },
3853 { CL_RGBA, CL_FLOAT },
3854 { CL_BGRA, CL_UNORM_INT8 },
3855 // clang-format on
3856 };
3857
3858 // Minimum list of supported image formats for reading or writing
3859 // (OpenCL 2.0, 2.1, or 2.2), for all image types.
3860 static std::vector<cl_image_format> fullProfile_2x_readOrWrite{
3861 // clang-format off
3862 { CL_R, CL_UNORM_INT8 },
3863 { CL_R, CL_UNORM_INT16 },
3864 { CL_R, CL_SNORM_INT8 },
3865 { CL_R, CL_SNORM_INT16 },
3866 { CL_R, CL_SIGNED_INT8 },
3867 { CL_R, CL_SIGNED_INT16 },
3868 { CL_R, CL_SIGNED_INT32 },
3869 { CL_R, CL_UNSIGNED_INT8 },
3870 { CL_R, CL_UNSIGNED_INT16 },
3871 { CL_R, CL_UNSIGNED_INT32 },
3872 { CL_R, CL_HALF_FLOAT },
3873 { CL_R, CL_FLOAT },
3874 { CL_RG, CL_UNORM_INT8 },
3875 { CL_RG, CL_UNORM_INT16 },
3876 { CL_RG, CL_SNORM_INT8 },
3877 { CL_RG, CL_SNORM_INT16 },
3878 { CL_RG, CL_SIGNED_INT8 },
3879 { CL_RG, CL_SIGNED_INT16 },
3880 { CL_RG, CL_SIGNED_INT32 },
3881 { CL_RG, CL_UNSIGNED_INT8 },
3882 { CL_RG, CL_UNSIGNED_INT16 },
3883 { CL_RG, CL_UNSIGNED_INT32 },
3884 { CL_RG, CL_HALF_FLOAT },
3885 { CL_RG, CL_FLOAT },
3886 { CL_RGBA, CL_UNORM_INT8 },
3887 { CL_RGBA, CL_UNORM_INT16 },
3888 { CL_RGBA, CL_SNORM_INT8 },
3889 { CL_RGBA, CL_SNORM_INT16 },
3890 { CL_RGBA, CL_SIGNED_INT8 },
3891 { CL_RGBA, CL_SIGNED_INT16 },
3892 { CL_RGBA, CL_SIGNED_INT32 },
3893 { CL_RGBA, CL_UNSIGNED_INT8 },
3894 { CL_RGBA, CL_UNSIGNED_INT16 },
3895 { CL_RGBA, CL_UNSIGNED_INT32 },
3896 { CL_RGBA, CL_HALF_FLOAT },
3897 { CL_RGBA, CL_FLOAT },
3898 { CL_BGRA, CL_UNORM_INT8 },
3899 // clang-format on
3900 };
3901
3902 // Conditional addition to the 2x readOrWrite table:
3903 // Support for the CL_DEPTH image channel order is required only for 2D
3904 // images and 2D image arrays.
3905 static std::vector<cl_image_format> fullProfile_2x_readOrWrite_Depth{
3906 // clang-format off
3907 { CL_DEPTH, CL_UNORM_INT16 },
3908 { CL_DEPTH, CL_FLOAT },
3909 // clang-format on
3910 };
3911
3912 // Conditional addition to the 2x readOrWrite table:
3913 // Support for reading from the CL_sRGBA image channel order is optional for
3914 // 1D image buffers. Support for writing to the CL_sRGBA image channel order
3915 // is optional for all image types.
3916 static std::vector<cl_image_format> fullProfile_2x_readOrWrite_srgb{
3917 { CL_sRGBA, CL_UNORM_INT8 },
3918 };
3919
3920 // Minimum list of required image formats for reading and writing.
3921 static std::vector<cl_image_format> fullProfile_readAndWrite{
3922 // clang-format off
3923 { CL_R, CL_UNORM_INT8 },
3924 { CL_R, CL_SIGNED_INT8 },
3925 { CL_R, CL_SIGNED_INT16 },
3926 { CL_R, CL_SIGNED_INT32 },
3927 { CL_R, CL_UNSIGNED_INT8 },
3928 { CL_R, CL_UNSIGNED_INT16 },
3929 { CL_R, CL_UNSIGNED_INT32 },
3930 { CL_R, CL_HALF_FLOAT },
3931 { CL_R, CL_FLOAT },
3932 { CL_RGBA, CL_UNORM_INT8 },
3933 { CL_RGBA, CL_SIGNED_INT8 },
3934 { CL_RGBA, CL_SIGNED_INT16 },
3935 { CL_RGBA, CL_SIGNED_INT32 },
3936 { CL_RGBA, CL_UNSIGNED_INT8 },
3937 { CL_RGBA, CL_UNSIGNED_INT16 },
3938 { CL_RGBA, CL_UNSIGNED_INT32 },
3939 { CL_RGBA, CL_HALF_FLOAT },
3940 { CL_RGBA, CL_FLOAT },
3941 // clang-format on
3942 };
3943
3944 // Embedded profile
3945 if (gIsEmbedded)
3946 {
3947 copy(embeddedProfile_readOrWrite.begin(),
3948 embeddedProfile_readOrWrite.end(),
3949 back_inserter(formatsToSupport));
3950 }
3951 // Full profile
3952 else
3953 {
3954 Version version = get_device_cl_version(device);
3955 if (version < Version(2, 0) || version >= Version(3, 0))
3956 {
3957 // Full profile, OpenCL 1.2 or 3.0.
3958 if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
3959 {
3960 // Note: assumes that read-write images are supported!
3961 copy(fullProfile_readAndWrite.begin(),
3962 fullProfile_readAndWrite.end(),
3963 back_inserter(formatsToSupport));
3964 }
3965 else
3966 {
3967 copy(fullProfile_readOrWrite.begin(),
3968 fullProfile_readOrWrite.end(),
3969 back_inserter(formatsToSupport));
3970 }
3971 }
3972 else
3973 {
3974 // Full profile, OpenCL 2.0, 2.1, 2.2.
3975 if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
3976 {
3977 copy(fullProfile_readAndWrite.begin(),
3978 fullProfile_readAndWrite.end(),
3979 back_inserter(formatsToSupport));
3980 }
3981 else
3982 {
3983 copy(fullProfile_2x_readOrWrite.begin(),
3984 fullProfile_2x_readOrWrite.end(),
3985 back_inserter(formatsToSupport));
3986
3987 // Support for the CL_DEPTH image channel order is required only
3988 // for 2D images and 2D image arrays.
3989 if (image_type == CL_MEM_OBJECT_IMAGE2D
3990 || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
3991 {
3992 copy(fullProfile_2x_readOrWrite_Depth.begin(),
3993 fullProfile_2x_readOrWrite_Depth.end(),
3994 back_inserter(formatsToSupport));
3995 }
3996
3997 // Support for reading from the CL_sRGBA image channel order is
3998 // optional for 1D image buffers. Support for writing to the
3999 // CL_sRGBA image channel order is optional for all image types.
4000 if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER
4001 && flags == CL_MEM_READ_ONLY)
4002 {
4003 copy(fullProfile_2x_readOrWrite_srgb.begin(),
4004 fullProfile_2x_readOrWrite_srgb.end(),
4005 back_inserter(formatsToSupport));
4006 }
4007 }
4008 }
4009 }
4010 }
4011
is_image_format_required(cl_image_format format,cl_mem_flags flags,cl_mem_object_type image_type,cl_device_id device)4012 bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
4013 cl_mem_object_type image_type,
4014 cl_device_id device)
4015 {
4016 std::vector<cl_image_format> formatsToSupport;
4017 build_required_image_formats(flags, image_type, device, formatsToSupport);
4018
4019 for (auto &formatItr : formatsToSupport)
4020 {
4021 if (formatItr.image_channel_order == format.image_channel_order
4022 && formatItr.image_channel_data_type
4023 == format.image_channel_data_type)
4024 {
4025 return true;
4026 }
4027 }
4028
4029 return false;
4030 }
4031
compute_max_mip_levels(size_t width,size_t height,size_t depth)4032 cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth)
4033 {
4034 cl_uint retMaxMipLevels = 0;
4035 size_t max_dim = 0;
4036
4037 max_dim = width;
4038 max_dim = height > max_dim ? height : max_dim;
4039 max_dim = depth > max_dim ? depth : max_dim;
4040
4041 while (max_dim)
4042 {
4043 retMaxMipLevels++;
4044 max_dim >>= 1;
4045 }
4046 return retMaxMipLevels;
4047 }
4048
compute_mipmapped_image_size(image_descriptor imageInfo)4049 cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo)
4050 {
4051 cl_ulong retSize = 0;
4052 size_t curr_width, curr_height, curr_depth, curr_array_size;
4053 curr_width = imageInfo.width;
4054 curr_height = imageInfo.height;
4055 curr_depth = imageInfo.depth;
4056 curr_array_size = imageInfo.arraySize;
4057
4058 for (int i = 0; i < (int)imageInfo.num_mip_levels; i++)
4059 {
4060 switch (imageInfo.type)
4061 {
4062 case CL_MEM_OBJECT_IMAGE3D:
4063 retSize += (cl_ulong)curr_width * curr_height * curr_depth
4064 * get_pixel_size(imageInfo.format);
4065 break;
4066 case CL_MEM_OBJECT_IMAGE2D:
4067 retSize += (cl_ulong)curr_width * curr_height
4068 * get_pixel_size(imageInfo.format);
4069 break;
4070 case CL_MEM_OBJECT_IMAGE1D:
4071 retSize +=
4072 (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
4073 break;
4074 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4075 retSize += (cl_ulong)curr_width * curr_array_size
4076 * get_pixel_size(imageInfo.format);
4077 break;
4078 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4079 retSize += (cl_ulong)curr_width * curr_height * curr_array_size
4080 * get_pixel_size(imageInfo.format);
4081 break;
4082 }
4083
4084 switch (imageInfo.type)
4085 {
4086 case CL_MEM_OBJECT_IMAGE3D:
4087 curr_depth = curr_depth >> 1 ? curr_depth >> 1 : 1;
4088 case CL_MEM_OBJECT_IMAGE2D:
4089 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4090 curr_height = curr_height >> 1 ? curr_height >> 1 : 1;
4091 case CL_MEM_OBJECT_IMAGE1D:
4092 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4093 curr_width = curr_width >> 1 ? curr_width >> 1 : 1;
4094 }
4095 }
4096
4097 return retSize;
4098 }
4099
compute_mip_level_offset(image_descriptor * imageInfo,size_t lod)4100 size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod)
4101 {
4102 size_t retOffset = 0;
4103 size_t width, height, depth;
4104 width = imageInfo->width;
4105 height = imageInfo->height;
4106 depth = imageInfo->depth;
4107
4108 for (size_t i = 0; i < lod; i++)
4109 {
4110 switch (imageInfo->type)
4111 {
4112 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4113 retOffset += (size_t)width * height * imageInfo->arraySize
4114 * get_pixel_size(imageInfo->format);
4115 break;
4116 case CL_MEM_OBJECT_IMAGE3D:
4117 retOffset += (size_t)width * height * depth
4118 * get_pixel_size(imageInfo->format);
4119 break;
4120 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4121 retOffset += (size_t)width * imageInfo->arraySize
4122 * get_pixel_size(imageInfo->format);
4123 break;
4124 case CL_MEM_OBJECT_IMAGE2D:
4125 retOffset +=
4126 (size_t)width * height * get_pixel_size(imageInfo->format);
4127 break;
4128 case CL_MEM_OBJECT_IMAGE1D:
4129 retOffset += (size_t)width * get_pixel_size(imageInfo->format);
4130 break;
4131 }
4132
4133 // Compute next lod dimensions
4134 switch (imageInfo->type)
4135 {
4136 case CL_MEM_OBJECT_IMAGE3D: depth = (depth >> 1) ? (depth >> 1) : 1;
4137 case CL_MEM_OBJECT_IMAGE2D:
4138 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4139 height = (height >> 1) ? (height >> 1) : 1;
4140 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4141 case CL_MEM_OBJECT_IMAGE1D: width = (width >> 1) ? (width >> 1) : 1;
4142 }
4143 }
4144 return retOffset;
4145 }
4146
convert_image_type_to_string(cl_mem_object_type image_type)4147 const char *convert_image_type_to_string(cl_mem_object_type image_type)
4148 {
4149 switch (image_type)
4150 {
4151 case CL_MEM_OBJECT_IMAGE1D: return "1D";
4152 case CL_MEM_OBJECT_IMAGE2D: return "2D";
4153 case CL_MEM_OBJECT_IMAGE3D: return "3D";
4154 case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
4155 case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
4156 case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
4157 default: return "unrecognized object type";
4158 }
4159 }
4160