1 //
2 // Copyright (c) 2017,2021 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "imageHelpers.h"
17 #include <limits.h>
18 #include <assert.h>
19 #if defined(__APPLE__)
20 #include <sys/mman.h>
21 #endif
22 #if !defined(_WIN32) && !defined(__APPLE__)
23 #include <malloc.h>
24 #endif
25 #include <algorithm>
26 #include <iterator>
27 #if !defined(_WIN32)
28 #include <cmath>
29 #endif
30
31 RoundingMode gFloatToHalfRoundingMode = kDefaultRoundingMode;
32
33 cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT;
34 bool gTestRounding = false;
sRGBmap(float fc)35 double sRGBmap(float fc)
36 {
37 double c = (double)fc;
38
39 #if !defined(_WIN32)
40 if (std::isnan(c)) c = 0.0;
41 #else
42 if (_isnan(c)) c = 0.0;
43 #endif
44
45 if (c > 1.0)
46 c = 1.0;
47 else if (c < 0.0)
48 c = 0.0;
49 else if (c < 0.0031308)
50 c = 12.92 * c;
51 else
52 c = (1055.0 / 1000.0) * pow(c, 5.0 / 12.0) - (55.0 / 1000.0);
53
54 return c * 255.0;
55 }
56
sRGBunmap(float fc)57 double sRGBunmap(float fc)
58 {
59 double c = (double)fc;
60 double result;
61
62 if (c <= 0.04045)
63 result = c / 12.92;
64 else
65 result = pow((c + 0.055) / 1.055, 2.4);
66
67 return result;
68 }
69
70
get_format_type_size(const cl_image_format * format)71 uint32_t get_format_type_size(const cl_image_format *format)
72 {
73 return get_channel_data_type_size(format->image_channel_data_type);
74 }
75
get_channel_data_type_size(cl_channel_type channelType)76 uint32_t get_channel_data_type_size(cl_channel_type channelType)
77 {
78 switch (channelType)
79 {
80 case CL_SNORM_INT8:
81 case CL_UNORM_INT8:
82 case CL_SIGNED_INT8:
83 case CL_UNSIGNED_INT8: return 1;
84
85 case CL_SNORM_INT16:
86 case CL_UNORM_INT16:
87 case CL_SIGNED_INT16:
88 case CL_UNSIGNED_INT16:
89 case CL_HALF_FLOAT:
90 #ifdef CL_SFIXED14_APPLE
91 case CL_SFIXED14_APPLE:
92 #endif
93 return sizeof(cl_short);
94
95 case CL_SIGNED_INT32:
96 case CL_UNSIGNED_INT32: return sizeof(cl_int);
97
98 case CL_UNORM_SHORT_565:
99 case CL_UNORM_SHORT_555:
100 #ifdef OBSOLETE_FORAMT
101 case CL_UNORM_SHORT_565_REV:
102 case CL_UNORM_SHORT_555_REV:
103 #endif
104 return 2;
105
106 #ifdef OBSOLETE_FORAMT
107 case CL_UNORM_INT_8888:
108 case CL_UNORM_INT_8888_REV: return 4;
109 #endif
110
111 case CL_UNORM_INT_101010:
112 #ifdef OBSOLETE_FORAMT
113 case CL_UNORM_INT_101010_REV:
114 #endif
115 return 4;
116
117 case CL_FLOAT: return sizeof(cl_float);
118
119 default: return 0;
120 }
121 }
122
get_format_channel_count(const cl_image_format * format)123 uint32_t get_format_channel_count(const cl_image_format *format)
124 {
125 return get_channel_order_channel_count(format->image_channel_order);
126 }
127
get_channel_order_channel_count(cl_channel_order order)128 uint32_t get_channel_order_channel_count(cl_channel_order order)
129 {
130 switch (order)
131 {
132 case CL_R:
133 case CL_A:
134 case CL_Rx:
135 case CL_INTENSITY:
136 case CL_LUMINANCE:
137 case CL_DEPTH:
138 case CL_DEPTH_STENCIL: return 1;
139
140 case CL_RG:
141 case CL_RA:
142 case CL_RGx: return 2;
143
144 case CL_RGB:
145 case CL_RGBx:
146 case CL_sRGB:
147 case CL_sRGBx: return 3;
148
149 case CL_RGBA:
150 case CL_ARGB:
151 case CL_BGRA:
152 case CL_sRGBA:
153 case CL_sBGRA:
154 case CL_ABGR:
155 #ifdef CL_1RGB_APPLE
156 case CL_1RGB_APPLE:
157 #endif
158 #ifdef CL_BGR1_APPLE
159 case CL_BGR1_APPLE:
160 #endif
161 #ifdef CL_ABGR_APPLE
162 case CL_ABGR_APPLE:
163 #endif
164 return 4;
165
166 default:
167 log_error("%s does not support 0x%x\n", __FUNCTION__, order);
168 return 0;
169 }
170 }
171
get_channel_type_from_name(const char * name)172 cl_channel_type get_channel_type_from_name(const char *name)
173 {
174 struct
175 {
176 cl_channel_type type;
177 const char *name;
178 } typeNames[] = { { CL_SNORM_INT8, "CL_SNORM_INT8" },
179 { CL_SNORM_INT16, "CL_SNORM_INT16" },
180 { CL_UNORM_INT8, "CL_UNORM_INT8" },
181 { CL_UNORM_INT16, "CL_UNORM_INT16" },
182 { CL_UNORM_INT24, "CL_UNORM_INT24" },
183 { CL_UNORM_SHORT_565, "CL_UNORM_SHORT_565" },
184 { CL_UNORM_SHORT_555, "CL_UNORM_SHORT_555" },
185 { CL_UNORM_INT_101010, "CL_UNORM_INT_101010" },
186 { CL_SIGNED_INT8, "CL_SIGNED_INT8" },
187 { CL_SIGNED_INT16, "CL_SIGNED_INT16" },
188 { CL_SIGNED_INT32, "CL_SIGNED_INT32" },
189 { CL_UNSIGNED_INT8, "CL_UNSIGNED_INT8" },
190 { CL_UNSIGNED_INT16, "CL_UNSIGNED_INT16" },
191 { CL_UNSIGNED_INT32, "CL_UNSIGNED_INT32" },
192 { CL_HALF_FLOAT, "CL_HALF_FLOAT" },
193 { CL_FLOAT, "CL_FLOAT" },
194 #ifdef CL_SFIXED14_APPLE
195 { CL_SFIXED14_APPLE, "CL_SFIXED14_APPLE" }
196 #endif
197 };
198 for (size_t i = 0; i < sizeof(typeNames) / sizeof(typeNames[0]); i++)
199 {
200 if (strcmp(typeNames[i].name, name) == 0
201 || strcmp(typeNames[i].name + 3, name) == 0)
202 return typeNames[i].type;
203 }
204 return (cl_channel_type)-1;
205 }
206
get_channel_order_from_name(const char * name)207 cl_channel_order get_channel_order_from_name(const char *name)
208 {
209 const struct
210 {
211 cl_channel_order order;
212 const char *name;
213 } orderNames[] = {
214 { CL_R, "CL_R" },
215 { CL_A, "CL_A" },
216 { CL_Rx, "CL_Rx" },
217 { CL_RG, "CL_RG" },
218 { CL_RA, "CL_RA" },
219 { CL_RGx, "CL_RGx" },
220 { CL_RGB, "CL_RGB" },
221 { CL_RGBx, "CL_RGBx" },
222 { CL_RGBA, "CL_RGBA" },
223 { CL_BGRA, "CL_BGRA" },
224 { CL_ARGB, "CL_ARGB" },
225 { CL_INTENSITY, "CL_INTENSITY" },
226 { CL_LUMINANCE, "CL_LUMINANCE" },
227 { CL_DEPTH, "CL_DEPTH" },
228 { CL_DEPTH_STENCIL, "CL_DEPTH_STENCIL" },
229 { CL_sRGB, "CL_sRGB" },
230 { CL_sRGBx, "CL_sRGBx" },
231 { CL_sRGBA, "CL_sRGBA" },
232 { CL_sBGRA, "CL_sBGRA" },
233 { CL_ABGR, "CL_ABGR" },
234 #ifdef CL_1RGB_APPLE
235 { CL_1RGB_APPLE, "CL_1RGB_APPLE" },
236 #endif
237 #ifdef CL_BGR1_APPLE
238 { CL_BGR1_APPLE, "CL_BGR1_APPLE" },
239 #endif
240 };
241
242 for (size_t i = 0; i < sizeof(orderNames) / sizeof(orderNames[0]); i++)
243 {
244 if (strcmp(orderNames[i].name, name) == 0
245 || strcmp(orderNames[i].name + 3, name) == 0)
246 return orderNames[i].order;
247 }
248 return (cl_channel_order)-1;
249 }
250
251
is_format_signed(const cl_image_format * format)252 int is_format_signed(const cl_image_format *format)
253 {
254 switch (format->image_channel_data_type)
255 {
256 case CL_SNORM_INT8:
257 case CL_SIGNED_INT8:
258 case CL_SNORM_INT16:
259 case CL_SIGNED_INT16:
260 case CL_SIGNED_INT32:
261 case CL_HALF_FLOAT:
262 case CL_FLOAT:
263 #ifdef CL_SFIXED14_APPLE
264 case CL_SFIXED14_APPLE:
265 #endif
266 return 1;
267
268 default: return 0;
269 }
270 }
271
get_pixel_size(const cl_image_format * format)272 uint32_t get_pixel_size(const cl_image_format *format)
273 {
274 switch (format->image_channel_data_type)
275 {
276 case CL_SNORM_INT8:
277 case CL_UNORM_INT8:
278 case CL_SIGNED_INT8:
279 case CL_UNSIGNED_INT8: return get_format_channel_count(format);
280
281 case CL_SNORM_INT16:
282 case CL_UNORM_INT16:
283 case CL_SIGNED_INT16:
284 case CL_UNSIGNED_INT16:
285 case CL_HALF_FLOAT:
286 #ifdef CL_SFIXED14_APPLE
287 case CL_SFIXED14_APPLE:
288 #endif
289 return get_format_channel_count(format) * sizeof(cl_ushort);
290
291 case CL_SIGNED_INT32:
292 case CL_UNSIGNED_INT32:
293 return get_format_channel_count(format) * sizeof(cl_int);
294
295 case CL_UNORM_SHORT_565:
296 case CL_UNORM_SHORT_555:
297 #ifdef OBSOLETE_FORAMT
298 case CL_UNORM_SHORT_565_REV:
299 case CL_UNORM_SHORT_555_REV:
300 #endif
301 return 2;
302
303 #ifdef OBSOLETE_FORAMT
304 case CL_UNORM_INT_8888:
305 case CL_UNORM_INT_8888_REV: return 4;
306 #endif
307
308 case CL_UNORM_INT_101010:
309 #ifdef OBSOLETE_FORAMT
310 case CL_UNORM_INT_101010_REV:
311 #endif
312 return 4;
313
314 case CL_FLOAT:
315 return get_format_channel_count(format) * sizeof(cl_float);
316
317 default: return 0;
318 }
319 }
320
next_power_of_two(uint32_t v)321 uint32_t next_power_of_two(uint32_t v)
322 {
323 v--;
324 v |= v >> 1;
325 v |= v >> 2;
326 v |= v >> 4;
327 v |= v >> 8;
328 v |= v >> 16;
329 v++;
330 return v;
331 }
332
get_pixel_alignment(const cl_image_format * format)333 uint32_t get_pixel_alignment(const cl_image_format *format)
334 {
335 return next_power_of_two(get_pixel_size(format));
336 }
337
get_8_bit_image_format(cl_context context,cl_mem_object_type objType,cl_mem_flags flags,size_t channelCount,cl_image_format * outFormat)338 int get_8_bit_image_format(cl_context context, cl_mem_object_type objType,
339 cl_mem_flags flags, size_t channelCount,
340 cl_image_format *outFormat)
341 {
342 cl_image_format formatList[128];
343 unsigned int outFormatCount, i;
344 int error;
345
346
347 /* Make sure each image format is supported */
348 if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
349 formatList, &outFormatCount)))
350 return error;
351
352
353 /* Look for one that is an 8-bit format */
354 for (i = 0; i < outFormatCount; i++)
355 {
356 if (formatList[i].image_channel_data_type == CL_SNORM_INT8
357 || formatList[i].image_channel_data_type == CL_UNORM_INT8
358 || formatList[i].image_channel_data_type == CL_SIGNED_INT8
359 || formatList[i].image_channel_data_type == CL_UNSIGNED_INT8)
360 {
361 if (!channelCount
362 || (channelCount
363 && (get_format_channel_count(&formatList[i])
364 == channelCount)))
365 {
366 *outFormat = formatList[i];
367 return 0;
368 }
369 }
370 }
371
372 return -1;
373 }
374
get_32_bit_image_format(cl_context context,cl_mem_object_type objType,cl_mem_flags flags,size_t channelCount,cl_image_format * outFormat)375 int get_32_bit_image_format(cl_context context, cl_mem_object_type objType,
376 cl_mem_flags flags, size_t channelCount,
377 cl_image_format *outFormat)
378 {
379 cl_image_format formatList[128];
380 unsigned int outFormatCount, i;
381 int error;
382
383
384 /* Make sure each image format is supported */
385 if ((error = clGetSupportedImageFormats(context, flags, objType, 128,
386 formatList, &outFormatCount)))
387 return error;
388
389 /* Look for one that is an 8-bit format */
390 for (i = 0; i < outFormatCount; i++)
391 {
392 if (formatList[i].image_channel_data_type == CL_UNORM_INT_101010
393 || formatList[i].image_channel_data_type == CL_FLOAT
394 || formatList[i].image_channel_data_type == CL_SIGNED_INT32
395 || formatList[i].image_channel_data_type == CL_UNSIGNED_INT32)
396 {
397 if (!channelCount
398 || (channelCount
399 && (get_format_channel_count(&formatList[i])
400 == channelCount)))
401 {
402 *outFormat = formatList[i];
403 return 0;
404 }
405 }
406 }
407
408 return -1;
409 }
410
print_first_pixel_difference_error(size_t where,const char * sourcePixel,const char * destPixel,image_descriptor * imageInfo,size_t y,size_t thirdDim)411 void print_first_pixel_difference_error(size_t where, const char *sourcePixel,
412 const char *destPixel,
413 image_descriptor *imageInfo, size_t y,
414 size_t thirdDim)
415 {
416 size_t pixel_size = get_pixel_size(imageInfo->format);
417
418 log_error("ERROR: Scanline %d did not verify for image size %d,%d,%d "
419 "pitch %d (extra %d bytes)\n",
420 (int)y, (int)imageInfo->width, (int)imageInfo->height,
421 (int)thirdDim, (int)imageInfo->rowPitch,
422 (int)imageInfo->rowPitch
423 - (int)imageInfo->width * (int)pixel_size);
424 log_error("Failed at column: %ld ", where);
425
426 switch (pixel_size)
427 {
428 case 1:
429 log_error("*0x%2.2x vs. 0x%2.2x\n", ((cl_uchar *)sourcePixel)[0],
430 ((cl_uchar *)destPixel)[0]);
431 break;
432 case 2:
433 log_error("*0x%4.4x vs. 0x%4.4x\n", ((cl_ushort *)sourcePixel)[0],
434 ((cl_ushort *)destPixel)[0]);
435 break;
436 case 3:
437 log_error("*{0x%2.2x, 0x%2.2x, 0x%2.2x} vs. "
438 "{0x%2.2x, 0x%2.2x, 0x%2.2x}\n",
439 ((cl_uchar *)sourcePixel)[0],
440 ((cl_uchar *)sourcePixel)[1],
441 ((cl_uchar *)sourcePixel)[2], ((cl_uchar *)destPixel)[0],
442 ((cl_uchar *)destPixel)[1], ((cl_uchar *)destPixel)[2]);
443 break;
444 case 4:
445 log_error("*0x%8.8x vs. 0x%8.8x\n", ((cl_uint *)sourcePixel)[0],
446 ((cl_uint *)destPixel)[0]);
447 break;
448 case 6:
449 log_error(
450 "*{0x%4.4x, 0x%4.4x, 0x%4.4x} vs. "
451 "{0x%4.4x, 0x%4.4x, 0x%4.4x}\n",
452 ((cl_ushort *)sourcePixel)[0], ((cl_ushort *)sourcePixel)[1],
453 ((cl_ushort *)sourcePixel)[2], ((cl_ushort *)destPixel)[0],
454 ((cl_ushort *)destPixel)[1], ((cl_ushort *)destPixel)[2]);
455 break;
456 case 8:
457 log_error("*0x%16.16llx vs. 0x%16.16llx\n",
458 ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]);
459 break;
460 case 12:
461 log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
462 "{0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
463 ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
464 ((cl_uint *)sourcePixel)[2], ((cl_uint *)destPixel)[0],
465 ((cl_uint *)destPixel)[1], ((cl_uint *)destPixel)[2]);
466 break;
467 case 16:
468 log_error("*{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x} vs. "
469 "{0x%8.8x, 0x%8.8x, 0x%8.8x, 0x%8.8x}\n",
470 ((cl_uint *)sourcePixel)[0], ((cl_uint *)sourcePixel)[1],
471 ((cl_uint *)sourcePixel)[2], ((cl_uint *)sourcePixel)[3],
472 ((cl_uint *)destPixel)[0], ((cl_uint *)destPixel)[1],
473 ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]);
474 break;
475 default:
476 log_error("Don't know how to print pixel size of %ld\n",
477 pixel_size);
478 break;
479 }
480 }
481
random_log_in_range(int minV,int maxV,MTdata d)482 int random_log_in_range(int minV, int maxV, MTdata d)
483 {
484 double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1);
485 int iv = (int)((float)(maxV - minV) * v);
486 return iv + minV;
487 }
488
489
490 // Define the addressing functions
491 typedef int (*AddressFn)(int value, size_t maxValue);
492
NoAddressFn(int value,size_t maxValue)493 int NoAddressFn(int value, size_t maxValue) { return value; }
RepeatAddressFn(int value,size_t maxValue)494 int RepeatAddressFn(int value, size_t maxValue)
495 {
496 if (value < 0)
497 value += (int)maxValue;
498 else if (value >= (int)maxValue)
499 value -= (int)maxValue;
500 return value;
501 }
MirroredRepeatAddressFn(int value,size_t maxValue)502 int MirroredRepeatAddressFn(int value, size_t maxValue)
503 {
504 if (value < 0)
505 value = 0;
506 else if ((size_t)value >= maxValue)
507 value = (int)(maxValue - 1);
508 return value;
509 }
ClampAddressFn(int value,size_t maxValue)510 int ClampAddressFn(int value, size_t maxValue)
511 {
512 return (value < -1) ? -1
513 : ((value > (cl_long)maxValue) ? (int)maxValue : value);
514 }
ClampToEdgeNearestFn(int value,size_t maxValue)515 int ClampToEdgeNearestFn(int value, size_t maxValue)
516 {
517 return (value < 0)
518 ? 0
519 : (((size_t)value > maxValue - 1) ? (int)maxValue - 1 : value);
520 }
521 AddressFn ClampToEdgeLinearFn = ClampToEdgeNearestFn;
522
523 // Note: normalized coords get repeated in normalized space, not unnormalized
524 // space! hence the special case here
525 volatile float gFloatHome;
RepeatNormalizedAddressFn(float fValue,size_t maxValue)526 float RepeatNormalizedAddressFn(float fValue, size_t maxValue)
527 {
528 #ifndef _MSC_VER // Use original if not the VS compiler.
529 // General computation for repeat
530 return (fValue - floorf(fValue)) * (float)maxValue; // Reduce to [0, 1.f]
531 #else // Otherwise, use this instead:
532 // Home the subtraction to a float to break up the sequence of x87
533 // instructions emitted by the VS compiler.
534 gFloatHome = fValue - floorf(fValue);
535 return gFloatHome * (float)maxValue;
536 #endif
537 }
538
MirroredRepeatNormalizedAddressFn(float fValue,size_t maxValue)539 float MirroredRepeatNormalizedAddressFn(float fValue, size_t maxValue)
540 {
541 // Round to nearest multiple of two.
542 // Note halfway values flip flop here due to rte, but they both end up
543 // pointing the same place at the end of the day.
544 float s_prime = 2.0f * rintf(fValue * 0.5f);
545
546 // Reduce to [-1, 1], Apply mirroring -> [0, 1]
547 s_prime = fabsf(fValue - s_prime);
548
549 // un-normalize
550 return s_prime * (float)maxValue;
551 }
552
553 struct AddressingTable
554 {
AddressingTableAddressingTable555 AddressingTable()
556 {
557 ct_assert((CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6));
558 ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2);
559
560 mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
561 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn;
562 mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE]
563 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = NoAddressFn;
564 mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
565 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = RepeatAddressFn;
566 mTable[CL_ADDRESS_REPEAT - CL_ADDRESS_NONE]
567 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = RepeatAddressFn;
568 mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
569 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampToEdgeNearestFn;
570 mTable[CL_ADDRESS_CLAMP_TO_EDGE - CL_ADDRESS_NONE]
571 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampToEdgeLinearFn;
572 mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
573 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = ClampAddressFn;
574 mTable[CL_ADDRESS_CLAMP - CL_ADDRESS_NONE]
575 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = ClampAddressFn;
576 mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
577 [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
578 mTable[CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE]
579 [CL_FILTER_LINEAR - CL_FILTER_NEAREST] = MirroredRepeatAddressFn;
580 }
581
operator []AddressingTable582 AddressFn operator[](image_sampler_data *sampler)
583 {
584 return mTable[(int)sampler->addressing_mode - CL_ADDRESS_NONE]
585 [(int)sampler->filter_mode - CL_FILTER_NEAREST];
586 }
587
588 AddressFn mTable[6][2];
589 };
590
591 static AddressingTable sAddressingTable;
592
is_sRGBA_order(cl_channel_order image_channel_order)593 bool is_sRGBA_order(cl_channel_order image_channel_order)
594 {
595 switch (image_channel_order)
596 {
597 case CL_sRGB:
598 case CL_sRGBx:
599 case CL_sRGBA:
600 case CL_sBGRA: return true;
601 default: return false;
602 }
603 }
604
605 // Format helpers
606
has_alpha(const cl_image_format * format)607 int has_alpha(const cl_image_format *format)
608 {
609 switch (format->image_channel_order)
610 {
611 case CL_R: return 0;
612 case CL_A: return 1;
613 case CL_Rx: return 0;
614 case CL_RG: return 0;
615 case CL_RA: return 1;
616 case CL_RGx: return 0;
617 case CL_RGB:
618 case CL_sRGB: return 0;
619 case CL_RGBx:
620 case CL_sRGBx: return 0;
621 case CL_RGBA: return 1;
622 case CL_BGRA: return 1;
623 case CL_ARGB: return 1;
624 case CL_ABGR: return 1;
625 case CL_INTENSITY: return 1;
626 case CL_LUMINANCE: return 0;
627 #ifdef CL_BGR1_APPLE
628 case CL_BGR1_APPLE: return 1;
629 #endif
630 #ifdef CL_1RGB_APPLE
631 case CL_1RGB_APPLE: return 1;
632 #endif
633 case CL_sRGBA:
634 case CL_sBGRA: return 1;
635 case CL_DEPTH: return 0;
636 default:
637 log_error("Invalid image channel order: %d\n",
638 format->image_channel_order);
639 return 0;
640 }
641 }
642
643 #define PRINT_MAX_SIZE_LOGIC 0
644
645 #define SWAP(_a, _b) \
646 do \
647 { \
648 _a ^= _b; \
649 _b ^= _a; \
650 _a ^= _b; \
651 } while (0)
652 #ifndef MAX
653 #define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b))
654 #endif
655
get_max_sizes(size_t * numberOfSizes,const int maxNumberOfSizes,size_t sizes[][3],size_t maxWidth,size_t maxHeight,size_t maxDepth,size_t maxArraySize,const cl_ulong maxIndividualAllocSize,const cl_ulong maxTotalAllocSize,cl_mem_object_type image_type,const cl_image_format * format,int usingMaxPixelSizeBuffer)656 void get_max_sizes(
657 size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3],
658 size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize,
659 const cl_ulong maxIndividualAllocSize, // CL_DEVICE_MAX_MEM_ALLOC_SIZE
660 const cl_ulong maxTotalAllocSize, // CL_DEVICE_GLOBAL_MEM_SIZE
661 cl_mem_object_type image_type, const cl_image_format *format,
662 int usingMaxPixelSizeBuffer)
663 {
664
665 bool is3D = (image_type == CL_MEM_OBJECT_IMAGE3D);
666 bool isArray = (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
667 || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY);
668
669 // Validate we have a reasonable max depth for 3D
670 if (is3D && maxDepth < 2)
671 {
672 log_error("ERROR: Requesting max image sizes for 3D images when max "
673 "depth is < 2.\n");
674 *numberOfSizes = 0;
675 return;
676 }
677 // Validate we have a reasonable max array size for 1D & 2D image arrays
678 if (isArray && maxArraySize < 2)
679 {
680 log_error("ERROR: Requesting max image sizes for an image array when "
681 "max array size is < 1.\n");
682 *numberOfSizes = 0;
683 return;
684 }
685
686 // Reduce the maximum because we are trying to test the max image
687 // dimensions, not the memory allocation
688 cl_ulong adjustedMaxTotalAllocSize = maxTotalAllocSize / 4;
689 cl_ulong adjustedMaxIndividualAllocSize = maxIndividualAllocSize / 4;
690 log_info("Note: max individual allocation adjusted down from %gMB to %gMB "
691 "and max total allocation adjusted down from %gMB to %gMB.\n",
692 maxIndividualAllocSize / (1024.0 * 1024.0),
693 adjustedMaxIndividualAllocSize / (1024.0 * 1024.0),
694 maxTotalAllocSize / (1024.0 * 1024.0),
695 adjustedMaxTotalAllocSize / (1024.0 * 1024.0));
696
697 // Cap our max allocation to 1.0GB.
698 // FIXME -- why? In the interest of not taking a long time? We should
699 // still test this stuff...
700 if (adjustedMaxTotalAllocSize > (cl_ulong)1024 * 1024 * 1024)
701 {
702 adjustedMaxTotalAllocSize = (cl_ulong)1024 * 1024 * 1024;
703 log_info("Limiting max total allocation size to %gMB (down from %gMB) "
704 "for test.\n",
705 adjustedMaxTotalAllocSize / (1024.0 * 1024.0),
706 maxTotalAllocSize / (1024.0 * 1024.0));
707 }
708
709 cl_ulong maxAllocSize = adjustedMaxIndividualAllocSize;
710 if (adjustedMaxTotalAllocSize < adjustedMaxIndividualAllocSize * 2)
711 maxAllocSize = adjustedMaxTotalAllocSize / 2;
712
713 size_t raw_pixel_size = get_pixel_size(format);
714 // If the test will be creating input (src) buffer of type int4 or float4,
715 // number of pixels will be governed by sizeof(int4 or float4) and not
716 // sizeof(dest fomat) Also if pixel size is 12 bytes i.e. RGB or RGBx, we
717 // adjust it to 16 bytes as GPUs has no concept of 3 channel images. GPUs
718 // expand these to four channel RGBA.
719 if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16;
720 size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size;
721
722 log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, "
723 "per-allocation limit %gMB.\n",
724 maxWidth, maxHeight, isArray ? maxArraySize : maxDepth,
725 raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0)));
726
727 // Keep track of the maximum sizes for each dimension
728 size_t maximum_sizes[] = { maxWidth, maxHeight, maxDepth };
729
730 switch (image_type)
731 {
732 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
733 maximum_sizes[1] = maxArraySize;
734 maximum_sizes[2] = 1;
735 break;
736 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
737 maximum_sizes[2] = maxArraySize;
738 break;
739 }
740
741
742 // Given one fixed sized dimension, this code finds one or two other
743 // dimensions, both with very small size, such that the size does not
744 // exceed the maximum passed to this function
745
746 #if defined(__x86_64) || defined(__arm64__) || defined(__ppc64__)
747 size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 10, 11, 13, 15 };
748 #else
749 size_t other_sizes[] = { 2, 3, 5, 6, 7, 9, 11, 13 };
750 #endif
751
752 static size_t other_size = 0;
753 enum
754 {
755 num_other_sizes = sizeof(other_sizes) / sizeof(size_t)
756 };
757
758 (*numberOfSizes) = 0;
759
760 if (image_type == CL_MEM_OBJECT_IMAGE1D)
761 {
762
763 double M = maximum_sizes[0];
764
765 // Store the size
766 sizes[(*numberOfSizes)][0] = (size_t)M;
767 sizes[(*numberOfSizes)][1] = 1;
768 sizes[(*numberOfSizes)][2] = 1;
769 ++(*numberOfSizes);
770 }
771
772 else if (image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY
773 || image_type == CL_MEM_OBJECT_IMAGE2D)
774 {
775
776 for (int fixed_dim = 0; fixed_dim < 2; ++fixed_dim)
777 {
778
779 // Determine the size of the fixed dimension
780 double M = maximum_sizes[fixed_dim];
781 double A = max_pixels;
782
783 int x0_dim = !fixed_dim;
784 double x0 =
785 fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M),
786 maximum_sizes[x0_dim]);
787
788 // Store the size
789 sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
790 sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
791 sizes[(*numberOfSizes)][2] = 1;
792 ++(*numberOfSizes);
793 }
794 }
795
796 else if (image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY
797 || image_type == CL_MEM_OBJECT_IMAGE3D)
798 {
799
800 // Iterate over dimensions, finding sizes for the non-fixed dimension
801 for (int fixed_dim = 0; fixed_dim < 3; ++fixed_dim)
802 {
803
804 // Determine the size of the fixed dimension
805 double M = maximum_sizes[fixed_dim];
806 double A = max_pixels;
807
808 // Find two other dimensions, x0 and x1
809 int x0_dim = (fixed_dim == 0) ? 1 : 0;
810 int x1_dim = (fixed_dim == 2) ? 1 : 2;
811
812 // Choose two other sizes for these dimensions
813 double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]),
814 other_sizes[(other_size++) % num_other_sizes]);
815 // GPUs have certain restrictions on minimum width (row alignment)
816 // of images which has given us issues testing small widths in this
817 // test (say we set width to 3 for testing, and compute size based
818 // on this width and decide it fits within vram ... but GPU driver
819 // decides that, due to row alignment requirements, it has to use
820 // width of 16 which doesnt fit in vram). For this purpose we are
821 // not testing width < 16 for this test.
822 if (x0_dim == 0 && x0 < 16) x0 = 16;
823 double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]),
824 other_sizes[(other_size++) % num_other_sizes]);
825
826 // Valid image sizes cannot be below 1. Due to the workaround for
827 // the xo_dim where x0 is overidden to 16 there might not be enough
828 // space left for x1 dimension. This could be a fractional 0.x size
829 // that when cast to integer would result in a value 0. In these
830 // cases we clamp the size to a minimum of 1.
831 if (x1 < 1) x1 = 1;
832
833 // M and x0 cannot be '0' as they derive from clDeviceInfo calls
834 assert(x0 > 0 && M > 0);
835
836 // Store the size
837 sizes[(*numberOfSizes)][fixed_dim] = (size_t)M;
838 sizes[(*numberOfSizes)][x0_dim] = (size_t)x0;
839 sizes[(*numberOfSizes)][x1_dim] = (size_t)x1;
840 ++(*numberOfSizes);
841 }
842 }
843
844 // Log the results
845 for (int j = 0; j < (int)(*numberOfSizes); j++)
846 {
847 switch (image_type)
848 {
849 case CL_MEM_OBJECT_IMAGE1D:
850 log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0],
851 raw_pixel_size * sizes[j][0] * sizes[j][1]
852 * sizes[j][2] / (1024.0 * 1024.0));
853 break;
854 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
855 case CL_MEM_OBJECT_IMAGE2D:
856 log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j,
857 sizes[j][0], sizes[j][1],
858 raw_pixel_size * sizes[j][0] * sizes[j][1]
859 * sizes[j][2] / (1024.0 * 1024.0));
860 break;
861 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
862 case CL_MEM_OBJECT_IMAGE3D:
863 log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j,
864 sizes[j][0], sizes[j][1], sizes[j][2],
865 raw_pixel_size * sizes[j][0] * sizes[j][1]
866 * sizes[j][2] / (1024.0 * 1024.0));
867 break;
868 }
869 }
870 }
871
get_max_absolute_error(const cl_image_format * format,image_sampler_data * sampler)872 float get_max_absolute_error(const cl_image_format *format,
873 image_sampler_data *sampler)
874 {
875 if (sampler->filter_mode == CL_FILTER_NEAREST) return 0.0f;
876
877 switch (format->image_channel_data_type)
878 {
879 case CL_SNORM_INT8: return 1.0f / 127.0f;
880 case CL_UNORM_INT8: return 1.0f / 255.0f;
881 case CL_UNORM_INT16: return 1.0f / 65535.0f;
882 case CL_SNORM_INT16: return 1.0f / 32767.0f;
883 case CL_FLOAT: return CL_FLT_MIN;
884 #ifdef CL_SFIXED14_APPLE
885 case CL_SFIXED14_APPLE: return 0x1.0p-14f;
886 #endif
887 default: return 0.0f;
888 }
889 }
890
get_max_relative_error(const cl_image_format * format,image_sampler_data * sampler,int is3D,int isLinearFilter)891 float get_max_relative_error(const cl_image_format *format,
892 image_sampler_data *sampler, int is3D,
893 int isLinearFilter)
894 {
895 float maxError = 0.0f;
896 float sampleCount = 1.0f;
897 if (isLinearFilter) sampleCount = is3D ? 8.0f : 4.0f;
898
899 // Note that the ULP is defined here as the unit in the last place of the
900 // maximum magnitude sample used for filtering.
901
902 // Section 8.3
903 switch (format->image_channel_data_type)
904 {
905 // The spec allows 2 ulps of error for normalized formats
906 case CL_SNORM_INT8:
907 case CL_UNORM_INT8:
908 case CL_SNORM_INT16:
909 case CL_UNORM_INT16:
910 case CL_UNORM_SHORT_565:
911 case CL_UNORM_SHORT_555:
912 case CL_UNORM_INT_101010:
913 // Maximum sampling error for round to zero normalization based on
914 // multiplication by reciprocal (using reciprocal generated in
915 // round to +inf mode, so that 1.0 matches spec)
916 maxError = 2 * FLT_EPSILON * sampleCount;
917 break;
918
919 // If the implementation supports these formats then it will have to
920 // allow rounding error here too, because not all 32-bit ints are
921 // exactly representable in float
922 case CL_SIGNED_INT32:
923 case CL_UNSIGNED_INT32: maxError = 1 * FLT_EPSILON; break;
924 }
925
926
927 // Section 8.2
928 if (sampler->addressing_mode == CL_ADDRESS_REPEAT
929 || sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT
930 || sampler->filter_mode != CL_FILTER_NEAREST
931 || sampler->normalized_coords)
932 #if defined(__APPLE__)
933 {
934 if (sampler->filter_mode != CL_FILTER_NEAREST)
935 {
936 // The maximum
937 if (gDeviceType == CL_DEVICE_TYPE_GPU)
938 // Some GPUs ain't so accurate
939 maxError += MAKE_HEX_FLOAT(0x1.0p-4f, 0x1L, -4);
940 else
941 // The standard method of 2d linear filtering delivers 4.0 ulps
942 // of error in round to nearest (8 in rtz).
943 maxError += 4.0f * FLT_EPSILON;
944 }
945 else
946 // normalized coordinates will introduce some error into the
947 // fractional part of the address, affecting results
948 maxError += 4.0f * FLT_EPSILON;
949 }
950 #else
951 {
952 #if !defined(_WIN32)
953 #warning Implementations will likely wish to pick a max allowable sampling error policy here that is better than the spec
954 #endif
955 // The spec allows linear filters to return any result most of the time.
956 // That's fine for implementations but a problem for testing. After all
957 // users aren't going to like garbage images. We have "picked a number"
958 // here that we are going to attempt to conform to. Implementations are
959 // free to pick another number, like infinity, if they like.
960 // We picked a number for you, to provide /some/ sanity
961 maxError = MAKE_HEX_FLOAT(0x1.0p-7f, 0x1L, -7);
962 // ...but this is what the spec allows:
963 // maxError = INFINITY;
964 // Please feel free to pick any positive number. (NaN wont work.)
965 }
966 #endif
967
968 // The error calculation itself can introduce error
969 maxError += FLT_EPSILON * 2;
970
971 return maxError;
972 }
973
get_format_max_int(const cl_image_format * format)974 size_t get_format_max_int(const cl_image_format *format)
975 {
976 switch (format->image_channel_data_type)
977 {
978 case CL_SNORM_INT8:
979 case CL_SIGNED_INT8: return 127;
980 case CL_UNORM_INT8:
981 case CL_UNSIGNED_INT8: return 255;
982
983 case CL_SNORM_INT16:
984 case CL_SIGNED_INT16: return 32767;
985
986 case CL_UNORM_INT16:
987 case CL_UNSIGNED_INT16: return 65535;
988
989 case CL_SIGNED_INT32: return 2147483647L;
990
991 case CL_UNSIGNED_INT32: return 4294967295LL;
992
993 case CL_UNORM_SHORT_565:
994 case CL_UNORM_SHORT_555: return 31;
995
996 case CL_UNORM_INT_101010: return 1023;
997
998 case CL_HALF_FLOAT: return 1 << 10;
999
1000 #ifdef CL_SFIXED14_APPLE
1001 case CL_SFIXED14_APPLE: return 16384;
1002 #endif
1003 default: return 0;
1004 }
1005 }
1006
get_format_min_int(const cl_image_format * format)1007 int get_format_min_int(const cl_image_format *format)
1008 {
1009 switch (format->image_channel_data_type)
1010 {
1011 case CL_SNORM_INT8:
1012 case CL_SIGNED_INT8: return -128;
1013 case CL_UNORM_INT8:
1014 case CL_UNSIGNED_INT8: return 0;
1015
1016 case CL_SNORM_INT16:
1017 case CL_SIGNED_INT16: return -32768;
1018
1019 case CL_UNORM_INT16:
1020 case CL_UNSIGNED_INT16: return 0;
1021
1022 case CL_SIGNED_INT32: return -2147483648LL;
1023
1024 case CL_UNSIGNED_INT32: return 0;
1025
1026 case CL_UNORM_SHORT_565:
1027 case CL_UNORM_SHORT_555:
1028 case CL_UNORM_INT_101010: return 0;
1029
1030 case CL_HALF_FLOAT: return -(1 << 10);
1031
1032 #ifdef CL_SFIXED14_APPLE
1033 case CL_SFIXED14_APPLE: return -16384;
1034 #endif
1035
1036 default: return 0;
1037 }
1038 }
1039
convert_float_to_half(float f)1040 cl_half convert_float_to_half(float f)
1041 {
1042 switch (gFloatToHalfRoundingMode)
1043 {
1044 case kRoundToNearestEven: return cl_half_from_float(f, CL_HALF_RTE);
1045 case kRoundTowardZero: return cl_half_from_float(f, CL_HALF_RTZ);
1046 default:
1047 log_error("ERROR: Test internal error -- unhandled or unknown "
1048 "float->half rounding mode.\n");
1049 exit(-1);
1050 return 0xffff;
1051 }
1052 }
1053
get_image_size(image_descriptor const * imageInfo)1054 cl_ulong get_image_size(image_descriptor const *imageInfo)
1055 {
1056 cl_ulong imageSize;
1057
1058 // Assumes rowPitch and slicePitch are always correctly defined
1059 if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
1060 {
1061 imageSize = (size_t)compute_mipmapped_image_size(*imageInfo);
1062 }
1063 else
1064 {
1065 switch (imageInfo->type)
1066 {
1067 case CL_MEM_OBJECT_IMAGE1D: imageSize = imageInfo->rowPitch; break;
1068 case CL_MEM_OBJECT_IMAGE2D:
1069 imageSize = imageInfo->height * imageInfo->rowPitch;
1070 break;
1071 case CL_MEM_OBJECT_IMAGE3D:
1072 imageSize = imageInfo->depth * imageInfo->slicePitch;
1073 break;
1074 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1075 imageSize = imageInfo->arraySize * imageInfo->slicePitch;
1076 break;
1077 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1078 imageSize = imageInfo->arraySize * imageInfo->slicePitch;
1079 break;
1080 default:
1081 log_error("ERROR: Cannot identify image type %x\n",
1082 imageInfo->type);
1083 abort();
1084 }
1085 }
1086 return imageSize;
1087 }
1088
1089 // Calculate image size in megabytes (strictly, mebibytes). Result is rounded
1090 // up.
get_image_size_mb(image_descriptor const * imageInfo)1091 cl_ulong get_image_size_mb(image_descriptor const *imageInfo)
1092 {
1093 cl_ulong imageSize = get_image_size(imageInfo);
1094 cl_ulong mb = imageSize / (1024 * 1024);
1095 if (imageSize % (1024 * 1024) > 0)
1096 {
1097 mb += 1;
1098 }
1099 return mb;
1100 }
1101
1102
1103 uint64_t gRoundingStartValue = 0;
1104
1105
escape_inf_nan_values(char * data,size_t allocSize)1106 void escape_inf_nan_values(char *data, size_t allocSize)
1107 {
1108 // filter values with 8 not-quite-highest bits
1109 unsigned int *intPtr = (unsigned int *)data;
1110 for (size_t i = 0; i<allocSize>> 2; i++)
1111 {
1112 if ((intPtr[i] & 0x7F800000) == 0x7F800000) intPtr[i] ^= 0x40000000;
1113 }
1114
1115 // Ditto with half floats (16-bit numbers with the 5 not-quite-highest bits
1116 // = 0x7C00 are special)
1117 unsigned short *shortPtr = (unsigned short *)data;
1118 for (size_t i = 0; i<allocSize>> 1; i++)
1119 {
1120 if ((shortPtr[i] & 0x7C00) == 0x7C00) shortPtr[i] ^= 0x4000;
1121 }
1122 }
1123
generate_random_image_data(image_descriptor * imageInfo,BufferOwningPtr<char> & P,MTdata d)1124 char *generate_random_image_data(image_descriptor *imageInfo,
1125 BufferOwningPtr<char> &P, MTdata d)
1126 {
1127 size_t allocSize = get_image_size(imageInfo);
1128 size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format);
1129 size_t i;
1130
1131 if (imageInfo->num_mip_levels > 1)
1132 allocSize = compute_mipmapped_image_size(*imageInfo);
1133
1134 #if defined(__APPLE__)
1135 char *data = NULL;
1136 if (gDeviceType == CL_DEVICE_TYPE_CPU)
1137 {
1138 size_t mapSize = ((allocSize + 4095L) & -4096L) + 8192;
1139
1140 void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
1141 MAP_ANON | MAP_PRIVATE, 0, 0);
1142 intptr_t data_end = (intptr_t)map + mapSize - 4096;
1143 data = (char *)(data_end - (intptr_t)allocSize);
1144
1145 mprotect(map, 4096, PROT_NONE);
1146 mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
1147 P.reset(data, map, mapSize, allocSize);
1148 }
1149 else
1150 {
1151 data = (char *)malloc(allocSize);
1152 P.reset(data, NULL, 0, allocSize);
1153 }
1154 #else
1155 P.reset(NULL); // Free already allocated memory first, then try to allocate
1156 // new block.
1157 char *data =
1158 (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
1159 P.reset(data, NULL, 0, allocSize, true);
1160 #endif
1161
1162 if (data == NULL)
1163 {
1164 log_error("ERROR: Unable to malloc %lu bytes for "
1165 "generate_random_image_data\n",
1166 allocSize);
1167 return 0;
1168 }
1169
1170 if (gTestRounding)
1171 {
1172 // Special case: fill with a ramp from 0 to the size of the type
1173 size_t typeSize = get_format_type_size(imageInfo->format);
1174 switch (typeSize)
1175 {
1176 case 1: {
1177 char *ptr = data;
1178 for (i = 0; i < allocSize; i++)
1179 ptr[i] = (cl_char)(i + gRoundingStartValue);
1180 }
1181 break;
1182 case 2: {
1183 cl_short *ptr = (cl_short *)data;
1184 for (i = 0; i < allocSize / 2; i++)
1185 ptr[i] = (cl_short)(i + gRoundingStartValue);
1186 }
1187 break;
1188 case 4: {
1189 cl_int *ptr = (cl_int *)data;
1190 for (i = 0; i < allocSize / 4; i++)
1191 ptr[i] = (cl_int)(i + gRoundingStartValue);
1192 }
1193 break;
1194 }
1195
1196 // Note: inf or nan float values would cause problems, although we don't
1197 // know this will actually be a float, so we just know what to look for
1198 escape_inf_nan_values(data, allocSize);
1199 return data;
1200 }
1201
1202 // Otherwise, we should be able to just fill with random bits no matter what
1203 cl_uint *p = (cl_uint *)data;
1204 for (i = 0; i + 4 <= allocSize; i += 4) p[i / 4] = genrand_int32(d);
1205
1206 for (; i < allocSize; i++) data[i] = genrand_int32(d);
1207
1208 // Note: inf or nan float values would cause problems, although we don't
1209 // know this will actually be a float, so we just know what to look for
1210 escape_inf_nan_values(data, allocSize);
1211
1212 if (/*!gTestMipmaps*/ imageInfo->num_mip_levels < 2)
1213 {
1214 // Fill unused edges with -1, NaN for float
1215 if (imageInfo->rowPitch > pixelRowBytes)
1216 {
1217 size_t height = 0;
1218
1219 switch (imageInfo->type)
1220 {
1221 case CL_MEM_OBJECT_IMAGE2D:
1222 case CL_MEM_OBJECT_IMAGE3D:
1223 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1224 height = imageInfo->height;
1225 break;
1226 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1227 height = imageInfo->arraySize;
1228 break;
1229 }
1230
1231 // Fill in the row padding regions
1232 for (i = 0; i < height; i++)
1233 {
1234 size_t offset = i * imageInfo->rowPitch + pixelRowBytes;
1235 size_t length = imageInfo->rowPitch - pixelRowBytes;
1236 memset(data + offset, 0xff, length);
1237 }
1238 }
1239
1240 // Fill in the slice padding regions, if necessary:
1241
1242 size_t slice_dimension = imageInfo->height;
1243 if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1244 {
1245 slice_dimension = imageInfo->arraySize;
1246 }
1247
1248 if (imageInfo->slicePitch > slice_dimension * imageInfo->rowPitch)
1249 {
1250 size_t depth = 0;
1251 switch (imageInfo->type)
1252 {
1253 case CL_MEM_OBJECT_IMAGE2D:
1254 case CL_MEM_OBJECT_IMAGE3D: depth = imageInfo->depth; break;
1255 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1256 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1257 depth = imageInfo->arraySize;
1258 break;
1259 }
1260
1261 for (i = 0; i < depth; i++)
1262 {
1263 size_t offset = i * imageInfo->slicePitch
1264 + slice_dimension * imageInfo->rowPitch;
1265 size_t length = imageInfo->slicePitch
1266 - slice_dimension * imageInfo->rowPitch;
1267 memset(data + offset, 0xff, length);
1268 }
1269 }
1270 }
1271
1272 return data;
1273 }
1274
1275 #define CLAMP_FLOAT(v) (fmaxf(fminf(v, 1.f), -1.f))
1276
1277
read_image_pixel_float(void * imageData,image_descriptor * imageInfo,int x,int y,int z,float * outData,int lod)1278 void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
1279 int y, int z, float *outData, int lod)
1280 {
1281 size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
1282 depth_lod = imageInfo->depth;
1283 size_t slice_pitch_lod = 0, row_pitch_lod = 0;
1284
1285 if (imageInfo->num_mip_levels > 1)
1286 {
1287 switch (imageInfo->type)
1288 {
1289 case CL_MEM_OBJECT_IMAGE3D:
1290 depth_lod =
1291 (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
1292 case CL_MEM_OBJECT_IMAGE2D:
1293 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1294 height_lod =
1295 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
1296 default:
1297 width_lod =
1298 (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
1299 }
1300 row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
1301 if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1302 slice_pitch_lod = row_pitch_lod;
1303 else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
1304 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1305 slice_pitch_lod = row_pitch_lod * height_lod;
1306 }
1307 else
1308 {
1309 row_pitch_lod = imageInfo->rowPitch;
1310 slice_pitch_lod = imageInfo->slicePitch;
1311 }
1312 if (x < 0 || y < 0 || z < 0 || x >= (int)width_lod
1313 || (height_lod != 0 && y >= (int)height_lod)
1314 || (depth_lod != 0 && z >= (int)depth_lod)
1315 || (imageInfo->arraySize != 0 && z >= (int)imageInfo->arraySize))
1316 {
1317 outData[0] = outData[1] = outData[2] = outData[3] = 0;
1318 if (!has_alpha(imageInfo->format)) outData[3] = 1;
1319 return;
1320 }
1321
1322 const cl_image_format *format = imageInfo->format;
1323
1324 unsigned int i;
1325 float tempData[4];
1326
1327 // Advance to the right spot
1328 char *ptr = (char *)imageData;
1329 size_t pixelSize = get_pixel_size(format);
1330
1331 ptr += z * slice_pitch_lod + y * row_pitch_lod + x * pixelSize;
1332
1333 // OpenCL only supports reading floats from certain formats
1334 size_t channelCount = get_format_channel_count(format);
1335 switch (format->image_channel_data_type)
1336 {
1337 case CL_SNORM_INT8: {
1338 cl_char *dPtr = (cl_char *)ptr;
1339 for (i = 0; i < channelCount; i++)
1340 tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 127.0f);
1341 break;
1342 }
1343
1344 case CL_UNORM_INT8: {
1345 unsigned char *dPtr = (unsigned char *)ptr;
1346 for (i = 0; i < channelCount; i++)
1347 {
1348 if ((is_sRGBA_order(imageInfo->format->image_channel_order))
1349 && i < 3) // only RGB need to be converted for sRGBA
1350 tempData[i] = (float)sRGBunmap((float)dPtr[i] / 255.0f);
1351 else
1352 tempData[i] = (float)dPtr[i] / 255.0f;
1353 }
1354 break;
1355 }
1356
1357 case CL_SIGNED_INT8: {
1358 cl_char *dPtr = (cl_char *)ptr;
1359 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1360 break;
1361 }
1362
1363 case CL_UNSIGNED_INT8: {
1364 cl_uchar *dPtr = (cl_uchar *)ptr;
1365 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1366 break;
1367 }
1368
1369 case CL_SNORM_INT16: {
1370 cl_short *dPtr = (cl_short *)ptr;
1371 for (i = 0; i < channelCount; i++)
1372 tempData[i] = CLAMP_FLOAT((float)dPtr[i] / 32767.0f);
1373 break;
1374 }
1375
1376 case CL_UNORM_INT16: {
1377 cl_ushort *dPtr = (cl_ushort *)ptr;
1378 for (i = 0; i < channelCount; i++)
1379 tempData[i] = (float)dPtr[i] / 65535.0f;
1380 break;
1381 }
1382
1383 case CL_SIGNED_INT16: {
1384 cl_short *dPtr = (cl_short *)ptr;
1385 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1386 break;
1387 }
1388
1389 case CL_UNSIGNED_INT16: {
1390 cl_ushort *dPtr = (cl_ushort *)ptr;
1391 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1392 break;
1393 }
1394
1395 case CL_HALF_FLOAT: {
1396 cl_half *dPtr = (cl_half *)ptr;
1397 for (i = 0; i < channelCount; i++)
1398 tempData[i] = cl_half_to_float(dPtr[i]);
1399 break;
1400 }
1401
1402 case CL_SIGNED_INT32: {
1403 cl_int *dPtr = (cl_int *)ptr;
1404 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1405 break;
1406 }
1407
1408 case CL_UNSIGNED_INT32: {
1409 cl_uint *dPtr = (cl_uint *)ptr;
1410 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1411 break;
1412 }
1413
1414 case CL_UNORM_SHORT_565: {
1415 cl_ushort *dPtr = (cl_ushort *)ptr;
1416 tempData[0] = (float)(dPtr[0] >> 11) / (float)31;
1417 tempData[1] = (float)((dPtr[0] >> 5) & 63) / (float)63;
1418 tempData[2] = (float)(dPtr[0] & 31) / (float)31;
1419 break;
1420 }
1421
1422 case CL_UNORM_SHORT_555: {
1423 cl_ushort *dPtr = (cl_ushort *)ptr;
1424 tempData[0] = (float)((dPtr[0] >> 10) & 31) / (float)31;
1425 tempData[1] = (float)((dPtr[0] >> 5) & 31) / (float)31;
1426 tempData[2] = (float)(dPtr[0] & 31) / (float)31;
1427 break;
1428 }
1429
1430 case CL_UNORM_INT_101010: {
1431 cl_uint *dPtr = (cl_uint *)ptr;
1432 tempData[0] = (float)((dPtr[0] >> 20) & 0x3ff) / (float)1023;
1433 tempData[1] = (float)((dPtr[0] >> 10) & 0x3ff) / (float)1023;
1434 tempData[2] = (float)(dPtr[0] & 0x3ff) / (float)1023;
1435 break;
1436 }
1437
1438 case CL_FLOAT: {
1439 float *dPtr = (float *)ptr;
1440 for (i = 0; i < channelCount; i++) tempData[i] = (float)dPtr[i];
1441 break;
1442 }
1443 #ifdef CL_SFIXED14_APPLE
1444 case CL_SFIXED14_APPLE: {
1445 cl_ushort *dPtr = (cl_ushort *)ptr;
1446 for (i = 0; i < channelCount; i++)
1447 tempData[i] = ((int)dPtr[i] - 16384) * 0x1.0p-14f;
1448 break;
1449 }
1450 #endif
1451 }
1452
1453
1454 outData[0] = outData[1] = outData[2] = 0;
1455 outData[3] = 1;
1456
1457 switch (format->image_channel_order)
1458 {
1459 case CL_A: outData[3] = tempData[0]; break;
1460 case CL_R:
1461 case CL_Rx: outData[0] = tempData[0]; break;
1462 case CL_RA:
1463 outData[0] = tempData[0];
1464 outData[3] = tempData[1];
1465 break;
1466 case CL_RG:
1467 case CL_RGx:
1468 outData[0] = tempData[0];
1469 outData[1] = tempData[1];
1470 break;
1471 case CL_RGB:
1472 case CL_RGBx:
1473 case CL_sRGB:
1474 case CL_sRGBx:
1475 outData[0] = tempData[0];
1476 outData[1] = tempData[1];
1477 outData[2] = tempData[2];
1478 break;
1479 case CL_RGBA:
1480 outData[0] = tempData[0];
1481 outData[1] = tempData[1];
1482 outData[2] = tempData[2];
1483 outData[3] = tempData[3];
1484 break;
1485 case CL_ARGB:
1486 outData[0] = tempData[1];
1487 outData[1] = tempData[2];
1488 outData[2] = tempData[3];
1489 outData[3] = tempData[0];
1490 break;
1491 case CL_ABGR:
1492 outData[0] = tempData[3];
1493 outData[1] = tempData[2];
1494 outData[2] = tempData[1];
1495 outData[3] = tempData[0];
1496 break;
1497 case CL_BGRA:
1498 case CL_sBGRA:
1499 outData[0] = tempData[2];
1500 outData[1] = tempData[1];
1501 outData[2] = tempData[0];
1502 outData[3] = tempData[3];
1503 break;
1504 case CL_INTENSITY:
1505 outData[0] = tempData[0];
1506 outData[1] = tempData[0];
1507 outData[2] = tempData[0];
1508 outData[3] = tempData[0];
1509 break;
1510 case CL_LUMINANCE:
1511 outData[0] = tempData[0];
1512 outData[1] = tempData[0];
1513 outData[2] = tempData[0];
1514 break;
1515 #ifdef CL_1RGB_APPLE
1516 case CL_1RGB_APPLE:
1517 outData[0] = tempData[1];
1518 outData[1] = tempData[2];
1519 outData[2] = tempData[3];
1520 outData[3] = 1.0f;
1521 break;
1522 #endif
1523 #ifdef CL_BGR1_APPLE
1524 case CL_BGR1_APPLE:
1525 outData[0] = tempData[2];
1526 outData[1] = tempData[1];
1527 outData[2] = tempData[0];
1528 outData[3] = 1.0f;
1529 break;
1530 #endif
1531 case CL_sRGBA:
1532 outData[0] = tempData[0];
1533 outData[1] = tempData[1];
1534 outData[2] = tempData[2];
1535 outData[3] = tempData[3];
1536 break;
1537 case CL_DEPTH: outData[0] = tempData[0]; break;
1538 default:
1539 log_error("Invalid format:");
1540 print_header(format, true);
1541 break;
1542 }
1543 }
1544
read_image_pixel_float(void * imageData,image_descriptor * imageInfo,int x,int y,int z,float * outData)1545 void read_image_pixel_float(void *imageData, image_descriptor *imageInfo, int x,
1546 int y, int z, float *outData)
1547 {
1548 read_image_pixel_float(imageData, imageInfo, x, y, z, outData, 0);
1549 }
1550
get_integer_coords(float x,float y,float z,size_t width,size_t height,size_t depth,image_sampler_data * imageSampler,image_descriptor * imageInfo,int & outX,int & outY,int & outZ)1551 bool get_integer_coords(float x, float y, float z, size_t width, size_t height,
1552 size_t depth, image_sampler_data *imageSampler,
1553 image_descriptor *imageInfo, int &outX, int &outY,
1554 int &outZ)
1555 {
1556 return get_integer_coords_offset(x, y, z, 0.0f, 0.0f, 0.0f, width, height,
1557 depth, imageSampler, imageInfo, outX, outY,
1558 outZ);
1559 }
1560
get_integer_coords_offset(float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,size_t width,size_t height,size_t depth,image_sampler_data * imageSampler,image_descriptor * imageInfo,int & outX,int & outY,int & outZ)1561 bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset,
1562 float yAddressOffset, float zAddressOffset,
1563 size_t width, size_t height, size_t depth,
1564 image_sampler_data *imageSampler,
1565 image_descriptor *imageInfo, int &outX,
1566 int &outY, int &outZ)
1567 {
1568 AddressFn adFn = sAddressingTable[imageSampler];
1569
1570 float refX = floorf(x), refY = floorf(y), refZ = floorf(z);
1571
1572 // Handle sampler-directed coordinate normalization + clamping. Note that
1573 // the array coordinate for image array types is expected to be
1574 // unnormalized, and is clamped to 0..arraySize-1.
1575 if (imageSampler->normalized_coords)
1576 {
1577 switch (imageSampler->addressing_mode)
1578 {
1579 case CL_ADDRESS_REPEAT:
1580 x = RepeatNormalizedAddressFn(x, width);
1581 if (height != 0)
1582 {
1583 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1584 y = RepeatNormalizedAddressFn(y, height);
1585 }
1586 if (depth != 0)
1587 {
1588 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1589 z = RepeatNormalizedAddressFn(z, depth);
1590 }
1591
1592 if (xAddressOffset != 0.0)
1593 {
1594 // Add in the offset
1595 x += xAddressOffset;
1596 // Handle wrapping
1597 if (x > width) x -= (float)width;
1598 if (x < 0) x += (float)width;
1599 }
1600 if ((yAddressOffset != 0.0)
1601 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
1602 {
1603 // Add in the offset
1604 y += yAddressOffset;
1605 // Handle wrapping
1606 if (y > height) y -= (float)height;
1607 if (y < 0) y += (float)height;
1608 }
1609 if ((zAddressOffset != 0.0)
1610 && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
1611 {
1612 // Add in the offset
1613 z += zAddressOffset;
1614 // Handle wrapping
1615 if (z > depth) z -= (float)depth;
1616 if (z < 0) z += (float)depth;
1617 }
1618 break;
1619
1620 case CL_ADDRESS_MIRRORED_REPEAT:
1621 x = MirroredRepeatNormalizedAddressFn(x, width);
1622 if (height != 0)
1623 {
1624 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1625 y = MirroredRepeatNormalizedAddressFn(y, height);
1626 }
1627 if (depth != 0)
1628 {
1629 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1630 z = MirroredRepeatNormalizedAddressFn(z, depth);
1631 }
1632
1633 if (xAddressOffset != 0.0)
1634 {
1635 float temp = x + xAddressOffset;
1636 if (temp > (float)width)
1637 temp = (float)width - (temp - (float)width);
1638 x = fabsf(temp);
1639 }
1640 if ((yAddressOffset != 0.0)
1641 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY))
1642 {
1643 float temp = y + yAddressOffset;
1644 if (temp > (float)height)
1645 temp = (float)height - (temp - (float)height);
1646 y = fabsf(temp);
1647 }
1648 if ((zAddressOffset != 0.0)
1649 && (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY))
1650 {
1651 float temp = z + zAddressOffset;
1652 if (temp > (float)depth)
1653 temp = (float)depth - (temp - (float)depth);
1654 z = fabsf(temp);
1655 }
1656 break;
1657
1658 default:
1659 // Also, remultiply to the original coords. This simulates any
1660 // truncation in the pass to OpenCL
1661 x *= (float)width;
1662 x += xAddressOffset;
1663
1664 if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
1665 {
1666 y *= (float)height;
1667 y += yAddressOffset;
1668 }
1669
1670 if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY)
1671 {
1672 z *= (float)depth;
1673 z += zAddressOffset;
1674 }
1675 break;
1676 }
1677 }
1678
1679 // At this point, we're dealing with non-normalized coordinates.
1680
1681 outX = adFn(floorf(x), width);
1682
1683 // 1D and 2D arrays require special care for the index coordinate:
1684
1685 switch (imageInfo->type)
1686 {
1687 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1688 outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f);
1689 outZ = 0.0f; /* don't care! */
1690 break;
1691 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1692 outY = adFn(floorf(y), height);
1693 outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f);
1694 break;
1695 default:
1696 // legacy path:
1697 if (height != 0) outY = adFn(floorf(y), height);
1698 if (depth != 0) outZ = adFn(floorf(z), depth);
1699 }
1700
1701 return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ);
1702 }
1703
frac(float a)1704 static float frac(float a) { return a - floorf(a); }
1705
1706 static inline void pixelMax(const float a[4], const float b[4], float *results);
pixelMax(const float a[4],const float b[4],float * results)1707 static inline void pixelMax(const float a[4], const float b[4], float *results)
1708 {
1709 for (int i = 0; i < 4; i++) results[i] = errMax(fabsf(a[i]), fabsf(b[i]));
1710 }
1711
1712 // If containsDenorms is NULL, flush denorms to zero
1713 // if containsDenorms is not NULL, record whether there are any denorms
1714 static inline void check_for_denorms(float a[4], int *containsDenorms);
check_for_denorms(float a[4],int * containsDenorms)1715 static inline void check_for_denorms(float a[4], int *containsDenorms)
1716 {
1717 if (NULL == containsDenorms)
1718 {
1719 for (int i = 0; i < 4; i++)
1720 {
1721 if (IsFloatSubnormal(a[i])) a[i] = copysignf(0.0f, a[i]);
1722 }
1723 }
1724 else
1725 {
1726 for (int i = 0; i < 4; i++)
1727 {
1728 if (IsFloatSubnormal(a[i]))
1729 {
1730 *containsDenorms = 1;
1731 break;
1732 }
1733 }
1734 }
1735 }
1736
calculate_array_index(float coord,float extent)1737 inline float calculate_array_index(float coord, float extent)
1738 {
1739 // from Section 8.4 of the 1.2 Spec 'Selecting an Image from an Image Array'
1740 //
1741 // given coordinate 'w' that represents an index:
1742 // layer_index = clamp( rint(w), 0, image_array_size - 1)
1743
1744 float ret = rintf(coord);
1745 ret = ret > extent ? extent : ret;
1746 ret = ret < 0.0f ? 0.0f : ret;
1747
1748 return ret;
1749 }
1750
1751 /*
1752 * Utility function to unnormalized a coordinate given a particular sampler.
1753 *
1754 * name - the name of the coordinate, used for verbose debugging only
1755 * coord - the coordinate requiring unnormalization
1756 * offset - an addressing offset to be added to the coordinate
1757 * extent - the max value for this coordinate (e.g. width for x)
1758 */
unnormalize_coordinate(const char * name,float coord,float offset,float extent,cl_addressing_mode addressing_mode,int verbose)1759 static float unnormalize_coordinate(const char *name, float coord, float offset,
1760 float extent,
1761 cl_addressing_mode addressing_mode,
1762 int verbose)
1763 {
1764 float ret = 0.0f;
1765
1766 switch (addressing_mode)
1767 {
1768 case CL_ADDRESS_REPEAT:
1769 ret = RepeatNormalizedAddressFn(coord, extent);
1770
1771 if (verbose)
1772 {
1773 log_info("\tRepeat filter denormalizes %s (%f) to %f\n", name,
1774 coord, ret);
1775 }
1776
1777 if (offset != 0.0)
1778 {
1779 // Add in the offset, and handle wrapping.
1780 ret += offset;
1781 if (ret > extent) ret -= extent;
1782 if (ret < 0.0) ret += extent;
1783 }
1784
1785 if (verbose && offset != 0.0f)
1786 {
1787 log_info("\tAddress offset of %f added to get %f\n", offset,
1788 ret);
1789 }
1790 break;
1791
1792 case CL_ADDRESS_MIRRORED_REPEAT:
1793 ret = MirroredRepeatNormalizedAddressFn(coord, extent);
1794
1795 if (verbose)
1796 {
1797 log_info(
1798 "\tMirrored repeat filter denormalizes %s (%f) to %f\n",
1799 name, coord, ret);
1800 }
1801
1802 if (offset != 0.0)
1803 {
1804 float temp = ret + offset;
1805 if (temp > extent) temp = extent - (temp - extent);
1806 ret = fabsf(temp);
1807 }
1808
1809 if (verbose && offset != 0.0f)
1810 {
1811 log_info("\tAddress offset of %f added to get %f\n", offset,
1812 ret);
1813 }
1814 break;
1815
1816 default:
1817
1818 ret = coord * extent;
1819
1820 if (verbose)
1821 {
1822 log_info("\tFilter denormalizes %s to %f (%f * %f)\n", name,
1823 ret, coord, extent);
1824 }
1825
1826 ret += offset;
1827
1828 if (verbose && offset != 0.0f)
1829 {
1830 log_info("\tAddress offset of %f added to get %f\n", offset,
1831 ret);
1832 }
1833 }
1834
1835 return ret;
1836 }
1837
1838 FloatPixel
sample_image_pixel_float(void * imageData,image_descriptor * imageInfo,float x,float y,float z,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms)1839 sample_image_pixel_float(void *imageData, image_descriptor *imageInfo, float x,
1840 float y, float z, image_sampler_data *imageSampler,
1841 float *outData, int verbose, int *containsDenorms)
1842 {
1843 return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
1844 0.0f, 0.0f, imageSampler, outData,
1845 verbose, containsDenorms);
1846 }
1847
1848 // returns max pixel value of the pixels touched
sample_image_pixel_float(void * imageData,image_descriptor * imageInfo,float x,float y,float z,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms,int lod)1849 FloatPixel sample_image_pixel_float(void *imageData,
1850 image_descriptor *imageInfo, float x,
1851 float y, float z,
1852 image_sampler_data *imageSampler,
1853 float *outData, int verbose,
1854 int *containsDenorms, int lod)
1855 {
1856 return sample_image_pixel_float_offset(imageData, imageInfo, x, y, z, 0.0f,
1857 0.0f, 0.0f, imageSampler, outData,
1858 verbose, containsDenorms, lod);
1859 }
sample_image_pixel_float_offset(void * imageData,image_descriptor * imageInfo,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms,int lod)1860 FloatPixel sample_image_pixel_float_offset(
1861 void *imageData, image_descriptor *imageInfo, float x, float y, float z,
1862 float xAddressOffset, float yAddressOffset, float zAddressOffset,
1863 image_sampler_data *imageSampler, float *outData, int verbose,
1864 int *containsDenorms, int lod)
1865 {
1866 AddressFn adFn = sAddressingTable[imageSampler];
1867 FloatPixel returnVal;
1868 size_t width_lod = imageInfo->width, height_lod = imageInfo->height,
1869 depth_lod = imageInfo->depth;
1870 size_t slice_pitch_lod = 0, row_pitch_lod = 0;
1871
1872 if (imageInfo->num_mip_levels > 1)
1873 {
1874 switch (imageInfo->type)
1875 {
1876 case CL_MEM_OBJECT_IMAGE3D:
1877 depth_lod =
1878 (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
1879 case CL_MEM_OBJECT_IMAGE2D:
1880 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1881 height_lod =
1882 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
1883 default:
1884 width_lod =
1885 (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
1886 }
1887 row_pitch_lod = width_lod * get_pixel_size(imageInfo->format);
1888 if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
1889 slice_pitch_lod = row_pitch_lod;
1890 else if (imageInfo->type == CL_MEM_OBJECT_IMAGE3D
1891 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
1892 slice_pitch_lod = row_pitch_lod * height_lod;
1893 }
1894 else
1895 {
1896 slice_pitch_lod = imageInfo->slicePitch;
1897 row_pitch_lod = imageInfo->rowPitch;
1898 }
1899
1900 if (containsDenorms) *containsDenorms = 0;
1901
1902 if (imageSampler->normalized_coords)
1903 {
1904
1905 // We need to unnormalize our coordinates differently depending on
1906 // the image type, but 'x' is always processed the same way.
1907
1908 x = unnormalize_coordinate("x", x, xAddressOffset, (float)width_lod,
1909 imageSampler->addressing_mode, verbose);
1910
1911 switch (imageInfo->type)
1912 {
1913
1914 // The image array types require special care:
1915
1916 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1917 z = 0; // don't care -- unused for 1D arrays
1918 break;
1919
1920 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1921 y = unnormalize_coordinate(
1922 "y", y, yAddressOffset, (float)height_lod,
1923 imageSampler->addressing_mode, verbose);
1924 break;
1925
1926 // Everybody else:
1927
1928 default:
1929 y = unnormalize_coordinate(
1930 "y", y, yAddressOffset, (float)height_lod,
1931 imageSampler->addressing_mode, verbose);
1932 z = unnormalize_coordinate(
1933 "z", z, zAddressOffset, (float)depth_lod,
1934 imageSampler->addressing_mode, verbose);
1935 }
1936 }
1937 else if (verbose)
1938 {
1939
1940 switch (imageInfo->type)
1941 {
1942 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1943 log_info("Starting coordinate: %f, array index %f\n", x, y);
1944 break;
1945 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1946 log_info("Starting coordinate: %f, %f, array index %f\n", x, y,
1947 z);
1948 break;
1949 case CL_MEM_OBJECT_IMAGE1D:
1950 case CL_MEM_OBJECT_IMAGE1D_BUFFER:
1951 log_info("Starting coordinate: %f\b", x);
1952 break;
1953 case CL_MEM_OBJECT_IMAGE2D:
1954 log_info("Starting coordinate: %f, %f\n", x, y);
1955 break;
1956 case CL_MEM_OBJECT_IMAGE3D:
1957 default: log_info("Starting coordinate: %f, %f, %f\n", x, y, z);
1958 }
1959 }
1960
1961 // At this point, we have unnormalized coordinates.
1962
1963 if (imageSampler->filter_mode == CL_FILTER_NEAREST)
1964 {
1965 int ix, iy, iz;
1966
1967 // We apply the addressing function to the now-unnormalized
1968 // coordinates. Note that the array cases again require special
1969 // care, per section 8.4 in the OpenCL 1.2 Specification.
1970
1971 ix = adFn(floorf(x), width_lod);
1972
1973 switch (imageInfo->type)
1974 {
1975 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
1976 iy =
1977 calculate_array_index(y, (float)(imageInfo->arraySize - 1));
1978 iz = 0;
1979 if (verbose)
1980 {
1981 log_info("\tArray index %f evaluates to %d\n", y, iy);
1982 }
1983 break;
1984 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
1985 iy = adFn(floorf(y), height_lod);
1986 iz =
1987 calculate_array_index(z, (float)(imageInfo->arraySize - 1));
1988 if (verbose)
1989 {
1990 log_info("\tArray index %f evaluates to %d\n", z, iz);
1991 }
1992 break;
1993 default:
1994 iy = adFn(floorf(y), height_lod);
1995 if (depth_lod != 0)
1996 iz = adFn(floorf(z), depth_lod);
1997 else
1998 iz = 0;
1999 }
2000
2001 if (verbose)
2002 {
2003 if (iz)
2004 log_info(
2005 "\tReference integer coords calculated: { %d, %d, %d }\n",
2006 ix, iy, iz);
2007 else
2008 log_info("\tReference integer coords calculated: { %d, %d }\n",
2009 ix, iy);
2010 }
2011
2012 read_image_pixel_float(imageData, imageInfo, ix, iy, iz, outData, lod);
2013 check_for_denorms(outData, containsDenorms);
2014 for (int i = 0; i < 4; i++) returnVal.p[i] = fabsf(outData[i]);
2015 return returnVal;
2016 }
2017 else
2018 {
2019 // Linear filtering cases.
2020
2021 size_t width = width_lod, height = height_lod, depth = depth_lod;
2022
2023 // Image arrays can use 2D filtering, but require us to walk into the
2024 // image a certain number of slices before reading.
2025
2026 if (depth == 0 || imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY
2027 || imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
2028 {
2029 float array_index = 0;
2030
2031 size_t layer_offset = 0;
2032
2033 if (imageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
2034 {
2035 array_index =
2036 calculate_array_index(z, (float)(imageInfo->arraySize - 1));
2037 layer_offset = slice_pitch_lod * (size_t)array_index;
2038 }
2039 else if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
2040 {
2041 array_index =
2042 calculate_array_index(y, (float)(imageInfo->arraySize - 1));
2043 layer_offset = slice_pitch_lod * (size_t)array_index;
2044
2045 // Set up y and height so that the filtering below is correct
2046 // 1D filtering on a single slice.
2047 height = 1;
2048 }
2049
2050 int x1 = adFn(floorf(x - 0.5f), width);
2051 int y1 = 0;
2052 int x2 = adFn(floorf(x - 0.5f) + 1, width);
2053 int y2 = 0;
2054 if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D)
2055 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY)
2056 && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER))
2057 {
2058 y1 = adFn(floorf(y - 0.5f), height);
2059 y2 = adFn(floorf(y - 0.5f) + 1, height);
2060 }
2061 else
2062 {
2063 y = 0.5f;
2064 }
2065
2066 if (verbose)
2067 {
2068 log_info("\tActual integer coords used (i = floor(x-.5)): i0:{ "
2069 "%d, %d } and i1:{ %d, %d }\n",
2070 x1, y1, x2, y2);
2071 log_info("\tArray coordinate is %f\n", array_index);
2072 }
2073
2074 // Walk to beginning of the 'correct' slice, if needed.
2075 char *imgPtr = ((char *)imageData) + layer_offset;
2076
2077 float upLeft[4], upRight[4], lowLeft[4], lowRight[4];
2078 float maxUp[4], maxLow[4];
2079 read_image_pixel_float(imgPtr, imageInfo, x1, y1, 0, upLeft, lod);
2080 read_image_pixel_float(imgPtr, imageInfo, x2, y1, 0, upRight, lod);
2081 check_for_denorms(upLeft, containsDenorms);
2082 check_for_denorms(upRight, containsDenorms);
2083 pixelMax(upLeft, upRight, maxUp);
2084 read_image_pixel_float(imgPtr, imageInfo, x1, y2, 0, lowLeft, lod);
2085 read_image_pixel_float(imgPtr, imageInfo, x2, y2, 0, lowRight, lod);
2086 check_for_denorms(lowLeft, containsDenorms);
2087 check_for_denorms(lowRight, containsDenorms);
2088 pixelMax(lowLeft, lowRight, maxLow);
2089 pixelMax(maxUp, maxLow, returnVal.p);
2090
2091 if (verbose)
2092 {
2093 if (NULL == containsDenorms)
2094 log_info("\tSampled pixels (rgba order, denorms flushed to "
2095 "zero):\n");
2096 else
2097 log_info("\tSampled pixels (rgba order):\n");
2098 log_info("\t\tp00: %f, %f, %f, %f\n", upLeft[0], upLeft[1],
2099 upLeft[2], upLeft[3]);
2100 log_info("\t\tp01: %f, %f, %f, %f\n", upRight[0], upRight[1],
2101 upRight[2], upRight[3]);
2102 log_info("\t\tp10: %f, %f, %f, %f\n", lowLeft[0], lowLeft[1],
2103 lowLeft[2], lowLeft[3]);
2104 log_info("\t\tp11: %f, %f, %f, %f\n", lowRight[0], lowRight[1],
2105 lowRight[2], lowRight[3]);
2106 }
2107
2108 bool printMe = false;
2109 if (x1 <= 0 || x2 <= 0 || x1 >= (int)width - 1
2110 || x2 >= (int)width - 1)
2111 printMe = true;
2112 if (y1 <= 0 || y2 <= 0 || y1 >= (int)height - 1
2113 || y2 >= (int)height - 1)
2114 printMe = true;
2115
2116 double weights[2][2];
2117
2118 weights[0][0] = weights[0][1] = 1.0 - frac(x - 0.5f);
2119 weights[1][0] = weights[1][1] = frac(x - 0.5f);
2120 weights[0][0] *= 1.0 - frac(y - 0.5f);
2121 weights[1][0] *= 1.0 - frac(y - 0.5f);
2122 weights[0][1] *= frac(y - 0.5f);
2123 weights[1][1] *= frac(y - 0.5f);
2124
2125 if (verbose)
2126 log_info("\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f\n",
2127 frac(x - 0.5f), frac(y - 0.5f));
2128
2129 for (int i = 0; i < 3; i++)
2130 {
2131 outData[i] = (float)((upLeft[i] * weights[0][0])
2132 + (upRight[i] * weights[1][0])
2133 + (lowLeft[i] * weights[0][1])
2134 + (lowRight[i] * weights[1][1]));
2135 // flush subnormal results to zero if necessary
2136 if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
2137 outData[i] = copysignf(0.0f, outData[i]);
2138 }
2139 outData[3] = (float)((upLeft[3] * weights[0][0])
2140 + (upRight[3] * weights[1][0])
2141 + (lowLeft[3] * weights[0][1])
2142 + (lowRight[3] * weights[1][1]));
2143 // flush subnormal results to zero if necessary
2144 if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
2145 outData[3] = copysignf(0.0f, outData[3]);
2146 }
2147 else
2148 {
2149 // 3D linear filtering
2150 int x1 = adFn(floorf(x - 0.5f), width_lod);
2151 int y1 = adFn(floorf(y - 0.5f), height_lod);
2152 int z1 = adFn(floorf(z - 0.5f), depth_lod);
2153 int x2 = adFn(floorf(x - 0.5f) + 1, width_lod);
2154 int y2 = adFn(floorf(y - 0.5f) + 1, height_lod);
2155 int z2 = adFn(floorf(z - 0.5f) + 1, depth_lod);
2156
2157 if (verbose)
2158 log_info("\tActual integer coords used (i = floor(x-.5)): "
2159 "i0:{%d, %d, %d} and i1:{%d, %d, %d}\n",
2160 x1, y1, z1, x2, y2, z2);
2161
2162 float upLeftA[4], upRightA[4], lowLeftA[4], lowRightA[4];
2163 float upLeftB[4], upRightB[4], lowLeftB[4], lowRightB[4];
2164 float pixelMaxA[4], pixelMaxB[4];
2165 read_image_pixel_float(imageData, imageInfo, x1, y1, z1, upLeftA,
2166 lod);
2167 read_image_pixel_float(imageData, imageInfo, x2, y1, z1, upRightA,
2168 lod);
2169 check_for_denorms(upLeftA, containsDenorms);
2170 check_for_denorms(upRightA, containsDenorms);
2171 pixelMax(upLeftA, upRightA, pixelMaxA);
2172 read_image_pixel_float(imageData, imageInfo, x1, y2, z1, lowLeftA,
2173 lod);
2174 read_image_pixel_float(imageData, imageInfo, x2, y2, z1, lowRightA,
2175 lod);
2176 check_for_denorms(lowLeftA, containsDenorms);
2177 check_for_denorms(lowRightA, containsDenorms);
2178 pixelMax(lowLeftA, lowRightA, pixelMaxB);
2179 pixelMax(pixelMaxA, pixelMaxB, returnVal.p);
2180 read_image_pixel_float(imageData, imageInfo, x1, y1, z2, upLeftB,
2181 lod);
2182 read_image_pixel_float(imageData, imageInfo, x2, y1, z2, upRightB,
2183 lod);
2184 check_for_denorms(upLeftB, containsDenorms);
2185 check_for_denorms(upRightB, containsDenorms);
2186 pixelMax(upLeftB, upRightB, pixelMaxA);
2187 read_image_pixel_float(imageData, imageInfo, x1, y2, z2, lowLeftB,
2188 lod);
2189 read_image_pixel_float(imageData, imageInfo, x2, y2, z2, lowRightB,
2190 lod);
2191 check_for_denorms(lowLeftB, containsDenorms);
2192 check_for_denorms(lowRightB, containsDenorms);
2193 pixelMax(lowLeftB, lowRightB, pixelMaxB);
2194 pixelMax(pixelMaxA, pixelMaxB, pixelMaxA);
2195 pixelMax(pixelMaxA, returnVal.p, returnVal.p);
2196
2197 if (verbose)
2198 {
2199 if (NULL == containsDenorms)
2200 log_info("\tSampled pixels (rgba order, denorms flushed to "
2201 "zero):\n");
2202 else
2203 log_info("\tSampled pixels (rgba order):\n");
2204 log_info("\t\tp000: %f, %f, %f, %f\n", upLeftA[0], upLeftA[1],
2205 upLeftA[2], upLeftA[3]);
2206 log_info("\t\tp001: %f, %f, %f, %f\n", upRightA[0], upRightA[1],
2207 upRightA[2], upRightA[3]);
2208 log_info("\t\tp010: %f, %f, %f, %f\n", lowLeftA[0], lowLeftA[1],
2209 lowLeftA[2], lowLeftA[3]);
2210 log_info("\t\tp011: %f, %f, %f, %f\n\n", lowRightA[0],
2211 lowRightA[1], lowRightA[2], lowRightA[3]);
2212 log_info("\t\tp100: %f, %f, %f, %f\n", upLeftB[0], upLeftB[1],
2213 upLeftB[2], upLeftB[3]);
2214 log_info("\t\tp101: %f, %f, %f, %f\n", upRightB[0], upRightB[1],
2215 upRightB[2], upRightB[3]);
2216 log_info("\t\tp110: %f, %f, %f, %f\n", lowLeftB[0], lowLeftB[1],
2217 lowLeftB[2], lowLeftB[3]);
2218 log_info("\t\tp111: %f, %f, %f, %f\n", lowRightB[0],
2219 lowRightB[1], lowRightB[2], lowRightB[3]);
2220 }
2221
2222 double weights[2][2][2];
2223
2224 float a = frac(x - 0.5f), b = frac(y - 0.5f), c = frac(z - 0.5f);
2225 weights[0][0][0] = weights[0][1][0] = weights[0][0][1] =
2226 weights[0][1][1] = 1.f - a;
2227 weights[1][0][0] = weights[1][1][0] = weights[1][0][1] =
2228 weights[1][1][1] = a;
2229 weights[0][0][0] *= 1.f - b;
2230 weights[1][0][0] *= 1.f - b;
2231 weights[0][0][1] *= 1.f - b;
2232 weights[1][0][1] *= 1.f - b;
2233 weights[0][1][0] *= b;
2234 weights[1][1][0] *= b;
2235 weights[0][1][1] *= b;
2236 weights[1][1][1] *= b;
2237 weights[0][0][0] *= 1.f - c;
2238 weights[0][1][0] *= 1.f - c;
2239 weights[1][0][0] *= 1.f - c;
2240 weights[1][1][0] *= 1.f - c;
2241 weights[0][0][1] *= c;
2242 weights[0][1][1] *= c;
2243 weights[1][0][1] *= c;
2244 weights[1][1][1] *= c;
2245
2246 if (verbose)
2247 log_info("\tfrac( x - 0.5f ) = %f, frac( y - 0.5f ) = %f, "
2248 "frac( z - 0.5f ) = %f\n",
2249 frac(x - 0.5f), frac(y - 0.5f), frac(z - 0.5f));
2250
2251 for (int i = 0; i < 3; i++)
2252 {
2253 outData[i] = (float)((upLeftA[i] * weights[0][0][0])
2254 + (upRightA[i] * weights[1][0][0])
2255 + (lowLeftA[i] * weights[0][1][0])
2256 + (lowRightA[i] * weights[1][1][0])
2257 + (upLeftB[i] * weights[0][0][1])
2258 + (upRightB[i] * weights[1][0][1])
2259 + (lowLeftB[i] * weights[0][1][1])
2260 + (lowRightB[i] * weights[1][1][1]));
2261 // flush subnormal results to zero if necessary
2262 if (NULL == containsDenorms && fabs(outData[i]) < FLT_MIN)
2263 outData[i] = copysignf(0.0f, outData[i]);
2264 }
2265 outData[3] = (float)((upLeftA[3] * weights[0][0][0])
2266 + (upRightA[3] * weights[1][0][0])
2267 + (lowLeftA[3] * weights[0][1][0])
2268 + (lowRightA[3] * weights[1][1][0])
2269 + (upLeftB[3] * weights[0][0][1])
2270 + (upRightB[3] * weights[1][0][1])
2271 + (lowLeftB[3] * weights[0][1][1])
2272 + (lowRightB[3] * weights[1][1][1]));
2273 // flush subnormal results to zero if necessary
2274 if (NULL == containsDenorms && fabs(outData[3]) < FLT_MIN)
2275 outData[3] = copysignf(0.0f, outData[3]);
2276 }
2277
2278 return returnVal;
2279 }
2280 }
2281
sample_image_pixel_float_offset(void * imageData,image_descriptor * imageInfo,float x,float y,float z,float xAddressOffset,float yAddressOffset,float zAddressOffset,image_sampler_data * imageSampler,float * outData,int verbose,int * containsDenorms)2282 FloatPixel sample_image_pixel_float_offset(
2283 void *imageData, image_descriptor *imageInfo, float x, float y, float z,
2284 float xAddressOffset, float yAddressOffset, float zAddressOffset,
2285 image_sampler_data *imageSampler, float *outData, int verbose,
2286 int *containsDenorms)
2287 {
2288 return sample_image_pixel_float_offset(
2289 imageData, imageInfo, x, y, z, xAddressOffset, yAddressOffset,
2290 zAddressOffset, imageSampler, outData, verbose, containsDenorms, 0);
2291 }
2292
2293
debug_find_vector_in_image(void * imagePtr,image_descriptor * imageInfo,void * vectorToFind,size_t vectorSize,int * outX,int * outY,int * outZ,size_t lod)2294 int debug_find_vector_in_image(void *imagePtr, image_descriptor *imageInfo,
2295 void *vectorToFind, size_t vectorSize, int *outX,
2296 int *outY, int *outZ, size_t lod)
2297 {
2298 int foundCount = 0;
2299 char *iPtr = (char *)imagePtr;
2300 size_t width;
2301 size_t depth;
2302 size_t height;
2303 size_t row_pitch;
2304 size_t slice_pitch;
2305
2306 switch (imageInfo->type)
2307 {
2308 case CL_MEM_OBJECT_IMAGE1D:
2309 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2310 height = 1;
2311 depth = 1;
2312 break;
2313 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
2314 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2315 height = 1;
2316 depth = imageInfo->arraySize;
2317 break;
2318 case CL_MEM_OBJECT_IMAGE2D:
2319 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2320 height =
2321 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2322 depth = 1;
2323 break;
2324 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
2325 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2326 height =
2327 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2328 depth = imageInfo->arraySize;
2329 break;
2330 case CL_MEM_OBJECT_IMAGE3D:
2331 width = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1;
2332 height =
2333 (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1;
2334 depth = (imageInfo->depth >> lod) ? (imageInfo->depth >> lod) : 1;
2335 break;
2336 }
2337
2338 row_pitch = width * get_pixel_size(imageInfo->format);
2339 slice_pitch = row_pitch * height;
2340
2341 for (size_t z = 0; z < depth; z++)
2342 {
2343 for (size_t y = 0; y < height; y++)
2344 {
2345 for (size_t x = 0; x < width; x++)
2346 {
2347 if (memcmp(iPtr, vectorToFind, vectorSize) == 0)
2348 {
2349 if (foundCount == 0)
2350 {
2351 *outX = (int)x;
2352 if (outY != NULL) *outY = (int)y;
2353 if (outZ != NULL) *outZ = (int)z;
2354 }
2355 foundCount++;
2356 }
2357 iPtr += vectorSize;
2358 }
2359 iPtr += row_pitch - (width * vectorSize);
2360 }
2361 iPtr += slice_pitch - (height * row_pitch);
2362 }
2363 return foundCount;
2364 }
2365
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,unsigned int * valuesToFind,int * outX,int * outY,int * outZ,int lod)2366 int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2367 unsigned int *valuesToFind, int *outX, int *outY,
2368 int *outZ, int lod)
2369 {
2370 char vectorToFind[4 * 4];
2371 size_t vectorSize = get_format_channel_count(imageInfo->format);
2372
2373
2374 if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT8)
2375 {
2376 unsigned char *p = (unsigned char *)vectorToFind;
2377 for (unsigned int i = 0; i < vectorSize; i++)
2378 p[i] = (unsigned char)valuesToFind[i];
2379 }
2380 else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT16)
2381 {
2382 unsigned short *p = (unsigned short *)vectorToFind;
2383 for (unsigned int i = 0; i < vectorSize; i++)
2384 p[i] = (unsigned short)valuesToFind[i];
2385 vectorSize *= 2;
2386 }
2387 else if (imageInfo->format->image_channel_data_type == CL_UNSIGNED_INT32)
2388 {
2389 unsigned int *p = (unsigned int *)vectorToFind;
2390 for (unsigned int i = 0; i < vectorSize; i++)
2391 p[i] = (unsigned int)valuesToFind[i];
2392 vectorSize *= 4;
2393 }
2394 else
2395 {
2396 log_info("WARNING: Unable to search for debug pixel: invalid image "
2397 "format\n");
2398 return false;
2399 }
2400 return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2401 vectorSize, outX, outY, outZ, lod);
2402 }
2403
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,int * valuesToFind,int * outX,int * outY,int * outZ,int lod)2404 int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2405 int *valuesToFind, int *outX, int *outY,
2406 int *outZ, int lod)
2407 {
2408 char vectorToFind[4 * 4];
2409 size_t vectorSize = get_format_channel_count(imageInfo->format);
2410
2411 if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT8)
2412 {
2413 char *p = (char *)vectorToFind;
2414 for (unsigned int i = 0; i < vectorSize; i++)
2415 p[i] = (char)valuesToFind[i];
2416 }
2417 else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT16)
2418 {
2419 short *p = (short *)vectorToFind;
2420 for (unsigned int i = 0; i < vectorSize; i++)
2421 p[i] = (short)valuesToFind[i];
2422 vectorSize *= 2;
2423 }
2424 else if (imageInfo->format->image_channel_data_type == CL_SIGNED_INT32)
2425 {
2426 int *p = (int *)vectorToFind;
2427 for (unsigned int i = 0; i < vectorSize; i++)
2428 p[i] = (int)valuesToFind[i];
2429 vectorSize *= 4;
2430 }
2431 else
2432 {
2433 log_info("WARNING: Unable to search for debug pixel: invalid image "
2434 "format\n");
2435 return false;
2436 }
2437 return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2438 vectorSize, outX, outY, outZ, lod);
2439 }
2440
debug_find_pixel_in_image(void * imagePtr,image_descriptor * imageInfo,float * valuesToFind,int * outX,int * outY,int * outZ,int lod)2441 int debug_find_pixel_in_image(void *imagePtr, image_descriptor *imageInfo,
2442 float *valuesToFind, int *outX, int *outY,
2443 int *outZ, int lod)
2444 {
2445 char vectorToFind[4 * 4];
2446 float swizzled[4];
2447 memcpy(swizzled, valuesToFind, sizeof(swizzled));
2448 size_t vectorSize = get_pixel_size(imageInfo->format);
2449 pack_image_pixel(swizzled, imageInfo->format, vectorToFind);
2450 return debug_find_vector_in_image(imagePtr, imageInfo, vectorToFind,
2451 vectorSize, outX, outY, outZ, lod);
2452 }
2453
2454 template <class T>
swizzle_vector_for_image(T * srcVector,const cl_image_format * imageFormat)2455 void swizzle_vector_for_image(T *srcVector, const cl_image_format *imageFormat)
2456 {
2457 T temp;
2458 switch (imageFormat->image_channel_order)
2459 {
2460 case CL_A: srcVector[0] = srcVector[3]; break;
2461 case CL_R:
2462 case CL_Rx:
2463 case CL_RG:
2464 case CL_RGx:
2465 case CL_RGB:
2466 case CL_RGBx:
2467 case CL_RGBA:
2468 case CL_sRGB:
2469 case CL_sRGBx:
2470 case CL_sRGBA: break;
2471 case CL_RA: srcVector[1] = srcVector[3]; break;
2472 case CL_ARGB:
2473 temp = srcVector[3];
2474 srcVector[3] = srcVector[2];
2475 srcVector[2] = srcVector[1];
2476 srcVector[1] = srcVector[0];
2477 srcVector[0] = temp;
2478 break;
2479 case CL_ABGR:
2480 temp = srcVector[3];
2481 srcVector[3] = srcVector[0];
2482 srcVector[0] = temp;
2483 temp = srcVector[2];
2484 srcVector[2] = srcVector[1];
2485 srcVector[1] = temp;
2486 break;
2487 case CL_BGRA:
2488 case CL_sBGRA:
2489 temp = srcVector[0];
2490 srcVector[0] = srcVector[2];
2491 srcVector[2] = temp;
2492 break;
2493 case CL_INTENSITY:
2494 srcVector[3] = srcVector[0];
2495 srcVector[2] = srcVector[0];
2496 srcVector[1] = srcVector[0];
2497 break;
2498 case CL_LUMINANCE:
2499 srcVector[2] = srcVector[0];
2500 srcVector[1] = srcVector[0];
2501 break;
2502 #ifdef CL_1RGB_APPLE
2503 case CL_1RGB_APPLE:
2504 temp = srcVector[3];
2505 srcVector[3] = srcVector[2];
2506 srcVector[2] = srcVector[1];
2507 srcVector[1] = srcVector[0];
2508 srcVector[0] = temp;
2509 break;
2510 #endif
2511 #ifdef CL_BGR1_APPLE
2512 case CL_BGR1_APPLE:
2513 temp = srcVector[0];
2514 srcVector[0] = srcVector[2];
2515 srcVector[2] = temp;
2516 break;
2517 #endif
2518 }
2519 }
2520
2521 #define SATURATE(v, min, max) (v < min ? min : (v > max ? max : v))
2522
pack_image_pixel(unsigned int * srcVector,const cl_image_format * imageFormat,void * outData)2523 void pack_image_pixel(unsigned int *srcVector,
2524 const cl_image_format *imageFormat, void *outData)
2525 {
2526 swizzle_vector_for_image<unsigned int>(srcVector, imageFormat);
2527 size_t channelCount = get_format_channel_count(imageFormat);
2528
2529 switch (imageFormat->image_channel_data_type)
2530 {
2531 case CL_UNSIGNED_INT8: {
2532 unsigned char *ptr = (unsigned char *)outData;
2533 for (unsigned int i = 0; i < channelCount; i++)
2534 ptr[i] = (unsigned char)SATURATE(srcVector[i], 0, 255);
2535 break;
2536 }
2537 case CL_UNSIGNED_INT16: {
2538 unsigned short *ptr = (unsigned short *)outData;
2539 for (unsigned int i = 0; i < channelCount; i++)
2540 ptr[i] = (unsigned short)SATURATE(srcVector[i], 0, 65535);
2541 break;
2542 }
2543 case CL_UNSIGNED_INT32: {
2544 unsigned int *ptr = (unsigned int *)outData;
2545 for (unsigned int i = 0; i < channelCount; i++)
2546 ptr[i] = (unsigned int)srcVector[i];
2547 break;
2548 }
2549 default: break;
2550 }
2551 }
2552
pack_image_pixel(int * srcVector,const cl_image_format * imageFormat,void * outData)2553 void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat,
2554 void *outData)
2555 {
2556 swizzle_vector_for_image<int>(srcVector, imageFormat);
2557 size_t chanelCount = get_format_channel_count(imageFormat);
2558
2559 switch (imageFormat->image_channel_data_type)
2560 {
2561 case CL_SIGNED_INT8: {
2562 char *ptr = (char *)outData;
2563 for (unsigned int i = 0; i < chanelCount; i++)
2564 ptr[i] = (char)SATURATE(srcVector[i], -128, 127);
2565 break;
2566 }
2567 case CL_SIGNED_INT16: {
2568 short *ptr = (short *)outData;
2569 for (unsigned int i = 0; i < chanelCount; i++)
2570 ptr[i] = (short)SATURATE(srcVector[i], -32768, 32767);
2571 break;
2572 }
2573 case CL_SIGNED_INT32: {
2574 int *ptr = (int *)outData;
2575 for (unsigned int i = 0; i < chanelCount; i++)
2576 ptr[i] = (int)srcVector[i];
2577 break;
2578 }
2579 default: break;
2580 }
2581 }
2582
round_to_even(float v)2583 int round_to_even(float v)
2584 {
2585 // clamp overflow
2586 if (v >= -(float)INT_MIN) return INT_MAX;
2587 if (v <= (float)INT_MIN) return INT_MIN;
2588
2589 // round fractional values to integer value
2590 if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23))
2591 {
2592 static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23),
2593 MAKE_HEX_FLOAT(-0x1.0p23f, -0x1L, 23) };
2594 float magicVal = magic[v < 0.0f];
2595 v += magicVal;
2596 v -= magicVal;
2597 }
2598
2599 return (int)v;
2600 }
2601
pack_image_pixel(float * srcVector,const cl_image_format * imageFormat,void * outData)2602 void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat,
2603 void *outData)
2604 {
2605 swizzle_vector_for_image<float>(srcVector, imageFormat);
2606 size_t channelCount = get_format_channel_count(imageFormat);
2607 switch (imageFormat->image_channel_data_type)
2608 {
2609 case CL_HALF_FLOAT: {
2610 cl_half *ptr = (cl_half *)outData;
2611
2612 switch (gFloatToHalfRoundingMode)
2613 {
2614 case kRoundToNearestEven:
2615 for (unsigned int i = 0; i < channelCount; i++)
2616 ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTE);
2617 break;
2618 case kRoundTowardZero:
2619 for (unsigned int i = 0; i < channelCount; i++)
2620 ptr[i] = cl_half_from_float(srcVector[i], CL_HALF_RTZ);
2621 break;
2622 default:
2623 log_error("ERROR: Test internal error -- unhandled or "
2624 "unknown float->half rounding mode.\n");
2625 exit(-1);
2626 break;
2627 }
2628 break;
2629 }
2630
2631 case CL_FLOAT: {
2632 cl_float *ptr = (cl_float *)outData;
2633 for (unsigned int i = 0; i < channelCount; i++)
2634 ptr[i] = srcVector[i];
2635 break;
2636 }
2637
2638 case CL_SNORM_INT8: {
2639 cl_char *ptr = (cl_char *)outData;
2640 for (unsigned int i = 0; i < channelCount; i++)
2641 ptr[i] =
2642 (cl_char)NORMALIZE_SIGNED(srcVector[i], -127.0f, 127.f);
2643 break;
2644 }
2645 case CL_SNORM_INT16: {
2646 cl_short *ptr = (cl_short *)outData;
2647 for (unsigned int i = 0; i < channelCount; i++)
2648 ptr[i] =
2649 (short)NORMALIZE_SIGNED(srcVector[i], -32767.f, 32767.f);
2650 break;
2651 }
2652 case CL_UNORM_INT8: {
2653 cl_uchar *ptr = (cl_uchar *)outData;
2654 if (is_sRGBA_order(imageFormat->image_channel_order))
2655 {
2656 ptr[0] = (unsigned char)(sRGBmap(srcVector[0]) + 0.5);
2657 ptr[1] = (unsigned char)(sRGBmap(srcVector[1]) + 0.5);
2658 ptr[2] = (unsigned char)(sRGBmap(srcVector[2]) + 0.5);
2659 if (channelCount == 4)
2660 ptr[3] = (unsigned char)NORMALIZE(srcVector[3], 255.f);
2661 }
2662 else
2663 {
2664 for (unsigned int i = 0; i < channelCount; i++)
2665 ptr[i] = (unsigned char)NORMALIZE(srcVector[i], 255.f);
2666 }
2667 #ifdef CL_1RGB_APPLE
2668 if (imageFormat->image_channel_order == CL_1RGB_APPLE)
2669 ptr[0] = 255.0f;
2670 #endif
2671 #ifdef CL_BGR1_APPLE
2672 if (imageFormat->image_channel_order == CL_BGR1_APPLE)
2673 ptr[3] = 255.0f;
2674 #endif
2675 break;
2676 }
2677 case CL_UNORM_INT16: {
2678 cl_ushort *ptr = (cl_ushort *)outData;
2679 for (unsigned int i = 0; i < channelCount; i++)
2680 ptr[i] = (unsigned short)NORMALIZE(srcVector[i], 65535.f);
2681 break;
2682 }
2683 case CL_UNORM_SHORT_555: {
2684 cl_ushort *ptr = (cl_ushort *)outData;
2685 ptr[0] =
2686 (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 10)
2687 | (((unsigned short)NORMALIZE(srcVector[1], 31.f) & 31) << 5)
2688 | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
2689 break;
2690 }
2691 case CL_UNORM_SHORT_565: {
2692 cl_ushort *ptr = (cl_ushort *)outData;
2693 ptr[0] =
2694 (((unsigned short)NORMALIZE(srcVector[0], 31.f) & 31) << 11)
2695 | (((unsigned short)NORMALIZE(srcVector[1], 63.f) & 63) << 5)
2696 | (((unsigned short)NORMALIZE(srcVector[2], 31.f) & 31) << 0);
2697 break;
2698 }
2699 case CL_UNORM_INT_101010: {
2700 cl_uint *ptr = (cl_uint *)outData;
2701 ptr[0] =
2702 (((unsigned int)NORMALIZE(srcVector[0], 1023.f) & 1023) << 20)
2703 | (((unsigned int)NORMALIZE(srcVector[1], 1023.f) & 1023) << 10)
2704 | (((unsigned int)NORMALIZE(srcVector[2], 1023.f) & 1023) << 0);
2705 break;
2706 }
2707 case CL_SIGNED_INT8: {
2708 cl_char *ptr = (cl_char *)outData;
2709 for (unsigned int i = 0; i < channelCount; i++)
2710 ptr[i] =
2711 (cl_char)CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
2712 break;
2713 }
2714 case CL_SIGNED_INT16: {
2715 cl_short *ptr = (cl_short *)outData;
2716 for (unsigned int i = 0; i < channelCount; i++)
2717 ptr[i] =
2718 (short)CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
2719 break;
2720 }
2721 case CL_SIGNED_INT32: {
2722 cl_int *ptr = (cl_int *)outData;
2723 for (unsigned int i = 0; i < channelCount; i++)
2724 ptr[i] = (int)CONVERT_INT(
2725 srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
2726 MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
2727 CL_INT_MAX);
2728 break;
2729 }
2730 case CL_UNSIGNED_INT8: {
2731 cl_uchar *ptr = (cl_uchar *)outData;
2732 for (unsigned int i = 0; i < channelCount; i++)
2733 ptr[i] =
2734 (cl_uchar)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
2735 break;
2736 }
2737 case CL_UNSIGNED_INT16: {
2738 cl_ushort *ptr = (cl_ushort *)outData;
2739 for (unsigned int i = 0; i < channelCount; i++)
2740 ptr[i] = (cl_ushort)CONVERT_UINT(srcVector[i], 32767.f,
2741 CL_USHRT_MAX);
2742 break;
2743 }
2744 case CL_UNSIGNED_INT32: {
2745 cl_uint *ptr = (cl_uint *)outData;
2746 for (unsigned int i = 0; i < channelCount; i++)
2747 ptr[i] = (cl_uint)CONVERT_UINT(
2748 srcVector[i],
2749 MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
2750 CL_UINT_MAX);
2751 break;
2752 }
2753 #ifdef CL_SFIXED14_APPLE
2754 case CL_SFIXED14_APPLE: {
2755 cl_ushort *ptr = (cl_ushort *)outData;
2756 for (unsigned int i = 0; i < channelCount; i++)
2757 {
2758 cl_float f = fmaxf(srcVector[i], -1.0f);
2759 f = fminf(f, 3.0f);
2760 cl_int d = rintf(f * 0x1.0p14f);
2761 d += 16384;
2762 if (d > CL_USHRT_MAX) d = CL_USHRT_MAX;
2763 ptr[i] = d;
2764 }
2765 break;
2766 }
2767 #endif
2768 default:
2769 log_error("INTERNAL ERROR: unknown format (%d)\n",
2770 imageFormat->image_channel_data_type);
2771 exit(-1);
2772 break;
2773 }
2774 }
2775
pack_image_pixel_error(const float * srcVector,const cl_image_format * imageFormat,const void * results,float * errors)2776 void pack_image_pixel_error(const float *srcVector,
2777 const cl_image_format *imageFormat,
2778 const void *results, float *errors)
2779 {
2780 size_t channelCount = get_format_channel_count(imageFormat);
2781 switch (imageFormat->image_channel_data_type)
2782 {
2783 case CL_HALF_FLOAT: {
2784 const cl_half *ptr = (const cl_half *)results;
2785
2786 for (unsigned int i = 0; i < channelCount; i++)
2787 errors[i] = Ulp_Error_Half(ptr[i], srcVector[i]);
2788
2789 break;
2790 }
2791
2792 case CL_FLOAT: {
2793 const cl_ushort *ptr = (const cl_ushort *)results;
2794
2795 for (unsigned int i = 0; i < channelCount; i++)
2796 errors[i] = Ulp_Error(ptr[i], srcVector[i]);
2797
2798 break;
2799 }
2800
2801 case CL_SNORM_INT8: {
2802 const cl_char *ptr = (const cl_char *)results;
2803
2804 for (unsigned int i = 0; i < channelCount; i++)
2805 errors[i] = ptr[i]
2806 - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -127.0f, 127.f);
2807
2808 break;
2809 }
2810 case CL_SNORM_INT16: {
2811 const cl_short *ptr = (const cl_short *)results;
2812
2813 for (unsigned int i = 0; i < channelCount; i++)
2814 errors[i] = ptr[i]
2815 - NORMALIZE_SIGNED_UNROUNDED(srcVector[i], -32767.f,
2816 32767.f);
2817
2818 break;
2819 }
2820 case CL_UNORM_INT8: {
2821 const cl_uchar *ptr = (const cl_uchar *)results;
2822
2823 for (unsigned int i = 0; i < channelCount; i++)
2824 errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 255.f);
2825
2826 break;
2827 }
2828 case CL_UNORM_INT16: {
2829 const cl_ushort *ptr = (const cl_ushort *)results;
2830
2831 for (unsigned int i = 0; i < channelCount; i++)
2832 errors[i] = ptr[i] - NORMALIZE_UNROUNDED(srcVector[i], 65535.f);
2833
2834 break;
2835 }
2836 case CL_UNORM_SHORT_555: {
2837 const cl_ushort *ptr = (const cl_ushort *)results;
2838
2839 errors[0] =
2840 ((ptr[0] >> 10) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
2841 errors[1] =
2842 ((ptr[0] >> 5) & 31) - NORMALIZE_UNROUNDED(srcVector[1], 31.f);
2843 errors[2] =
2844 ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
2845
2846 break;
2847 }
2848 case CL_UNORM_SHORT_565: {
2849 const cl_ushort *ptr = (const cl_ushort *)results;
2850
2851 errors[0] =
2852 ((ptr[0] >> 11) & 31) - NORMALIZE_UNROUNDED(srcVector[0], 31.f);
2853 errors[1] =
2854 ((ptr[0] >> 5) & 63) - NORMALIZE_UNROUNDED(srcVector[1], 63.f);
2855 errors[2] =
2856 ((ptr[0] >> 0) & 31) - NORMALIZE_UNROUNDED(srcVector[2], 31.f);
2857
2858 break;
2859 }
2860 case CL_UNORM_INT_101010: {
2861 const cl_uint *ptr = (const cl_uint *)results;
2862
2863 errors[0] = ((ptr[0] >> 20) & 1023)
2864 - NORMALIZE_UNROUNDED(srcVector[0], 1023.f);
2865 errors[1] = ((ptr[0] >> 10) & 1023)
2866 - NORMALIZE_UNROUNDED(srcVector[1], 1023.f);
2867 errors[2] = ((ptr[0] >> 0) & 1023)
2868 - NORMALIZE_UNROUNDED(srcVector[2], 1023.f);
2869
2870 break;
2871 }
2872 case CL_SIGNED_INT8: {
2873 const cl_char *ptr = (const cl_char *)results;
2874
2875 for (unsigned int i = 0; i < channelCount; i++)
2876 errors[i] =
2877 ptr[i] - CONVERT_INT(srcVector[i], -127.0f, 127.f, 127);
2878
2879 break;
2880 }
2881 case CL_SIGNED_INT16: {
2882 const cl_short *ptr = (const cl_short *)results;
2883 for (unsigned int i = 0; i < channelCount; i++)
2884 errors[i] = ptr[i]
2885 - CONVERT_INT(srcVector[i], -32767.f, 32767.f, 32767);
2886 break;
2887 }
2888 case CL_SIGNED_INT32: {
2889 const cl_int *ptr = (const cl_int *)results;
2890 for (unsigned int i = 0; i < channelCount; i++)
2891 errors[i] = (cl_float)(
2892 (cl_long)ptr[i]
2893 - (cl_long)CONVERT_INT(
2894 srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31),
2895 MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23),
2896 CL_INT_MAX));
2897 break;
2898 }
2899 case CL_UNSIGNED_INT8: {
2900 const cl_uchar *ptr = (const cl_uchar *)results;
2901 for (unsigned int i = 0; i < channelCount; i++)
2902 errors[i] = (cl_int)ptr[i]
2903 - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX);
2904 break;
2905 }
2906 case CL_UNSIGNED_INT16: {
2907 const cl_ushort *ptr = (const cl_ushort *)results;
2908 for (unsigned int i = 0; i < channelCount; i++)
2909 errors[i] = (cl_int)ptr[i]
2910 - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX);
2911 break;
2912 }
2913 case CL_UNSIGNED_INT32: {
2914 const cl_uint *ptr = (const cl_uint *)results;
2915 for (unsigned int i = 0; i < channelCount; i++)
2916 errors[i] = (cl_float)(
2917 (cl_long)ptr[i]
2918 - (cl_long)CONVERT_UINT(
2919 srcVector[i],
2920 MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffe, 31 - 23),
2921 CL_UINT_MAX));
2922 break;
2923 }
2924 #ifdef CL_SFIXED14_APPLE
2925 case CL_SFIXED14_APPLE: {
2926 const cl_ushort *ptr = (const cl_ushort *)results;
2927
2928 for (unsigned int i = 0; i < channelCount; i++)
2929 errors[i] = ptr[i]
2930 - NORMALIZE_SIGNED_UNROUNDED(((int)srcVector[i] - 16384),
2931 -16384.f, 49151.f);
2932
2933 break;
2934 }
2935 #endif
2936 default:
2937 log_error("INTERNAL ERROR: unknown format (%d)\n",
2938 imageFormat->image_channel_data_type);
2939 exit(-1);
2940 break;
2941 }
2942 }
2943
2944
2945 //
2946 // Autodetect which rounding mode is used for image writes to CL_HALF_FLOAT
2947 // This should be called lazily before attempting to verify image writes,
2948 // otherwise an error will occur.
2949 //
DetectFloatToHalfRoundingMode(cl_command_queue q)2950 int DetectFloatToHalfRoundingMode(
2951 cl_command_queue q) // Returns CL_SUCCESS on success
2952 {
2953 cl_int err = CL_SUCCESS;
2954
2955 if (gFloatToHalfRoundingMode == kDefaultRoundingMode)
2956 {
2957 // Some numbers near 0.5f, that we look at to see how the values are
2958 // rounded.
2959 static const cl_uint inData[4 * 4] = {
2960 0x3f000fffU, 0x3f001000U, 0x3f001001U, 0U,
2961 0x3f001fffU, 0x3f002000U, 0x3f002001U, 0U,
2962 0x3f002fffU, 0x3f003000U, 0x3f003001U, 0U,
2963 0x3f003fffU, 0x3f004000U, 0x3f004001U, 0U
2964 };
2965 static const size_t count = sizeof(inData) / (4 * sizeof(inData[0]));
2966 const float *inp = (const float *)inData;
2967 cl_context context = NULL;
2968
2969 // Create an input buffer
2970 err = clGetCommandQueueInfo(q, CL_QUEUE_CONTEXT, sizeof(context),
2971 &context, NULL);
2972 if (err)
2973 {
2974 log_error("Error: could not get context from command queue in "
2975 "DetectFloatToHalfRoundingMode (%d)",
2976 err);
2977 return err;
2978 }
2979
2980 cl_mem inBuf = clCreateBuffer(context,
2981 CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR
2982 | CL_MEM_ALLOC_HOST_PTR,
2983 sizeof(inData), (void *)inData, &err);
2984 if (NULL == inBuf || err)
2985 {
2986 log_error("Error: could not create input buffer in "
2987 "DetectFloatToHalfRoundingMode (err: %d)",
2988 err);
2989 return err;
2990 }
2991
2992 // Create a small output image
2993 cl_image_format fmt = { CL_RGBA, CL_HALF_FLOAT };
2994 cl_mem outImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &fmt,
2995 count, 1, 0, NULL, &err);
2996 if (NULL == outImage || err)
2997 {
2998 log_error("Error: could not create half float out image in "
2999 "DetectFloatToHalfRoundingMode (err: %d)",
3000 err);
3001 clReleaseMemObject(inBuf);
3002 return err;
3003 }
3004
3005 // Create our program, and a kernel
3006 const char *kernelSource[1] = {
3007 "kernel void detect_round( global float4 *in, write_only image2d_t "
3008 "out )\n"
3009 "{\n"
3010 " write_imagef( out, (int2)(get_global_id(0),0), "
3011 "in[get_global_id(0)] );\n"
3012 "}\n"
3013 };
3014
3015 clProgramWrapper program;
3016 clKernelWrapper kernel;
3017 err = create_single_kernel_helper(context, &program, &kernel, 1,
3018 kernelSource, "detect_round");
3019
3020 if (NULL == program || err)
3021 {
3022 log_error("Error: could not create program in "
3023 "DetectFloatToHalfRoundingMode (err: %d)",
3024 err);
3025 clReleaseMemObject(inBuf);
3026 clReleaseMemObject(outImage);
3027 return err;
3028 }
3029
3030 cl_device_id device = NULL;
3031 err = clGetCommandQueueInfo(q, CL_QUEUE_DEVICE, sizeof(device), &device,
3032 NULL);
3033 if (err)
3034 {
3035 log_error("Error: could not get device from command queue in "
3036 "DetectFloatToHalfRoundingMode (%d)",
3037 err);
3038 clReleaseMemObject(inBuf);
3039 clReleaseMemObject(outImage);
3040 return err;
3041 }
3042
3043 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inBuf);
3044 if (err)
3045 {
3046 log_error("Error: could not set argument 0 of kernel in "
3047 "DetectFloatToHalfRoundingMode (%d)",
3048 err);
3049 clReleaseMemObject(inBuf);
3050 clReleaseMemObject(outImage);
3051 return err;
3052 }
3053
3054 err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &outImage);
3055 if (err)
3056 {
3057 log_error("Error: could not set argument 1 of kernel in "
3058 "DetectFloatToHalfRoundingMode (%d)",
3059 err);
3060 clReleaseMemObject(inBuf);
3061 clReleaseMemObject(outImage);
3062 return err;
3063 }
3064
3065 // Run the kernel
3066 size_t global_work_size = count;
3067 err = clEnqueueNDRangeKernel(q, kernel, 1, NULL, &global_work_size,
3068 NULL, 0, NULL, NULL);
3069 if (err)
3070 {
3071 log_error("Error: could not enqueue kernel in "
3072 "DetectFloatToHalfRoundingMode (%d)",
3073 err);
3074 clReleaseMemObject(inBuf);
3075 clReleaseMemObject(outImage);
3076 return err;
3077 }
3078
3079 // read the results
3080 cl_half outBuf[count * 4];
3081 memset(outBuf, -1, sizeof(outBuf));
3082 size_t origin[3] = { 0, 0, 0 };
3083 size_t region[3] = { count, 1, 1 };
3084 err = clEnqueueReadImage(q, outImage, CL_TRUE, origin, region, 0, 0,
3085 outBuf, 0, NULL, NULL);
3086 if (err)
3087 {
3088 log_error("Error: could not read output image in "
3089 "DetectFloatToHalfRoundingMode (%d)",
3090 err);
3091 clReleaseMemObject(inBuf);
3092 clReleaseMemObject(outImage);
3093 return err;
3094 }
3095
3096 // Generate our list of reference results
3097 cl_half rte_ref[count * 4];
3098 cl_half rtz_ref[count * 4];
3099 for (size_t i = 0; i < 4 * count; i++)
3100 {
3101 rte_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTE);
3102 rtz_ref[i] = cl_half_from_float(inp[i], CL_HALF_RTZ);
3103 }
3104
3105 // Verify that we got something in either rtz or rte mode
3106 if (0 == memcmp(rte_ref, outBuf, sizeof(rte_ref)))
3107 {
3108 log_info("Autodetected float->half rounding mode to be rte\n");
3109 gFloatToHalfRoundingMode = kRoundToNearestEven;
3110 }
3111 else if (0 == memcmp(rtz_ref, outBuf, sizeof(rtz_ref)))
3112 {
3113 log_info("Autodetected float->half rounding mode to be rtz\n");
3114 gFloatToHalfRoundingMode = kRoundTowardZero;
3115 }
3116 else
3117 {
3118 log_error("ERROR: float to half conversions proceed with invalid "
3119 "rounding mode!\n");
3120 log_info("\nfor:");
3121 for (size_t i = 0; i < count; i++)
3122 log_info(" {%a, %a, %a, %a},", inp[4 * i], inp[4 * i + 1],
3123 inp[4 * i + 2], inp[4 * i + 3]);
3124 log_info("\ngot:");
3125 for (size_t i = 0; i < count; i++)
3126 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3127 outBuf[4 * i], outBuf[4 * i + 1], outBuf[4 * i + 2],
3128 outBuf[4 * i + 3]);
3129 log_info("\nrte:");
3130 for (size_t i = 0; i < count; i++)
3131 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3132 rte_ref[4 * i], rte_ref[4 * i + 1], rte_ref[4 * i + 2],
3133 rte_ref[4 * i + 3]);
3134 log_info("\nrtz:");
3135 for (size_t i = 0; i < count; i++)
3136 log_info(" {0x%4.4x, 0x%4.4x, 0x%4.4x, 0x%4.4x},",
3137 rtz_ref[4 * i], rtz_ref[4 * i + 1], rtz_ref[4 * i + 2],
3138 rtz_ref[4 * i + 3]);
3139 log_info("\n");
3140 err = -1;
3141 gFloatToHalfRoundingMode = kRoundingModeCount; // illegal value
3142 }
3143
3144 // clean up
3145 clReleaseMemObject(inBuf);
3146 clReleaseMemObject(outImage);
3147 return err;
3148 }
3149
3150 // Make sure that the rounding mode was successfully detected, if we checked
3151 // earlier
3152 if (gFloatToHalfRoundingMode != kRoundToNearestEven
3153 && gFloatToHalfRoundingMode != kRoundTowardZero)
3154 return -2;
3155
3156 return err;
3157 }
3158
create_random_image_data(ExplicitType dataType,image_descriptor * imageInfo,BufferOwningPtr<char> & P,MTdata d,bool image2DFromBuffer)3159 char *create_random_image_data(ExplicitType dataType,
3160 image_descriptor *imageInfo,
3161 BufferOwningPtr<char> &P, MTdata d,
3162 bool image2DFromBuffer)
3163 {
3164 size_t allocSize, numPixels;
3165 if (/*gTestMipmaps*/ imageInfo->num_mip_levels > 1)
3166 {
3167 allocSize = (size_t)(compute_mipmapped_image_size(*imageInfo) * 4
3168 * get_explicit_type_size(dataType))
3169 / get_pixel_size(imageInfo->format);
3170 numPixels = allocSize / (get_explicit_type_size(dataType) * 4);
3171 }
3172 else
3173 {
3174 numPixels = (image2DFromBuffer ? imageInfo->rowPitch : imageInfo->width)
3175 * imageInfo->height * (imageInfo->depth ? imageInfo->depth : 1)
3176 * (imageInfo->arraySize ? imageInfo->arraySize : 1);
3177 allocSize = numPixels * 4 * get_explicit_type_size(dataType);
3178 }
3179
3180 #if 0 // DEBUG
3181 {
3182 fprintf(stderr,"--- create_random_image_data:\n");
3183 fprintf(stderr,"allocSize = %zu\n",allocSize);
3184 fprintf(stderr,"numPixels = %zu\n",numPixels);
3185 fprintf(stderr,"width = %zu\n",imageInfo->width);
3186 fprintf(stderr,"height = %zu\n",imageInfo->height);
3187 fprintf(stderr,"depth = %zu\n",imageInfo->depth);
3188 fprintf(stderr,"rowPitch = %zu\n",imageInfo->rowPitch);
3189 fprintf(stderr,"slicePitch = %zu\n",imageInfo->slicePitch);
3190 fprintf(stderr,"arraySize = %zu\n",imageInfo->arraySize);
3191 fprintf(stderr,"explicit_type_size = %zu\n",get_explicit_type_size(dataType));
3192 }
3193 #endif
3194
3195 #if defined(__APPLE__)
3196 char *data = NULL;
3197 if (gDeviceType == CL_DEVICE_TYPE_CPU)
3198 {
3199 size_t mapSize =
3200 ((allocSize + 4095L) & -4096L) + 8192; // alloc two extra pages.
3201
3202 void *map = mmap(0, mapSize, PROT_READ | PROT_WRITE,
3203 MAP_ANON | MAP_PRIVATE, 0, 0);
3204 if (map == MAP_FAILED)
3205 {
3206 perror("create_random_image_data: mmap");
3207 log_error("%s:%d: mmap failed, mapSize = %zu\n", __FILE__, __LINE__,
3208 mapSize);
3209 }
3210 intptr_t data_end = (intptr_t)map + mapSize - 4096;
3211 data = (char *)(data_end - (intptr_t)allocSize);
3212
3213 mprotect(map, 4096, PROT_NONE);
3214 mprotect((void *)((char *)map + mapSize - 4096), 4096, PROT_NONE);
3215 P.reset(data, map, mapSize);
3216 }
3217 else
3218 {
3219 data = (char *)malloc(allocSize);
3220 P.reset(data);
3221 }
3222 #else
3223 char *data =
3224 (char *)align_malloc(allocSize, get_pixel_alignment(imageInfo->format));
3225 P.reset(data, NULL, 0, allocSize, true);
3226 #endif
3227
3228 if (data == NULL)
3229 {
3230 log_error(
3231 "ERROR: Unable to malloc %lu bytes for create_random_image_data\n",
3232 allocSize);
3233 return NULL;
3234 }
3235
3236 switch (dataType)
3237 {
3238 case kFloat: {
3239 float *inputValues = (float *)data;
3240 switch (imageInfo->format->image_channel_data_type)
3241 {
3242 case CL_HALF_FLOAT: {
3243 // Generate data that is (mostly) inside the range of a half
3244 // float const float HALF_MIN = 5.96046448e-08f;
3245 const float HALF_MAX = 65504.0f;
3246
3247 size_t i = 0;
3248 inputValues[i++] = 0.f;
3249 inputValues[i++] = 1.f;
3250 inputValues[i++] = -1.f;
3251 inputValues[i++] = 2.f;
3252 for (; i < numPixels * 4; i++)
3253 inputValues[i] = get_random_float(-HALF_MAX - 2.f,
3254 HALF_MAX + 2.f, d);
3255 }
3256 break;
3257 #ifdef CL_SFIXED14_APPLE
3258 case CL_SFIXED14_APPLE: {
3259 size_t i = 0;
3260 if (numPixels * 4 >= 8)
3261 {
3262 inputValues[i++] = INFINITY;
3263 inputValues[i++] = 0x1.0p14f;
3264 inputValues[i++] = 0x1.0p31f;
3265 inputValues[i++] = 0x1.0p32f;
3266 inputValues[i++] = -INFINITY;
3267 inputValues[i++] = -0x1.0p14f;
3268 inputValues[i++] = -0x1.0p31f;
3269 inputValues[i++] = -0x1.1p31f;
3270 }
3271 for (; i < numPixels * 4; i++)
3272 inputValues[i] = get_random_float(-1.1f, 3.1f, d);
3273 }
3274 break;
3275 #endif
3276 case CL_FLOAT: {
3277 size_t i = 0;
3278 inputValues[i++] = INFINITY;
3279 inputValues[i++] = -INFINITY;
3280 inputValues[i++] = 0.0f;
3281 inputValues[i++] = 0.0f;
3282 cl_uint *p = (cl_uint *)data;
3283 for (; i < numPixels * 4; i++) p[i] = genrand_int32(d);
3284 }
3285 break;
3286
3287 default:
3288 size_t i = 0;
3289 if (numPixels * 4 >= 36)
3290 {
3291 inputValues[i++] = 0.0f;
3292 inputValues[i++] = 0.5f;
3293 inputValues[i++] = 31.5f;
3294 inputValues[i++] = 32.0f;
3295 inputValues[i++] = 127.5f;
3296 inputValues[i++] = 128.0f;
3297 inputValues[i++] = 255.5f;
3298 inputValues[i++] = 256.0f;
3299 inputValues[i++] = 1023.5f;
3300 inputValues[i++] = 1024.0f;
3301 inputValues[i++] = 32767.5f;
3302 inputValues[i++] = 32768.0f;
3303 inputValues[i++] = 65535.5f;
3304 inputValues[i++] = 65536.0f;
3305 inputValues[i++] = 2147483648.0f;
3306 inputValues[i++] = 4294967296.0f;
3307 inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
3308 inputValues[i++] = MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
3309 inputValues[i++] = -0.0f;
3310 inputValues[i++] = -0.5f;
3311 inputValues[i++] = -31.5f;
3312 inputValues[i++] = -32.0f;
3313 inputValues[i++] = -127.5f;
3314 inputValues[i++] = -128.0f;
3315 inputValues[i++] = -255.5f;
3316 inputValues[i++] = -256.0f;
3317 inputValues[i++] = -1023.5f;
3318 inputValues[i++] = -1024.0f;
3319 inputValues[i++] = -32767.5f;
3320 inputValues[i++] = -32768.0f;
3321 inputValues[i++] = -65535.5f;
3322 inputValues[i++] = -65536.0f;
3323 inputValues[i++] = -2147483648.0f;
3324 inputValues[i++] = -4294967296.0f;
3325 inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p63f, 1, 63);
3326 inputValues[i++] = -MAKE_HEX_FLOAT(0x1.0p64f, 1, 64);
3327 }
3328 if (is_format_signed(imageInfo->format))
3329 {
3330 for (; i < numPixels * 4; i++)
3331 inputValues[i] = get_random_float(-1.1f, 1.1f, d);
3332 }
3333 else
3334 {
3335 for (; i < numPixels * 4; i++)
3336 inputValues[i] = get_random_float(-0.1f, 1.1f, d);
3337 }
3338 break;
3339 }
3340 break;
3341 }
3342
3343 case kInt: {
3344 int *imageData = (int *)data;
3345
3346 // We want to generate ints (mostly) in range of the target format
3347 int formatMin = get_format_min_int(imageInfo->format);
3348 size_t formatMax = get_format_max_int(imageInfo->format);
3349 if (formatMin == 0)
3350 {
3351 // Unsigned values, but we are only an int, so cap the actual
3352 // max at the max of signed ints
3353 if (formatMax > 2147483647L) formatMax = 2147483647L;
3354 }
3355 // If the final format is small enough, give us a bit of room for
3356 // out-of-range values to test
3357 if (formatMax < 2147483647L) formatMax += 2;
3358 if (formatMin > -2147483648LL) formatMin -= 2;
3359
3360 // Now gen
3361 for (size_t i = 0; i < numPixels * 4; i++)
3362 {
3363 imageData[i] = random_in_range(formatMin, (int)formatMax, d);
3364 }
3365 break;
3366 }
3367
3368 case kUInt:
3369 case kUnsignedInt: {
3370 unsigned int *imageData = (unsigned int *)data;
3371
3372 // We want to generate ints (mostly) in range of the target format
3373 int formatMin = get_format_min_int(imageInfo->format);
3374 size_t formatMax = get_format_max_int(imageInfo->format);
3375 if (formatMin < 0) formatMin = 0;
3376 // If the final format is small enough, give us a bit of room for
3377 // out-of-range values to test
3378 if (formatMax < 4294967295LL) formatMax += 2;
3379
3380 // Now gen
3381 for (size_t i = 0; i < numPixels * 4; i++)
3382 {
3383 imageData[i] = random_in_range(formatMin, (int)formatMax, d);
3384 }
3385 break;
3386 }
3387 default:
3388 // Unsupported source format
3389 delete[] data;
3390 return NULL;
3391 }
3392
3393 return data;
3394 }
3395
3396 /*
3397 deprecated
3398 bool clamp_image_coord( image_sampler_data *imageSampler, float value, size_t
3399 max, int &outValue )
3400 {
3401 int v = (int)value;
3402
3403 switch(imageSampler->addressing_mode)
3404 {
3405 case CL_ADDRESS_REPEAT:
3406 outValue = v;
3407 while( v < 0 )
3408 v += (int)max;
3409 while( v >= (int)max )
3410 v -= (int)max;
3411 if( v != outValue )
3412 {
3413 outValue = v;
3414 return true;
3415 }
3416 return false;
3417
3418 case CL_ADDRESS_MIRRORED_REPEAT:
3419 log_info( "ERROR: unimplemented for CL_ADDRESS_MIRRORED_REPEAT. Do
3420 we ever use this? exit(-1);
3421
3422 default:
3423 if( v < 0 )
3424 {
3425 outValue = 0;
3426 return true;
3427 }
3428 if( v >= (int)max )
3429 {
3430 outValue = (int)max - 1;
3431 return true;
3432 }
3433 outValue = v;
3434 return false;
3435 }
3436
3437 }
3438 */
3439
get_sampler_kernel_code(image_sampler_data * imageSampler,char * outLine)3440 void get_sampler_kernel_code(image_sampler_data *imageSampler, char *outLine)
3441 {
3442 const char *normalized;
3443 const char *addressMode;
3444 const char *filterMode;
3445
3446 if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP)
3447 addressMode = "CLK_ADDRESS_CLAMP";
3448 else if (imageSampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
3449 addressMode = "CLK_ADDRESS_CLAMP_TO_EDGE";
3450 else if (imageSampler->addressing_mode == CL_ADDRESS_REPEAT)
3451 addressMode = "CLK_ADDRESS_REPEAT";
3452 else if (imageSampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
3453 addressMode = "CLK_ADDRESS_MIRRORED_REPEAT";
3454 else if (imageSampler->addressing_mode == CL_ADDRESS_NONE)
3455 addressMode = "CLK_ADDRESS_NONE";
3456 else
3457 {
3458 log_error("**Error: Unknown addressing mode! Aborting...\n");
3459 abort();
3460 }
3461
3462 if (imageSampler->normalized_coords)
3463 normalized = "CLK_NORMALIZED_COORDS_TRUE";
3464 else
3465 normalized = "CLK_NORMALIZED_COORDS_FALSE";
3466
3467 if (imageSampler->filter_mode == CL_FILTER_LINEAR)
3468 filterMode = "CLK_FILTER_LINEAR";
3469 else
3470 filterMode = "CLK_FILTER_NEAREST";
3471
3472 sprintf(outLine, " const sampler_t imageSampler = %s | %s | %s;\n",
3473 addressMode, filterMode, normalized);
3474 }
3475
copy_image_data(image_descriptor * srcImageInfo,image_descriptor * dstImageInfo,void * imageValues,void * destImageValues,const size_t sourcePos[],const size_t destPos[],const size_t regionSize[])3476 void copy_image_data(image_descriptor *srcImageInfo,
3477 image_descriptor *dstImageInfo, void *imageValues,
3478 void *destImageValues, const size_t sourcePos[],
3479 const size_t destPos[], const size_t regionSize[])
3480 {
3481 // assert( srcImageInfo->format == dstImageInfo->format );
3482
3483 size_t src_mip_level_offset = 0, dst_mip_level_offset = 0;
3484 size_t sourcePos_lod[3], destPos_lod[3], src_lod, dst_lod;
3485 size_t src_row_pitch_lod, src_slice_pitch_lod;
3486 size_t dst_row_pitch_lod, dst_slice_pitch_lod;
3487
3488 size_t pixelSize = get_pixel_size(srcImageInfo->format);
3489
3490 sourcePos_lod[0] = sourcePos[0];
3491 sourcePos_lod[1] = sourcePos[1];
3492 sourcePos_lod[2] = sourcePos[2];
3493 destPos_lod[0] = destPos[0];
3494 destPos_lod[1] = destPos[1];
3495 destPos_lod[2] = destPos[2];
3496 src_row_pitch_lod = srcImageInfo->rowPitch;
3497 dst_row_pitch_lod = dstImageInfo->rowPitch;
3498 src_slice_pitch_lod = srcImageInfo->slicePitch;
3499 dst_slice_pitch_lod = dstImageInfo->slicePitch;
3500
3501 if (srcImageInfo->num_mip_levels > 1)
3502 {
3503 size_t src_width_lod = 1 /*srcImageInfo->width*/;
3504 size_t src_height_lod = 1 /*srcImageInfo->height*/;
3505 size_t src_depth_lod = 1 /*srcImageInfo->depth*/;
3506
3507 switch (srcImageInfo->type)
3508 {
3509 case CL_MEM_OBJECT_IMAGE1D:
3510 src_lod = sourcePos[1];
3511 sourcePos_lod[1] = sourcePos_lod[2] = 0;
3512 src_width_lod = (srcImageInfo->width >> src_lod)
3513 ? (srcImageInfo->width >> src_lod)
3514 : 1;
3515 break;
3516 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
3517 case CL_MEM_OBJECT_IMAGE2D:
3518 src_lod = sourcePos[2];
3519 sourcePos_lod[1] = sourcePos[1];
3520 sourcePos_lod[2] = 0;
3521 src_width_lod = (srcImageInfo->width >> src_lod)
3522 ? (srcImageInfo->width >> src_lod)
3523 : 1;
3524 if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
3525 src_height_lod = (srcImageInfo->height >> src_lod)
3526 ? (srcImageInfo->height >> src_lod)
3527 : 1;
3528 break;
3529 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
3530 case CL_MEM_OBJECT_IMAGE3D:
3531 src_lod = sourcePos[3];
3532 sourcePos_lod[1] = sourcePos[1];
3533 sourcePos_lod[2] = sourcePos[2];
3534 src_width_lod = (srcImageInfo->width >> src_lod)
3535 ? (srcImageInfo->width >> src_lod)
3536 : 1;
3537 src_height_lod = (srcImageInfo->height >> src_lod)
3538 ? (srcImageInfo->height >> src_lod)
3539 : 1;
3540 if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
3541 src_depth_lod = (srcImageInfo->depth >> src_lod)
3542 ? (srcImageInfo->depth >> src_lod)
3543 : 1;
3544 break;
3545 }
3546 src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
3547 src_row_pitch_lod =
3548 src_width_lod * get_pixel_size(srcImageInfo->format);
3549 src_slice_pitch_lod = src_row_pitch_lod * src_height_lod;
3550 }
3551
3552 if (dstImageInfo->num_mip_levels > 1)
3553 {
3554 size_t dst_width_lod = 1 /*dstImageInfo->width*/;
3555 size_t dst_height_lod = 1 /*dstImageInfo->height*/;
3556 size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
3557 switch (dstImageInfo->type)
3558 {
3559 case CL_MEM_OBJECT_IMAGE1D:
3560 dst_lod = destPos[1];
3561 destPos_lod[1] = destPos_lod[2] = 0;
3562 dst_width_lod = (dstImageInfo->width >> dst_lod)
3563 ? (dstImageInfo->width >> dst_lod)
3564 : 1;
3565 break;
3566 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
3567 case CL_MEM_OBJECT_IMAGE2D:
3568 dst_lod = destPos[2];
3569 destPos_lod[1] = destPos[1];
3570 destPos_lod[2] = 0;
3571 dst_width_lod = (dstImageInfo->width >> dst_lod)
3572 ? (dstImageInfo->width >> dst_lod)
3573 : 1;
3574 if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D)
3575 dst_height_lod = (dstImageInfo->height >> dst_lod)
3576 ? (dstImageInfo->height >> dst_lod)
3577 : 1;
3578 break;
3579 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
3580 case CL_MEM_OBJECT_IMAGE3D:
3581 dst_lod = destPos[3];
3582 destPos_lod[1] = destPos[1];
3583 destPos_lod[2] = destPos[2];
3584 dst_width_lod = (dstImageInfo->width >> dst_lod)
3585 ? (dstImageInfo->width >> dst_lod)
3586 : 1;
3587 dst_height_lod = (dstImageInfo->height >> dst_lod)
3588 ? (dstImageInfo->height >> dst_lod)
3589 : 1;
3590 if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
3591 dst_depth_lod = (dstImageInfo->depth >> dst_lod)
3592 ? (dstImageInfo->depth >> dst_lod)
3593 : 1;
3594 break;
3595 }
3596 dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
3597 dst_row_pitch_lod =
3598 dst_width_lod * get_pixel_size(dstImageInfo->format);
3599 dst_slice_pitch_lod = dst_row_pitch_lod * dst_height_lod;
3600 }
3601
3602 // Get initial pointers
3603 char *sourcePtr = (char *)imageValues
3604 + sourcePos_lod[2] * src_slice_pitch_lod
3605 + sourcePos_lod[1] * src_row_pitch_lod + pixelSize * sourcePos_lod[0]
3606 + src_mip_level_offset;
3607 char *destPtr = (char *)destImageValues
3608 + destPos_lod[2] * dst_slice_pitch_lod
3609 + destPos_lod[1] * dst_row_pitch_lod + pixelSize * destPos_lod[0]
3610 + dst_mip_level_offset;
3611
3612 for (size_t z = 0; z < (regionSize[2] > 0 ? regionSize[2] : 1); z++)
3613 {
3614 char *rowSourcePtr = sourcePtr;
3615 char *rowDestPtr = destPtr;
3616 for (size_t y = 0; y < regionSize[1]; y++)
3617 {
3618 memcpy(rowDestPtr, rowSourcePtr, pixelSize * regionSize[0]);
3619 rowSourcePtr += src_row_pitch_lod;
3620 rowDestPtr += dst_row_pitch_lod;
3621 }
3622
3623 sourcePtr += src_slice_pitch_lod;
3624 destPtr += dst_slice_pitch_lod;
3625 }
3626 }
3627
random_float(float low,float high,MTdata d)3628 float random_float(float low, float high, MTdata d)
3629 {
3630 float t = (float)genrand_real1(d);
3631 return (1.0f - t) * low + t * high;
3632 }
3633
CoordWalker(void * coords,bool useFloats,size_t vecSize)3634 CoordWalker::CoordWalker(void *coords, bool useFloats, size_t vecSize)
3635 {
3636 if (useFloats)
3637 {
3638 mFloatCoords = (cl_float *)coords;
3639 mIntCoords = NULL;
3640 }
3641 else
3642 {
3643 mFloatCoords = NULL;
3644 mIntCoords = (cl_int *)coords;
3645 }
3646 mVecSize = vecSize;
3647 }
3648
~CoordWalker()3649 CoordWalker::~CoordWalker() {}
3650
Get(size_t idx,size_t el)3651 cl_float CoordWalker::Get(size_t idx, size_t el)
3652 {
3653 if (mIntCoords != NULL)
3654 return (cl_float)mIntCoords[idx * mVecSize + el];
3655 else
3656 return mFloatCoords[idx * mVecSize + el];
3657 }
3658
3659
print_read_header(const cl_image_format * format,image_sampler_data * sampler,bool err,int t)3660 void print_read_header(const cl_image_format *format,
3661 image_sampler_data *sampler, bool err, int t)
3662 {
3663 const char *addressMode = NULL;
3664 const char *normalizedNames[2] = { "UNNORMALIZED", "NORMALIZED" };
3665
3666 if (sampler->addressing_mode == CL_ADDRESS_CLAMP)
3667 addressMode = "CL_ADDRESS_CLAMP";
3668 else if (sampler->addressing_mode == CL_ADDRESS_CLAMP_TO_EDGE)
3669 addressMode = "CL_ADDRESS_CLAMP_TO_EDGE";
3670 else if (sampler->addressing_mode == CL_ADDRESS_REPEAT)
3671 addressMode = "CL_ADDRESS_REPEAT";
3672 else if (sampler->addressing_mode == CL_ADDRESS_MIRRORED_REPEAT)
3673 addressMode = "CL_ADDRESS_MIRRORED_REPEAT";
3674 else
3675 addressMode = "CL_ADDRESS_NONE";
3676
3677 if (t)
3678 {
3679 if (err)
3680 log_error("[%-7s %-24s %d] - %s - %s - %s - %s\n",
3681 GetChannelOrderName(format->image_channel_order),
3682 GetChannelTypeName(format->image_channel_data_type),
3683 (int)get_format_channel_count(format),
3684 sampler->filter_mode == CL_FILTER_NEAREST
3685 ? "CL_FILTER_NEAREST"
3686 : "CL_FILTER_LINEAR",
3687 addressMode,
3688 normalizedNames[sampler->normalized_coords ? 1 : 0],
3689 t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
3690 else
3691 log_info("[%-7s %-24s %d] - %s - %s - %s - %s\n",
3692 GetChannelOrderName(format->image_channel_order),
3693 GetChannelTypeName(format->image_channel_data_type),
3694 (int)get_format_channel_count(format),
3695 sampler->filter_mode == CL_FILTER_NEAREST
3696 ? "CL_FILTER_NEAREST"
3697 : "CL_FILTER_LINEAR",
3698 addressMode,
3699 normalizedNames[sampler->normalized_coords ? 1 : 0],
3700 t == 1 ? "TRANSPOSED" : "NON-TRANSPOSED");
3701 }
3702 else
3703 {
3704 if (err)
3705 log_error("[%-7s %-24s %d] - %s - %s - %s\n",
3706 GetChannelOrderName(format->image_channel_order),
3707 GetChannelTypeName(format->image_channel_data_type),
3708 (int)get_format_channel_count(format),
3709 sampler->filter_mode == CL_FILTER_NEAREST
3710 ? "CL_FILTER_NEAREST"
3711 : "CL_FILTER_LINEAR",
3712 addressMode,
3713 normalizedNames[sampler->normalized_coords ? 1 : 0]);
3714 else
3715 log_info("[%-7s %-24s %d] - %s - %s - %s\n",
3716 GetChannelOrderName(format->image_channel_order),
3717 GetChannelTypeName(format->image_channel_data_type),
3718 (int)get_format_channel_count(format),
3719 sampler->filter_mode == CL_FILTER_NEAREST
3720 ? "CL_FILTER_NEAREST"
3721 : "CL_FILTER_LINEAR",
3722 addressMode,
3723 normalizedNames[sampler->normalized_coords ? 1 : 0]);
3724 }
3725 }
3726
print_write_header(const cl_image_format * format,bool err=false)3727 void print_write_header(const cl_image_format *format, bool err = false)
3728 {
3729 if (err)
3730 log_error("[%-7s %-24s %d]\n",
3731 GetChannelOrderName(format->image_channel_order),
3732 GetChannelTypeName(format->image_channel_data_type),
3733 (int)get_format_channel_count(format));
3734 else
3735 log_info("[%-7s %-24s %d]\n",
3736 GetChannelOrderName(format->image_channel_order),
3737 GetChannelTypeName(format->image_channel_data_type),
3738 (int)get_format_channel_count(format));
3739 }
3740
3741
print_header(const cl_image_format * format,bool err=false)3742 void print_header(const cl_image_format *format, bool err = false)
3743 {
3744 if (err)
3745 {
3746 log_error("[%-7s %-24s %d]\n",
3747 GetChannelOrderName(format->image_channel_order),
3748 GetChannelTypeName(format->image_channel_data_type),
3749 (int)get_format_channel_count(format));
3750 }
3751 else
3752 {
3753 log_info("[%-7s %-24s %d]\n",
3754 GetChannelOrderName(format->image_channel_order),
3755 GetChannelTypeName(format->image_channel_data_type),
3756 (int)get_format_channel_count(format));
3757 }
3758 }
3759
find_format(cl_image_format * formatList,unsigned int numFormats,cl_image_format * formatToFind)3760 bool find_format(cl_image_format *formatList, unsigned int numFormats,
3761 cl_image_format *formatToFind)
3762 {
3763 for (unsigned int i = 0; i < numFormats; i++)
3764 {
3765 if (formatList[i].image_channel_order
3766 == formatToFind->image_channel_order
3767 && formatList[i].image_channel_data_type
3768 == formatToFind->image_channel_data_type)
3769 return true;
3770 }
3771 return false;
3772 }
3773
build_required_image_formats(cl_mem_flags flags,cl_mem_object_type image_type,cl_device_id device,std::vector<cl_image_format> & formatsToSupport)3774 void build_required_image_formats(
3775 cl_mem_flags flags, cl_mem_object_type image_type, cl_device_id device,
3776 std::vector<cl_image_format> &formatsToSupport)
3777 {
3778 formatsToSupport.clear();
3779
3780 // Minimum list of supported image formats for reading or writing (embedded
3781 // profile)
3782 static std::vector<cl_image_format> embeddedProfile_readOrWrite{
3783 // clang-format off
3784 { CL_RGBA, CL_UNORM_INT8 },
3785 { CL_RGBA, CL_UNORM_INT16 },
3786 { CL_RGBA, CL_SIGNED_INT8 },
3787 { CL_RGBA, CL_SIGNED_INT16 },
3788 { CL_RGBA, CL_SIGNED_INT32 },
3789 { CL_RGBA, CL_UNSIGNED_INT8 },
3790 { CL_RGBA, CL_UNSIGNED_INT16 },
3791 { CL_RGBA, CL_UNSIGNED_INT32 },
3792 { CL_RGBA, CL_HALF_FLOAT },
3793 { CL_RGBA, CL_FLOAT },
3794 // clang-format on
3795 };
3796
3797 // Minimum list of required image formats for reading or writing
3798 // num_channels, for all image types.
3799 static std::vector<cl_image_format> fullProfile_readOrWrite{
3800 // clang-format off
3801 { CL_RGBA, CL_UNORM_INT8 },
3802 { CL_RGBA, CL_UNORM_INT16 },
3803 { CL_RGBA, CL_SIGNED_INT8 },
3804 { CL_RGBA, CL_SIGNED_INT16 },
3805 { CL_RGBA, CL_SIGNED_INT32 },
3806 { CL_RGBA, CL_UNSIGNED_INT8 },
3807 { CL_RGBA, CL_UNSIGNED_INT16 },
3808 { CL_RGBA, CL_UNSIGNED_INT32 },
3809 { CL_RGBA, CL_HALF_FLOAT },
3810 { CL_RGBA, CL_FLOAT },
3811 { CL_BGRA, CL_UNORM_INT8 },
3812 // clang-format on
3813 };
3814
3815 // Minimum list of supported image formats for reading or writing
3816 // (OpenCL 2.0, 2.1, or 2.2), for all image types.
3817 static std::vector<cl_image_format> fullProfile_2x_readOrWrite{
3818 // clang-format off
3819 { CL_R, CL_UNORM_INT8 },
3820 { CL_R, CL_UNORM_INT16 },
3821 { CL_R, CL_SNORM_INT8 },
3822 { CL_R, CL_SNORM_INT16 },
3823 { CL_R, CL_SIGNED_INT8 },
3824 { CL_R, CL_SIGNED_INT16 },
3825 { CL_R, CL_SIGNED_INT32 },
3826 { CL_R, CL_UNSIGNED_INT8 },
3827 { CL_R, CL_UNSIGNED_INT16 },
3828 { CL_R, CL_UNSIGNED_INT32 },
3829 { CL_R, CL_HALF_FLOAT },
3830 { CL_R, CL_FLOAT },
3831 { CL_RG, CL_UNORM_INT8 },
3832 { CL_RG, CL_UNORM_INT16 },
3833 { CL_RG, CL_SNORM_INT8 },
3834 { CL_RG, CL_SNORM_INT16 },
3835 { CL_RG, CL_SIGNED_INT8 },
3836 { CL_RG, CL_SIGNED_INT16 },
3837 { CL_RG, CL_SIGNED_INT32 },
3838 { CL_RG, CL_UNSIGNED_INT8 },
3839 { CL_RG, CL_UNSIGNED_INT16 },
3840 { CL_RG, CL_UNSIGNED_INT32 },
3841 { CL_RG, CL_HALF_FLOAT },
3842 { CL_RG, CL_FLOAT },
3843 { CL_RGBA, CL_UNORM_INT8 },
3844 { CL_RGBA, CL_UNORM_INT16 },
3845 { CL_RGBA, CL_SNORM_INT8 },
3846 { CL_RGBA, CL_SNORM_INT16 },
3847 { CL_RGBA, CL_SIGNED_INT8 },
3848 { CL_RGBA, CL_SIGNED_INT16 },
3849 { CL_RGBA, CL_SIGNED_INT32 },
3850 { CL_RGBA, CL_UNSIGNED_INT8 },
3851 { CL_RGBA, CL_UNSIGNED_INT16 },
3852 { CL_RGBA, CL_UNSIGNED_INT32 },
3853 { CL_RGBA, CL_HALF_FLOAT },
3854 { CL_RGBA, CL_FLOAT },
3855 { CL_BGRA, CL_UNORM_INT8 },
3856 // clang-format on
3857 };
3858
3859 // Conditional addition to the 2x readOrWrite table:
3860 // Support for the CL_DEPTH image channel order is required only for 2D
3861 // images and 2D image arrays.
3862 static std::vector<cl_image_format> fullProfile_2x_readOrWrite_Depth{
3863 // clang-format off
3864 { CL_DEPTH, CL_UNORM_INT16 },
3865 { CL_DEPTH, CL_FLOAT },
3866 // clang-format on
3867 };
3868
3869 // Conditional addition to the 2x readOrWrite table:
3870 // Support for reading from the CL_sRGBA image channel order is optional for
3871 // 1D image buffers. Support for writing to the CL_sRGBA image channel order
3872 // is optional for all image types.
3873 static std::vector<cl_image_format> fullProfile_2x_readOrWrite_srgb{
3874 { CL_sRGBA, CL_UNORM_INT8 },
3875 };
3876
3877 // Minimum list of required image formats for reading and writing.
3878 static std::vector<cl_image_format> fullProfile_readAndWrite{
3879 // clang-format off
3880 { CL_R, CL_UNORM_INT8 },
3881 { CL_R, CL_SIGNED_INT8 },
3882 { CL_R, CL_SIGNED_INT16 },
3883 { CL_R, CL_SIGNED_INT32 },
3884 { CL_R, CL_UNSIGNED_INT8 },
3885 { CL_R, CL_UNSIGNED_INT16 },
3886 { CL_R, CL_UNSIGNED_INT32 },
3887 { CL_R, CL_HALF_FLOAT },
3888 { CL_R, CL_FLOAT },
3889 { CL_RGBA, CL_UNORM_INT8 },
3890 { CL_RGBA, CL_SIGNED_INT8 },
3891 { CL_RGBA, CL_SIGNED_INT16 },
3892 { CL_RGBA, CL_SIGNED_INT32 },
3893 { CL_RGBA, CL_UNSIGNED_INT8 },
3894 { CL_RGBA, CL_UNSIGNED_INT16 },
3895 { CL_RGBA, CL_UNSIGNED_INT32 },
3896 { CL_RGBA, CL_HALF_FLOAT },
3897 { CL_RGBA, CL_FLOAT },
3898 // clang-format on
3899 };
3900
3901 // Embedded profile
3902 if (gIsEmbedded)
3903 {
3904 copy(embeddedProfile_readOrWrite.begin(),
3905 embeddedProfile_readOrWrite.end(),
3906 back_inserter(formatsToSupport));
3907 }
3908 // Full profile
3909 else
3910 {
3911 Version version = get_device_cl_version(device);
3912 if (version < Version(2, 0) || version >= Version(3, 0))
3913 {
3914 // Full profile, OpenCL 1.2 or 3.0.
3915 if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
3916 {
3917 // Note: assumes that read-write images are supported!
3918 copy(fullProfile_readAndWrite.begin(),
3919 fullProfile_readAndWrite.end(),
3920 back_inserter(formatsToSupport));
3921 }
3922 else
3923 {
3924 copy(fullProfile_readOrWrite.begin(),
3925 fullProfile_readOrWrite.end(),
3926 back_inserter(formatsToSupport));
3927 }
3928 }
3929 else
3930 {
3931 // Full profile, OpenCL 2.0, 2.1, 2.2.
3932 if (flags & CL_MEM_KERNEL_READ_AND_WRITE)
3933 {
3934 copy(fullProfile_readAndWrite.begin(),
3935 fullProfile_readAndWrite.end(),
3936 back_inserter(formatsToSupport));
3937 }
3938 else
3939 {
3940 copy(fullProfile_2x_readOrWrite.begin(),
3941 fullProfile_2x_readOrWrite.end(),
3942 back_inserter(formatsToSupport));
3943
3944 // Support for the CL_DEPTH image channel order is required only
3945 // for 2D images and 2D image arrays.
3946 if (image_type == CL_MEM_OBJECT_IMAGE2D
3947 || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
3948 {
3949 copy(fullProfile_2x_readOrWrite_Depth.begin(),
3950 fullProfile_2x_readOrWrite_Depth.end(),
3951 back_inserter(formatsToSupport));
3952 }
3953
3954 // Support for reading from the CL_sRGBA image channel order is
3955 // optional for 1D image buffers. Support for writing to the
3956 // CL_sRGBA image channel order is optional for all image types.
3957 if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER
3958 && flags == CL_MEM_READ_ONLY)
3959 {
3960 copy(fullProfile_2x_readOrWrite_srgb.begin(),
3961 fullProfile_2x_readOrWrite_srgb.end(),
3962 back_inserter(formatsToSupport));
3963 }
3964 }
3965 }
3966 }
3967 }
3968
is_image_format_required(cl_image_format format,cl_mem_flags flags,cl_mem_object_type image_type,cl_device_id device)3969 bool is_image_format_required(cl_image_format format, cl_mem_flags flags,
3970 cl_mem_object_type image_type,
3971 cl_device_id device)
3972 {
3973 std::vector<cl_image_format> formatsToSupport;
3974 build_required_image_formats(flags, image_type, device, formatsToSupport);
3975
3976 for (auto &formatItr : formatsToSupport)
3977 {
3978 if (formatItr.image_channel_order == format.image_channel_order
3979 && formatItr.image_channel_data_type
3980 == format.image_channel_data_type)
3981 {
3982 return true;
3983 }
3984 }
3985
3986 return false;
3987 }
3988
compute_max_mip_levels(size_t width,size_t height,size_t depth)3989 cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth)
3990 {
3991 cl_uint retMaxMipLevels = 0, max_dim = 0;
3992
3993 max_dim = width;
3994 max_dim = height > max_dim ? height : max_dim;
3995 max_dim = depth > max_dim ? depth : max_dim;
3996
3997 while (max_dim)
3998 {
3999 retMaxMipLevels++;
4000 max_dim >>= 1;
4001 }
4002 return retMaxMipLevels;
4003 }
4004
compute_mipmapped_image_size(image_descriptor imageInfo)4005 cl_ulong compute_mipmapped_image_size(image_descriptor imageInfo)
4006 {
4007 cl_ulong retSize = 0;
4008 size_t curr_width, curr_height, curr_depth, curr_array_size;
4009 curr_width = imageInfo.width;
4010 curr_height = imageInfo.height;
4011 curr_depth = imageInfo.depth;
4012 curr_array_size = imageInfo.arraySize;
4013
4014 for (int i = 0; i < (int)imageInfo.num_mip_levels; i++)
4015 {
4016 switch (imageInfo.type)
4017 {
4018 case CL_MEM_OBJECT_IMAGE3D:
4019 retSize += (cl_ulong)curr_width * curr_height * curr_depth
4020 * get_pixel_size(imageInfo.format);
4021 break;
4022 case CL_MEM_OBJECT_IMAGE2D:
4023 retSize += (cl_ulong)curr_width * curr_height
4024 * get_pixel_size(imageInfo.format);
4025 break;
4026 case CL_MEM_OBJECT_IMAGE1D:
4027 retSize +=
4028 (cl_ulong)curr_width * get_pixel_size(imageInfo.format);
4029 break;
4030 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4031 retSize += (cl_ulong)curr_width * curr_array_size
4032 * get_pixel_size(imageInfo.format);
4033 break;
4034 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4035 retSize += (cl_ulong)curr_width * curr_height * curr_array_size
4036 * get_pixel_size(imageInfo.format);
4037 break;
4038 }
4039
4040 switch (imageInfo.type)
4041 {
4042 case CL_MEM_OBJECT_IMAGE3D:
4043 curr_depth = curr_depth >> 1 ? curr_depth >> 1 : 1;
4044 case CL_MEM_OBJECT_IMAGE2D:
4045 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4046 curr_height = curr_height >> 1 ? curr_height >> 1 : 1;
4047 case CL_MEM_OBJECT_IMAGE1D:
4048 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4049 curr_width = curr_width >> 1 ? curr_width >> 1 : 1;
4050 }
4051 }
4052
4053 return retSize;
4054 }
4055
compute_mip_level_offset(image_descriptor * imageInfo,size_t lod)4056 size_t compute_mip_level_offset(image_descriptor *imageInfo, size_t lod)
4057 {
4058 size_t retOffset = 0;
4059 size_t width, height, depth;
4060 width = imageInfo->width;
4061 height = imageInfo->height;
4062 depth = imageInfo->depth;
4063
4064 for (size_t i = 0; i < lod; i++)
4065 {
4066 switch (imageInfo->type)
4067 {
4068 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4069 retOffset += (size_t)width * height * imageInfo->arraySize
4070 * get_pixel_size(imageInfo->format);
4071 break;
4072 case CL_MEM_OBJECT_IMAGE3D:
4073 retOffset += (size_t)width * height * depth
4074 * get_pixel_size(imageInfo->format);
4075 break;
4076 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4077 retOffset += (size_t)width * imageInfo->arraySize
4078 * get_pixel_size(imageInfo->format);
4079 break;
4080 case CL_MEM_OBJECT_IMAGE2D:
4081 retOffset +=
4082 (size_t)width * height * get_pixel_size(imageInfo->format);
4083 break;
4084 case CL_MEM_OBJECT_IMAGE1D:
4085 retOffset += (size_t)width * get_pixel_size(imageInfo->format);
4086 break;
4087 }
4088
4089 // Compute next lod dimensions
4090 switch (imageInfo->type)
4091 {
4092 case CL_MEM_OBJECT_IMAGE3D: depth = (depth >> 1) ? (depth >> 1) : 1;
4093 case CL_MEM_OBJECT_IMAGE2D:
4094 case CL_MEM_OBJECT_IMAGE2D_ARRAY:
4095 height = (height >> 1) ? (height >> 1) : 1;
4096 case CL_MEM_OBJECT_IMAGE1D_ARRAY:
4097 case CL_MEM_OBJECT_IMAGE1D: width = (width >> 1) ? (width >> 1) : 1;
4098 }
4099 }
4100 return retOffset;
4101 }
4102
convert_image_type_to_string(cl_mem_object_type image_type)4103 const char *convert_image_type_to_string(cl_mem_object_type image_type)
4104 {
4105 switch (image_type)
4106 {
4107 case CL_MEM_OBJECT_IMAGE1D: return "1D";
4108 case CL_MEM_OBJECT_IMAGE2D: return "2D";
4109 case CL_MEM_OBJECT_IMAGE3D: return "3D";
4110 case CL_MEM_OBJECT_IMAGE1D_ARRAY: return "1D array";
4111 case CL_MEM_OBJECT_IMAGE2D_ARRAY: return "2D array";
4112 case CL_MEM_OBJECT_IMAGE1D_BUFFER: return "1D image buffer";
4113 default: return "unrecognized object type";
4114 }
4115 }
4116