1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 #if !defined(ASTCENC_DECOMPRESS_ONLY)
19
20 /**
21 * @brief Functions for color quantization.
22 *
23 * The design of the color quantization functionality requires the caller to use higher level error
24 * analysis to determine the base encoding that should be used. This earlier analysis will select
25 * the basic type of the endpoint that should be used:
26 *
27 * * Mode: LDR or HDR
28 * * Quantization level
29 * * Channel count: L, LA, RGB, or RGBA
 * * Endpoint 2 type: Direct color encoding, or scaled from endpoint 1.
31 *
 * However, this leaves a number of decisions about exactly how to pack the endpoints open. In
 * particular we need to determine if blue contraction and/or delta encoding can be used. If they
 * can be applied these will allow us to maintain higher precision in the endpoints without
 * needing additional storage.
36 */
37
38 #include <stdio.h>
39 #include <assert.h>
40
41 #include "astcenc_internal.h"
42
43 /**
44 * @brief Compute the error of an LDR RGB or RGBA encoding.
45 *
46 * @param uquant0 The original endpoint 0 color.
47 * @param uquant1 The original endpoint 1 color.
48 * @param quant0 The unpacked quantized endpoint 0 color.
49 * @param quant1 The unpacked quantized endpoint 1 color.
50 *
51 * @return The MSE of the encoding.
52 */
get_rgba_encoding_error(vfloat4 uquant0,vfloat4 uquant1,vint4 quant0,vint4 quant1)53 static float get_rgba_encoding_error(
54 vfloat4 uquant0,
55 vfloat4 uquant1,
56 vint4 quant0,
57 vint4 quant1
58 ) {
59 vfloat4 error0 = uquant0 - int_to_float(quant0);
60 vfloat4 error1 = uquant1 - int_to_float(quant1);
61 return hadd_s(error0 * error0 + error1 * error1);
62 }
63
64 /**
65 * @brief Determine the quantized value given a quantization level.
66 *
67 * @param quant_level The quantization level to use.
68 * @param value The value to convert. This must be in the 0-255 range.
69 *
70 * @return The unpacked quantized value, returned in 0-255 range.
71 */
quant_color(quant_method quant_level,int value)72 static inline uint8_t quant_color(
73 quant_method quant_level,
74 int value
75 ) {
76 int index = value * 2 + 1;
77 return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
78 }
79
80 /**
81 * @brief Determine the quantized value given a quantization level.
82 *
83 * @param quant_level The quantization level to use.
84 * @param value The value to convert. This must be in the 0-255 range.
85 *
86 * @return The unpacked quantized value, returned in 0-255 range.
87 */
quant_color3(quant_method quant_level,vint4 value)88 static inline vint4 quant_color3(
89 quant_method quant_level,
90 vint4 value
91 ) {
92 vint4 index = value * 2 + 1;
93 return vint4(
94 color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()],
95 color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()],
96 color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()],
97 0);
98 }
99
100 /**
101 * @brief Determine the quantized value given a quantization level and residual.
102 *
103 * @param quant_level The quantization level to use.
104 * @param value The value to convert. This must be in the 0-255 range.
105 * @param valuef The original value before rounding, used to compute a residual.
106 *
107 * @return The unpacked quantized value, returned in 0-255 range.
108 */
quant_color(quant_method quant_level,int value,float valuef)109 static inline uint8_t quant_color(
110 quant_method quant_level,
111 int value,
112 float valuef
113 ) {
114 int index = value * 2;
115
116 // Compute the residual to determine if we should round down or up ties.
117 // Test should be residual >= 0, but empirical testing shows small bias helps.
118 float residual = valuef - static_cast<float>(value);
119 if (residual >= -0.1f)
120 {
121 index++;
122 }
123
124 return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
125 }
126
127 /**
128 * @brief Determine the quantized value given a quantization level and residual.
129 *
130 * @param quant_level The quantization level to use.
131 * @param value The value to convert. This must be in the 0-255 range.
132 * @param valuef The original value before rounding, used to compute a residual.
133 *
134 * @return The unpacked quantized value, returned in 0-255 range.
135 */
quant_color3(quant_method quant_level,vint4 value,vfloat4 valuef)136 static inline vint4 quant_color3(
137 quant_method quant_level,
138 vint4 value,
139 vfloat4 valuef
140 ) {
141 vint4 index = value * 2;
142
143 // Compute the residual to determine if we should round down or up ties.
144 // Test should be residual >= 0, but empirical testing shows small bias helps.
145 vfloat4 residual = valuef - int_to_float(value);
146 vmask4 mask = residual >= vfloat4(-0.1f);
147 index = select(index, index + 1, mask);
148
149 return vint4(
150 color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()],
151 color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()],
152 color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()],
153 0);
154 }
155
156 /**
157 * @brief Quantize an LDR RGB color.
158 *
159 * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
160 * For this encoding @c color0 cannot be larger than @c color1. If @c color0 is actually larger
161 * than @c color1, @c color0 is reduced and @c color1 is increased until the constraint is met.
162 *
163 * @param color0 The input unquantized color0 endpoint.
164 * @param color1 The input unquantized color1 endpoint.
165 * @param[out] color0_out The output quantized color0 endpoint.
166 * @param[out] color1_out The output quantized color1 endpoint.
167 * @param quant_level The quantization level to use.
168 */
quantize_rgb(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)169 static void quantize_rgb(
170 vfloat4 color0,
171 vfloat4 color1,
172 vint4& color0_out,
173 vint4& color1_out,
174 quant_method quant_level
175 ) {
176 vint4 color0i, color1i;
177 vfloat4 nudge(0.2f);
178
179 do
180 {
181 vint4 color0q = max(float_to_int_rtn(color0), vint4(0));
182 color0i = quant_color3(quant_level, color0q, color0);
183 color0 = color0 - nudge;
184
185 vint4 color1q = min(float_to_int_rtn(color1), vint4(255));
186 color1i = quant_color3(quant_level, color1q, color1);
187 color1 = color1 + nudge;
188 } while (hadd_rgb_s(color0i) > hadd_rgb_s(color1i));
189
190 color0_out = color0i;
191 color1_out = color1i;
192 }
193
194 /**
195 * @brief Quantize an LDR RGBA color.
196 *
197 * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
198 * For this encoding @c color0.rgb cannot be larger than @c color1.rgb (this indicates blue
199 * contraction). If @c color0.rgb is actually larger than @c color1.rgb, @c color0.rgb is reduced
200 * and @c color1.rgb is increased until the constraint is met.
201 *
202 * @param color0 The input unquantized color0 endpoint.
203 * @param color1 The input unquantized color1 endpoint.
204 * @param[out] color0_out The output quantized color0 endpoint.
205 * @param[out] color1_out The output quantized color1 endpoint.
206 * @param quant_level The quantization level to use.
207 */
quantize_rgba(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)208 static void quantize_rgba(
209 vfloat4 color0,
210 vfloat4 color1,
211 vint4& color0_out,
212 vint4& color1_out,
213 quant_method quant_level
214 ) {
215 quantize_rgb(color0, color1, color0_out, color1_out, quant_level);
216
217 float a0 = color0.lane<3>();
218 float a1 = color1.lane<3>();
219
220 color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0));
221 color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1));
222 }
223
224 /**
225 * @brief Try to quantize an LDR RGB color using blue-contraction.
226 *
227 * Blue-contraction is only usable if encoded color 1 is larger than color 0.
228 *
229 * @param color0 The input unquantized color0 endpoint.
230 * @param color1 The input unquantized color1 endpoint.
231 * @param[out] color0_out The output quantized color0 endpoint.
232 * @param[out] color1_out The output quantized color1 endpoint.
233 * @param quant_level The quantization level to use.
234 *
235 * @return Returns @c false on failure, @c true on success.
236 */
try_quantize_rgb_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)237 static bool try_quantize_rgb_blue_contract(
238 vfloat4 color0,
239 vfloat4 color1,
240 vint4& color0_out,
241 vint4& color1_out,
242 quant_method quant_level
243 ) {
244 // Apply inverse blue-contraction
245 color0 += color0 - color0.swz<2, 2, 2, 3>();
246 color1 += color1 - color1.swz<2, 2, 2, 3>();
247
248 // If anything overflows BC cannot be used
249 vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f));
250 vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f));
251 if (any(color0_error | color1_error))
252 {
253 return false;
254 }
255
256 // Quantize the inverse blue-contracted color
257 vint4 color0i = quant_color3(quant_level, float_to_int_rtn(color0), color0);
258 vint4 color1i = quant_color3(quant_level, float_to_int_rtn(color1), color1);
259
260 // If color #1 is not larger than color #0 then blue-contraction cannot be used
261 // We must test afterwards because quantization can change the order
262 if (hadd_rgb_s(color1i) <= hadd_rgb_s(color0i))
263 {
264 return false;
265 }
266
267 color0_out = color1i;
268 color1_out = color0i;
269 return true;
270 }
271
272 /**
273 * @brief Try to quantize an LDR RGBA color using blue-contraction.
274 *
275 * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
276 *
277 * @param color0 The input unquantized color0 endpoint.
278 * @param color1 The input unquantized color1 endpoint.
279 * @param[out] color0_out The output quantized color0 endpoint.
280 * @param[out] color1_out The output quantized color1 endpoint.
281 * @param quant_level The quantization level to use.
282 *
283 * @return Returns @c false on failure, @c true on success.
284 */
try_quantize_rgba_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)285 static bool try_quantize_rgba_blue_contract(
286 vfloat4 color0,
287 vfloat4 color1,
288 vint4& color0_out,
289 vint4& color1_out,
290 quant_method quant_level
291 ) {
292 if (try_quantize_rgb_blue_contract(color0, color1, color0_out, color1_out, quant_level))
293 {
294 float a0 = color0.lane<3>();
295 float a1 = color1.lane<3>();
296
297 color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1));
298 color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0));
299
300 return true;
301 }
302
303 return false;
304 }
305
306 /**
307 * @brief Try to quantize an LDR RGB color using delta encoding.
308 *
309 * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
310 * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
311 * non-negative, then we encode a regular delta.
312 *
313 * @param color0 The input unquantized color0 endpoint.
314 * @param color1 The input unquantized color1 endpoint.
315 * @param[out] color0_out The output quantized color0 endpoint.
316 * @param[out] color1_out The output quantized color1 endpoint.
317 * @param quant_level The quantization level to use.
318 *
319 * @return Returns @c false on failure, @c true on success.
320 */
try_quantize_rgb_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)321 static bool try_quantize_rgb_delta(
322 vfloat4 color0,
323 vfloat4 color1,
324 vint4& color0_out,
325 vint4& color1_out,
326 quant_method quant_level
327 ) {
328 // Transform color0 to unorm9
329 vint4 color0a = float_to_int_rtn(color0);
330 color0.set_lane<3>(0.0f);
331 color0a = lsl<1>(color0a);
332
333 // Mask off the top bit
334 vint4 color0b = color0a & 0xFF;
335
336 // Quantize then unquantize in order to get a value that we take differences against
337 vint4 color0be = quant_color3(quant_level, color0b);
338 color0b = color0be | (color0a & 0x100);
339
340 // Get hold of the second value
341 vint4 color1d = float_to_int_rtn(color1);
342 color1d = lsl<1>(color1d);
343
344 // ... and take differences
345 color1d = color1d - color0b;
346 color1d.set_lane<3>(0);
347
348 // Check if the difference is too large to be encodable
349 if (any((color1d > vint4(63)) | (color1d < vint4(-64))))
350 {
351 return false;
352 }
353
354 // Insert top bit of the base into the offset
355 color1d = color1d & 0x7F;
356 color1d = color1d | lsr<1>(color0b & 0x100);
357
358 // Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
359 // since we have then corrupted either the top bit of the base or the sign bit of the offset
360 vint4 color1de = quant_color3(quant_level, color1d);
361
362 vint4 color_flips = (color1d ^ color1de) & 0xC0;
363 color_flips.set_lane<3>(0);
364 if (any(color_flips != vint4::zero()))
365 {
366 return false;
367 }
368
369 // If the sum of offsets triggers blue-contraction then encoding fails
370 vint4 ep0 = color0be;
371 vint4 ep1 = color1de;
372 bit_transfer_signed(ep1, ep0);
373 if (hadd_rgb_s(ep1) < 0)
374 {
375 return false;
376 }
377
378 // Check that the offsets produce legitimate sums as well
379 ep0 = ep0 + ep1;
380 if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
381 {
382 return false;
383 }
384
385 color0_out = color0be;
386 color1_out = color1de;
387 return true;
388 }
389
390 /**
391 * @brief Try to quantize an LDR RGB color using delta encoding and blue-contraction.
392 *
393 * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
394 *
395 * @param color0 The input unquantized color0 endpoint.
396 * @param color1 The input unquantized color1 endpoint.
397 * @param[out] color0_out The output quantized color0 endpoint.
398 * @param[out] color1_out The output quantized color1 endpoint.
399 * @param quant_level The quantization level to use.
400 *
401 * @return Returns @c false on failure, @c true on success.
402 */
try_quantize_rgb_delta_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)403 static bool try_quantize_rgb_delta_blue_contract(
404 vfloat4 color0,
405 vfloat4 color1,
406 vint4& color0_out,
407 vint4& color1_out,
408 quant_method quant_level
409 ) {
410 // Note: Switch around endpoint colors already at start
411 std::swap(color0, color1);
412
413 // Apply inverse blue-contraction
414 color0 += color0 - color0.swz<2, 2, 2, 3>();
415 color1 += color1 - color1.swz<2, 2, 2, 3>();
416
417 // If anything overflows BC cannot be used
418 vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f));
419 vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f));
420 if (any(color0_error | color1_error))
421 {
422 return false;
423 }
424
425 // Transform color0 to unorm9
426 vint4 color0a = float_to_int_rtn(color0);
427 color0.set_lane<3>(0.0f);
428 color0a = lsl<1>(color0a);
429
430 // Mask off the top bit
431 vint4 color0b = color0a & 0xFF;
432
433 // Quantize then unquantize in order to get a value that we take differences against
434 vint4 color0be = quant_color3(quant_level, color0b);
435 color0b = color0be | (color0a & 0x100);
436
437 // Get hold of the second value
438 vint4 color1d = float_to_int_rtn(color1);
439 color1d = lsl<1>(color1d);
440
441 // ... and take differences
442 color1d = color1d - color0b;
443 color1d.set_lane<3>(0);
444
445 // Check if the difference is too large to be encodable
446 if (any((color1d > vint4(63)) | (color1d < vint4(-64))))
447 {
448 return false;
449 }
450
451 // Insert top bit of the base into the offset
452 color1d = color1d & 0x7F;
453 color1d = color1d | lsr<1>(color0b & 0x100);
454
455 // Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
456 // since we have then corrupted either the top bit of the base or the sign bit of the offset
457 vint4 color1de = quant_color3(quant_level, color1d);
458
459 vint4 color_flips = (color1d ^ color1de) & 0xC0;
460 color_flips.set_lane<3>(0);
461 if (any(color_flips != vint4::zero()))
462 {
463 return false;
464 }
465
466 // If the sum of offsets does not trigger blue-contraction then encoding fails
467 vint4 ep0 = color0be;
468 vint4 ep1 = color1de;
469 bit_transfer_signed(ep1, ep0);
470 if (hadd_rgb_s(ep1) >= 0)
471 {
472 return false;
473 }
474
475 // Check that the offsets produce legitimate sums as well
476 ep0 = ep0 + ep1;
477 if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
478 {
479 return false;
480 }
481
482 color0_out = color0be;
483 color1_out = color1de;
484 return true;
485 }
486
487 /**
488 * @brief Try to quantize an LDR A color using delta encoding.
489 *
490 * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
491 * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
492 * non-negative, then we encode a regular delta.
493 *
494 * This function only compressed the alpha - the other elements in the output array are not touched.
495 *
496 * @param color0 The input unquantized color0 endpoint.
497 * @param color1 The input unquantized color1 endpoint.
498 * @param[out] color0_out The output quantized color0 endpoint; must preserve lane 0/1/2.
499 * @param[out] color1_out The output quantized color1 endpoint; must preserve lane 0/1/2.
500 * @param quant_level The quantization level to use.
501 *
502 * @return Returns @c false on failure, @c true on success.
503 */
try_quantize_alpha_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)504 static bool try_quantize_alpha_delta(
505 vfloat4 color0,
506 vfloat4 color1,
507 vint4& color0_out,
508 vint4& color1_out,
509 quant_method quant_level
510 ) {
511 float a0 = color0.lane<3>();
512 float a1 = color1.lane<3>();
513
514 int a0a = astc::flt2int_rtn(a0);
515 a0a <<= 1;
516 int a0b = a0a & 0xFF;
517 int a0be = quant_color(quant_level, a0b);
518 a0b = a0be;
519 a0b |= a0a & 0x100;
520 int a1d = astc::flt2int_rtn(a1);
521 a1d <<= 1;
522 a1d -= a0b;
523
524 if (a1d > 63 || a1d < -64)
525 {
526 return false;
527 }
528
529 a1d &= 0x7F;
530 a1d |= (a0b & 0x100) >> 1;
531
532 int a1de = quant_color(quant_level, a1d);
533 int a1du = a1de;
534 if ((a1d ^ a1du) & 0xC0)
535 {
536 return false;
537 }
538
539 a1du &= 0x7F;
540 if (a1du & 0x40)
541 {
542 a1du -= 0x80;
543 }
544
545 a1du += a0b;
546 if (a1du < 0 || a1du > 0x1FF)
547 {
548 return false;
549 }
550
551 color0_out.set_lane<3>(a0be);
552 color1_out.set_lane<3>(a1de);
553
554 return true;
555 }
556
557 /**
558 * @brief Try to quantize an LDR LA color using delta encoding.
559 *
560 * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
561 * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
562 * non-negative, then we encode a regular delta.
563 *
564 * This function only compressed the alpha - the other elements in the output array are not touched.
565 *
566 * @param color0 The input unquantized color0 endpoint.
567 * @param color1 The input unquantized color1 endpoint.
568 * @param[out] output The output endpoints, returned as (l0, l1, a0, a1).
569 * @param quant_level The quantization level to use.
570 *
571 * @return Returns @c false on failure, @c true on success.
572 */
try_quantize_luminance_alpha_delta(vfloat4 color0,vfloat4 color1,uint8_t output[4],quant_method quant_level)573 static bool try_quantize_luminance_alpha_delta(
574 vfloat4 color0,
575 vfloat4 color1,
576 uint8_t output[4],
577 quant_method quant_level
578 ) {
579 float l0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
580 float l1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
581
582 float a0 = color0.lane<3>();
583 float a1 = color1.lane<3>();
584
585 int l0a = astc::flt2int_rtn(l0);
586 int a0a = astc::flt2int_rtn(a0);
587 l0a <<= 1;
588 a0a <<= 1;
589
590 int l0b = l0a & 0xFF;
591 int a0b = a0a & 0xFF;
592 int l0be = quant_color(quant_level, l0b);
593 int a0be = quant_color(quant_level, a0b);
594 l0b = l0be;
595 a0b = a0be;
596 l0b |= l0a & 0x100;
597 a0b |= a0a & 0x100;
598
599 int l1d = astc::flt2int_rtn(l1);
600 int a1d = astc::flt2int_rtn(a1);
601 l1d <<= 1;
602 a1d <<= 1;
603 l1d -= l0b;
604 a1d -= a0b;
605
606 if (l1d > 63 || l1d < -64)
607 {
608 return false;
609 }
610
611 if (a1d > 63 || a1d < -64)
612 {
613 return false;
614 }
615
616 l1d &= 0x7F;
617 a1d &= 0x7F;
618 l1d |= (l0b & 0x100) >> 1;
619 a1d |= (a0b & 0x100) >> 1;
620
621 int l1de = quant_color(quant_level, l1d);
622 int a1de = quant_color(quant_level, a1d);
623 int l1du = l1de;
624 int a1du = a1de;
625
626 if ((l1d ^ l1du) & 0xC0)
627 {
628 return false;
629 }
630
631 if ((a1d ^ a1du) & 0xC0)
632 {
633 return false;
634 }
635
636 l1du &= 0x7F;
637 a1du &= 0x7F;
638
639 if (l1du & 0x40)
640 {
641 l1du -= 0x80;
642 }
643
644 if (a1du & 0x40)
645 {
646 a1du -= 0x80;
647 }
648
649 l1du += l0b;
650 a1du += a0b;
651
652 if (l1du < 0 || l1du > 0x1FF)
653 {
654 return false;
655 }
656
657 if (a1du < 0 || a1du > 0x1FF)
658 {
659 return false;
660 }
661
662 output[0] = static_cast<uint8_t>(l0be);
663 output[1] = static_cast<uint8_t>(l1de);
664 output[2] = static_cast<uint8_t>(a0be);
665 output[3] = static_cast<uint8_t>(a1de);
666
667 return true;
668 }
669
670 /**
671 * @brief Try to quantize an LDR RGBA color using delta encoding.
672 *
673 * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
674 * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
675 * non-negative, then we encode a regular delta.
676 *
677 * This function only compressed the alpha - the other elements in the output array are not touched.
678 *
679 * @param color0 The input unquantized color0 endpoint.
680 * @param color1 The input unquantized color1 endpoint.
681 * @param[out] color0_out The output quantized color0 endpoint
682 * @param[out] color1_out The output quantized color1 endpoint
683 * @param quant_level The quantization level to use.
684 *
685 * @return Returns @c false on failure, @c true on success.
686 */
try_quantize_rgba_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)687 static bool try_quantize_rgba_delta(
688 vfloat4 color0,
689 vfloat4 color1,
690 vint4& color0_out,
691 vint4& color1_out,
692 quant_method quant_level
693 ) {
694 return try_quantize_rgb_delta(color0, color1, color0_out, color1_out, quant_level) &&
695 try_quantize_alpha_delta(color0, color1, color0_out, color1_out, quant_level);
696 }
697
698 /**
699 * @brief Try to quantize an LDR RGBA color using delta and blue contract encoding.
700 *
701 * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
702 * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
703 * non-negative, then we encode a regular delta.
704 *
705 * This function only compressed the alpha - the other elements in the output array are not touched.
706 *
707 * @param color0 The input unquantized color0 endpoint.
708 * @param color1 The input unquantized color1 endpoint.
709 * @param[out] color0_out The output quantized color0 endpoint
710 * @param[out] color1_out The output quantized color1 endpoint
711 * @param quant_level The quantization level to use.
712 *
713 * @return Returns @c false on failure, @c true on success.
714 */
try_quantize_rgba_delta_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)715 static bool try_quantize_rgba_delta_blue_contract(
716 vfloat4 color0,
717 vfloat4 color1,
718 vint4& color0_out,
719 vint4& color1_out,
720 quant_method quant_level
721 ) {
722 // Note that we swap the color0 and color1 ordering for alpha to match RGB blue-contract
723 return try_quantize_rgb_delta_blue_contract(color0, color1, color0_out, color1_out, quant_level) &&
724 try_quantize_alpha_delta(color1, color0, color0_out, color1_out, quant_level);
725 }
726
727 /**
728 * @brief Quantize an LDR RGB color using scale encoding.
729 *
730 * @param color The input unquantized color endpoint and scale factor.
731 * @param[out] output The output endpoints, returned as (r0, g0, b0, s).
732 * @param quant_level The quantization level to use.
733 */
quantize_rgbs(vfloat4 color,uint8_t output[4],quant_method quant_level)734 static void quantize_rgbs(
735 vfloat4 color,
736 uint8_t output[4],
737 quant_method quant_level
738 ) {
739 float scale = 1.0f / 257.0f;
740
741 float r = astc::clamp255f(color.lane<0>() * scale);
742 float g = astc::clamp255f(color.lane<1>() * scale);
743 float b = astc::clamp255f(color.lane<2>() * scale);
744
745 int ri = quant_color(quant_level, astc::flt2int_rtn(r), r);
746 int gi = quant_color(quant_level, astc::flt2int_rtn(g), g);
747 int bi = quant_color(quant_level, astc::flt2int_rtn(b), b);
748
749 float oldcolorsum = hadd_rgb_s(color) * scale;
750 float newcolorsum = static_cast<float>(ri + gi + bi);
751
752 float scalea = astc::clamp1f(color.lane<3>() * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f));
753 int scale_idx = astc::flt2int_rtn(scalea * 256.0f);
754 scale_idx = astc::clamp(scale_idx, 0, 255);
755
756 output[0] = static_cast<uint8_t>(ri);
757 output[1] = static_cast<uint8_t>(gi);
758 output[2] = static_cast<uint8_t>(bi);
759 output[3] = quant_color(quant_level, scale_idx);
760 }
761
762 /**
763 * @brief Quantize an LDR RGBA color using scale encoding.
764 *
765 * @param color0 The input unquantized color0 alpha endpoint.
766 * @param color1 The input unquantized color1 alpha endpoint.
767 * @param color The input unquantized color endpoint and scale factor.
768 * @param[out] output The output endpoints, returned as (r0, g0, b0, s, a0, a1).
769 * @param quant_level The quantization level to use.
770 */
quantize_rgbs_alpha(vfloat4 color0,vfloat4 color1,vfloat4 color,uint8_t output[6],quant_method quant_level)771 static void quantize_rgbs_alpha(
772 vfloat4 color0,
773 vfloat4 color1,
774 vfloat4 color,
775 uint8_t output[6],
776 quant_method quant_level
777 ) {
778 float a0 = color0.lane<3>();
779 float a1 = color1.lane<3>();
780
781 output[4] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
782 output[5] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
783
784 quantize_rgbs(color, output, quant_level);
785 }
786
787 /**
788 * @brief Quantize a LDR L color.
789 *
790 * @param color0 The input unquantized color0 endpoint.
791 * @param color1 The input unquantized color1 endpoint.
792 * @param[out] output The output endpoints, returned as (l0, l1).
793 * @param quant_level The quantization level to use.
794 */
quantize_luminance(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)795 static void quantize_luminance(
796 vfloat4 color0,
797 vfloat4 color1,
798 uint8_t output[2],
799 quant_method quant_level
800 ) {
801 float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
802 float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
803
804 if (lum0 > lum1)
805 {
806 float avg = (lum0 + lum1) * 0.5f;
807 lum0 = avg;
808 lum1 = avg;
809 }
810
811 output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
812 output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
813 }
814
815 /**
816 * @brief Quantize a LDR LA color.
817 *
818 * @param color0 The input unquantized color0 endpoint.
819 * @param color1 The input unquantized color1 endpoint.
820 * @param[out] output The output endpoints, returned as (l0, l1, a0, a1).
821 * @param quant_level The quantization level to use.
822 */
quantize_luminance_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[4],quant_method quant_level)823 static void quantize_luminance_alpha(
824 vfloat4 color0,
825 vfloat4 color1,
826 uint8_t output[4],
827 quant_method quant_level
828 ) {
829 float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
830 float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
831
832 float a0 = color0.lane<3>();
833 float a1 = color1.lane<3>();
834
835 output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
836 output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
837 output[2] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
838 output[3] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
839 }
840
841 /**
842 * @brief Quantize and unquantize a value ensuring top two bits are the same.
843 *
844 * @param quant_level The quantization level to use.
845 * @param value The input unquantized value.
846 * @param[out] quant_value The quantized value.
847 */
quantize_and_unquantize_retain_top_two_bits(quant_method quant_level,uint8_t value,uint8_t & quant_value)848 static inline void quantize_and_unquantize_retain_top_two_bits(
849 quant_method quant_level,
850 uint8_t value,
851 uint8_t& quant_value
852 ) {
853 int perform_loop;
854 uint8_t quantval;
855
856 do
857 {
858 quantval = quant_color(quant_level, value);
859
860 // Perform looping if the top two bits were modified by quant/unquant
861 perform_loop = (value & 0xC0) != (quantval & 0xC0);
862
863 if ((quantval & 0xC0) > (value & 0xC0))
864 {
865 // Quant/unquant rounded UP so that the top two bits changed;
866 // decrement the input in hopes that this will avoid rounding up.
867 value--;
868 }
869 else if ((quantval & 0xC0) < (value & 0xC0))
870 {
871 // Quant/unquant rounded DOWN so that the top two bits changed;
872 // decrement the input in hopes that this will avoid rounding down.
873 value--;
874 }
875 } while (perform_loop);
876
877 quant_value = quantval;
878 }
879
880 /**
881 * @brief Quantize and unquantize a value ensuring top four bits are the same.
882 *
883 * @param quant_level The quantization level to use.
884 * @param value The input unquantized value.
885 * @param[out] quant_value The quantized value in 0-255 range.
886 */
quantize_and_unquantize_retain_top_four_bits(quant_method quant_level,uint8_t value,uint8_t & quant_value)887 static inline void quantize_and_unquantize_retain_top_four_bits(
888 quant_method quant_level,
889 uint8_t value,
890 uint8_t& quant_value
891 ) {
892 uint8_t perform_loop;
893 uint8_t quantval;
894
895 do
896 {
897 quantval = quant_color(quant_level, value);
898 // Perform looping if the top four bits were modified by quant/unquant
899 perform_loop = (value & 0xF0) != (quantval & 0xF0);
900
901 if ((quantval & 0xF0) > (value & 0xF0))
902 {
903 // Quant/unquant rounded UP so that the top four bits changed;
904 // decrement the input value in hopes that this will avoid rounding up.
905 value--;
906 }
907 else if ((quantval & 0xF0) < (value & 0xF0))
908 {
909 // Quant/unquant rounded DOWN so that the top four bits changed;
910 // decrement the input value in hopes that this will avoid rounding down.
911 value--;
912 }
913 } while (perform_loop);
914
915 quant_value = quantval;
916 }
917
918 /**
919 * @brief Quantize a HDR RGB color using RGB + offset.
920 *
921 * @param color The input unquantized color endpoint and offset.
922 * @param[out] output The output endpoints, returned as packed RGBS with some mode bits.
923 * @param quant_level The quantization level to use.
924 */
quantize_hdr_rgbo(vfloat4 color,uint8_t output[4],quant_method quant_level)925 static void quantize_hdr_rgbo(
926 vfloat4 color,
927 uint8_t output[4],
928 quant_method quant_level
929 ) {
930 color.set_lane<0>(color.lane<0>() + color.lane<3>());
931 color.set_lane<1>(color.lane<1>() + color.lane<3>());
932 color.set_lane<2>(color.lane<2>() + color.lane<3>());
933
934 color = clamp(0.0f, 65535.0f, color);
935
936 vfloat4 color_bak = color;
937
938 int majcomp;
939 if (color.lane<0>() > color.lane<1>() && color.lane<0>() > color.lane<2>())
940 {
941 majcomp = 0; // red is largest component
942 }
943 else if (color.lane<1>() > color.lane<2>())
944 {
945 majcomp = 1; // green is largest component
946 }
947 else
948 {
949 majcomp = 2; // blue is largest component
950 }
951
952 // swap around the red component and the largest component.
953 switch (majcomp)
954 {
955 case 1:
956 color = color.swz<1, 0, 2, 3>();
957 break;
958 case 2:
959 color = color.swz<2, 1, 0, 3>();
960 break;
961 default:
962 break;
963 }
964
965 static const int mode_bits[5][3] {
966 {11, 5, 7},
967 {11, 6, 5},
968 {10, 5, 8},
969 {9, 6, 7},
970 {8, 7, 6}
971 };
972
973 static const float mode_cutoffs[5][2] {
974 {1024, 4096},
975 {2048, 1024},
976 {2048, 16384},
977 {8192, 16384},
978 {32768, 16384}
979 };
980
981 static const float mode_rscales[5] {
982 32.0f,
983 32.0f,
984 64.0f,
985 128.0f,
986 256.0f,
987 };
988
989 static const float mode_scales[5] {
990 1.0f / 32.0f,
991 1.0f / 32.0f,
992 1.0f / 64.0f,
993 1.0f / 128.0f,
994 1.0f / 256.0f,
995 };
996
997 float r_base = color.lane<0>();
998 float g_base = color.lane<0>() - color.lane<1>() ;
999 float b_base = color.lane<0>() - color.lane<2>() ;
1000 float s_base = color.lane<3>() ;
1001
1002 for (int mode = 0; mode < 5; mode++)
1003 {
1004 if (g_base > mode_cutoffs[mode][0] || b_base > mode_cutoffs[mode][0] || s_base > mode_cutoffs[mode][1])
1005 {
1006 continue;
1007 }
1008
1009 // Encode the mode into a 4-bit vector
1010 int mode_enc = mode < 4 ? (mode | (majcomp << 2)) : (majcomp | 0xC);
1011
1012 float mode_scale = mode_scales[mode];
1013 float mode_rscale = mode_rscales[mode];
1014
1015 int gb_intcutoff = 1 << mode_bits[mode][1];
1016 int s_intcutoff = 1 << mode_bits[mode][2];
1017
1018 // Quantize and unquantize R
1019 int r_intval = astc::flt2int_rtn(r_base * mode_scale);
1020
1021 int r_lowbits = r_intval & 0x3f;
1022
1023 r_lowbits |= (mode_enc & 3) << 6;
1024
1025 uint8_t r_quantval;
1026 quantize_and_unquantize_retain_top_two_bits(
1027 quant_level, static_cast<uint8_t>(r_lowbits), r_quantval);
1028
1029 r_intval = (r_intval & ~0x3f) | (r_quantval & 0x3f);
1030 float r_fval = static_cast<float>(r_intval) * mode_rscale;
1031
1032 // Recompute G and B, then quantize and unquantize them
1033 float g_fval = r_fval - color.lane<1>() ;
1034 float b_fval = r_fval - color.lane<2>() ;
1035
1036 g_fval = astc::clamp(g_fval, 0.0f, 65535.0f);
1037 b_fval = astc::clamp(b_fval, 0.0f, 65535.0f);
1038
1039 int g_intval = astc::flt2int_rtn(g_fval * mode_scale);
1040 int b_intval = astc::flt2int_rtn(b_fval * mode_scale);
1041
1042 if (g_intval >= gb_intcutoff || b_intval >= gb_intcutoff)
1043 {
1044 continue;
1045 }
1046
1047 int g_lowbits = g_intval & 0x1f;
1048 int b_lowbits = b_intval & 0x1f;
1049
1050 int bit0 = 0;
1051 int bit1 = 0;
1052 int bit2 = 0;
1053 int bit3 = 0;
1054
1055 switch (mode)
1056 {
1057 case 0:
1058 case 2:
1059 bit0 = (r_intval >> 9) & 1;
1060 break;
1061 case 1:
1062 case 3:
1063 bit0 = (r_intval >> 8) & 1;
1064 break;
1065 case 4:
1066 case 5:
1067 bit0 = (g_intval >> 6) & 1;
1068 break;
1069 }
1070
1071 switch (mode)
1072 {
1073 case 0:
1074 case 1:
1075 case 2:
1076 case 3:
1077 bit2 = (r_intval >> 7) & 1;
1078 break;
1079 case 4:
1080 case 5:
1081 bit2 = (b_intval >> 6) & 1;
1082 break;
1083 }
1084
1085 switch (mode)
1086 {
1087 case 0:
1088 case 2:
1089 bit1 = (r_intval >> 8) & 1;
1090 break;
1091 case 1:
1092 case 3:
1093 case 4:
1094 case 5:
1095 bit1 = (g_intval >> 5) & 1;
1096 break;
1097 }
1098
1099 switch (mode)
1100 {
1101 case 0:
1102 bit3 = (r_intval >> 10) & 1;
1103 break;
1104 case 2:
1105 bit3 = (r_intval >> 6) & 1;
1106 break;
1107 case 1:
1108 case 3:
1109 case 4:
1110 case 5:
1111 bit3 = (b_intval >> 5) & 1;
1112 break;
1113 }
1114
1115 g_lowbits |= (mode_enc & 0x4) << 5;
1116 b_lowbits |= (mode_enc & 0x8) << 4;
1117
1118 g_lowbits |= bit0 << 6;
1119 g_lowbits |= bit1 << 5;
1120 b_lowbits |= bit2 << 6;
1121 b_lowbits |= bit3 << 5;
1122
1123 uint8_t g_quantval;
1124 uint8_t b_quantval;
1125
1126 quantize_and_unquantize_retain_top_four_bits(
1127 quant_level, static_cast<uint8_t>(g_lowbits), g_quantval);
1128 quantize_and_unquantize_retain_top_four_bits(
1129 quant_level, static_cast<uint8_t>(b_lowbits), b_quantval);
1130
1131 g_intval = (g_intval & ~0x1f) | (g_quantval & 0x1f);
1132 b_intval = (b_intval & ~0x1f) | (b_quantval & 0x1f);
1133
1134 g_fval = static_cast<float>(g_intval) * mode_rscale;
1135 b_fval = static_cast<float>(b_intval) * mode_rscale;
1136
1137 // Recompute the scale value, based on the errors introduced to red, green and blue
1138
1139 // If the error is positive, then the R,G,B errors combined have raised the color
1140 // value overall; as such, the scale value needs to be increased.
1141 float rgb_errorsum = (r_fval - color.lane<0>() ) + (r_fval - g_fval - color.lane<1>() ) + (r_fval - b_fval - color.lane<2>() );
1142
1143 float s_fval = s_base + rgb_errorsum * (1.0f / 3.0f);
1144 s_fval = astc::clamp(s_fval, 0.0f, 1e9f);
1145
1146 int s_intval = astc::flt2int_rtn(s_fval * mode_scale);
1147
1148 if (s_intval >= s_intcutoff)
1149 {
1150 continue;
1151 }
1152
1153 int s_lowbits = s_intval & 0x1f;
1154
1155 int bit4;
1156 int bit5;
1157 int bit6;
1158 switch (mode)
1159 {
1160 case 1:
1161 bit6 = (r_intval >> 9) & 1;
1162 break;
1163 default:
1164 bit6 = (s_intval >> 5) & 1;
1165 break;
1166 }
1167
1168 switch (mode)
1169 {
1170 case 4:
1171 bit5 = (r_intval >> 7) & 1;
1172 break;
1173 case 1:
1174 bit5 = (r_intval >> 10) & 1;
1175 break;
1176 default:
1177 bit5 = (s_intval >> 6) & 1;
1178 break;
1179 }
1180
1181 switch (mode)
1182 {
1183 case 2:
1184 bit4 = (s_intval >> 7) & 1;
1185 break;
1186 default:
1187 bit4 = (r_intval >> 6) & 1;
1188 break;
1189 }
1190
1191 s_lowbits |= bit6 << 5;
1192 s_lowbits |= bit5 << 6;
1193 s_lowbits |= bit4 << 7;
1194
1195 uint8_t s_quantval;
1196
1197 quantize_and_unquantize_retain_top_four_bits(
1198 quant_level, static_cast<uint8_t>(s_lowbits), s_quantval);
1199
1200 output[0] = r_quantval;
1201 output[1] = g_quantval;
1202 output[2] = b_quantval;
1203 output[3] = s_quantval;
1204 return;
1205 }
1206
1207 // Failed to encode any of the modes above? In that case encode using mode #5
1208 float vals[4];
1209 vals[0] = color_bak.lane<0>();
1210 vals[1] = color_bak.lane<1>();
1211 vals[2] = color_bak.lane<2>();
1212 vals[3] = color_bak.lane<3>();
1213
1214 int ivals[4];
1215 float cvals[3];
1216
1217 for (int i = 0; i < 3; i++)
1218 {
1219 vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f);
1220 ivals[i] = astc::flt2int_rtn(vals[i] * (1.0f / 512.0f));
1221 cvals[i] = static_cast<float>(ivals[i]) * 512.0f;
1222 }
1223
1224 float rgb_errorsum = (cvals[0] - vals[0]) + (cvals[1] - vals[1]) + (cvals[2] - vals[2]);
1225 vals[3] += rgb_errorsum * (1.0f / 3.0f);
1226
1227 vals[3] = astc::clamp(vals[3], 0.0f, 65020.0f);
1228 ivals[3] = astc::flt2int_rtn(vals[3] * (1.0f / 512.0f));
1229
1230 int encvals[4];
1231 encvals[0] = (ivals[0] & 0x3f) | 0xC0;
1232 encvals[1] = (ivals[1] & 0x7f) | 0x80;
1233 encvals[2] = (ivals[2] & 0x7f) | 0x80;
1234 encvals[3] = (ivals[3] & 0x7f) | ((ivals[0] & 0x40) << 1);
1235
1236 for (uint8_t i = 0; i < 4; i++)
1237 {
1238 quantize_and_unquantize_retain_top_four_bits(
1239 quant_level, static_cast<uint8_t>(encvals[i]), output[i]);
1240 }
1241
1242 return;
1243 }
1244
1245 /**
1246 * @brief Quantize a HDR RGB color using direct RGB encoding.
1247 *
1248 * @param color0 The input unquantized color0 endpoint.
1249 * @param color1 The input unquantized color1 endpoint.
1250 * @param[out] output The output endpoints, returned as packed RGB+RGB pairs with mode bits.
1251 * @param quant_level The quantization level to use.
1252 */
quantize_hdr_rgb(vfloat4 color0,vfloat4 color1,uint8_t output[6],quant_method quant_level)1253 static void quantize_hdr_rgb(
1254 vfloat4 color0,
1255 vfloat4 color1,
1256 uint8_t output[6],
1257 quant_method quant_level
1258 ) {
1259 // Note: color*.lane<3> is not used so we can ignore it
1260 color0 = clamp(0.0f, 65535.0f, color0);
1261 color1 = clamp(0.0f, 65535.0f, color1);
1262
1263 vfloat4 color0_bak = color0;
1264 vfloat4 color1_bak = color1;
1265
1266 int majcomp;
1267 if (color1.lane<0>() > color1.lane<1>() && color1.lane<0>() > color1.lane<2>())
1268 {
1269 majcomp = 0;
1270 }
1271 else if (color1.lane<1>() > color1.lane<2>())
1272 {
1273 majcomp = 1;
1274 }
1275 else
1276 {
1277 majcomp = 2;
1278 }
1279
1280 // Swizzle the components
1281 switch (majcomp)
1282 {
1283 case 1: // red-green swap
1284 color0 = color0.swz<1, 0, 2, 3>();
1285 color1 = color1.swz<1, 0, 2, 3>();
1286 break;
1287 case 2: // red-blue swap
1288 color0 = color0.swz<2, 1, 0, 3>();
1289 color1 = color1.swz<2, 1, 0, 3>();
1290 break;
1291 default:
1292 break;
1293 }
1294
1295 float a_base = color1.lane<0>();
1296 a_base = astc::clamp(a_base, 0.0f, 65535.0f);
1297
1298 float b0_base = a_base - color1.lane<1>();
1299 float b1_base = a_base - color1.lane<2>();
1300 float c_base = a_base - color0.lane<0>();
1301 float d0_base = a_base - b0_base - c_base - color0.lane<1>();
1302 float d1_base = a_base - b1_base - c_base - color0.lane<2>();
1303
1304 // Number of bits in the various fields in the various modes
1305 static const int mode_bits[8][4] {
1306 {9, 7, 6, 7},
1307 {9, 8, 6, 6},
1308 {10, 6, 7, 7},
1309 {10, 7, 7, 6},
1310 {11, 8, 6, 5},
1311 {11, 6, 8, 6},
1312 {12, 7, 7, 5},
1313 {12, 6, 7, 6}
1314 };
1315
1316 // Cutoffs to use for the computed values of a,b,c,d, assuming the
1317 // range 0..65535 are LNS values corresponding to fp16.
1318 static const float mode_cutoffs[8][4] {
1319 {16384, 8192, 8192, 8}, // mode 0: 9,7,6,7
1320 {32768, 8192, 4096, 8}, // mode 1: 9,8,6,6
1321 {4096, 8192, 4096, 4}, // mode 2: 10,6,7,7
1322 {8192, 8192, 2048, 4}, // mode 3: 10,7,7,6
1323 {8192, 2048, 512, 2}, // mode 4: 11,8,6,5
1324 {2048, 8192, 1024, 2}, // mode 5: 11,6,8,6
1325 {2048, 2048, 256, 1}, // mode 6: 12,7,7,5
1326 {1024, 2048, 512, 1}, // mode 7: 12,6,7,6
1327 };
1328
1329 static const float mode_scales[8] {
1330 1.0f / 128.0f,
1331 1.0f / 128.0f,
1332 1.0f / 64.0f,
1333 1.0f / 64.0f,
1334 1.0f / 32.0f,
1335 1.0f / 32.0f,
1336 1.0f / 16.0f,
1337 1.0f / 16.0f,
1338 };
1339
1340 // Scaling factors when going from what was encoded in the mode to 16 bits.
1341 static const float mode_rscales[8] {
1342 128.0f,
1343 128.0f,
1344 64.0f,
1345 64.0f,
1346 32.0f,
1347 32.0f,
1348 16.0f,
1349 16.0f
1350 };
1351
1352 // Try modes one by one, with the highest-precision mode first.
1353 for (int mode = 7; mode >= 0; mode--)
1354 {
1355 // For each mode, test if we can in fact accommodate the computed b, c, and d values.
1356 // If we clearly can't, then we skip to the next mode.
1357
1358 float b_cutoff = mode_cutoffs[mode][0];
1359 float c_cutoff = mode_cutoffs[mode][1];
1360 float d_cutoff = mode_cutoffs[mode][2];
1361
1362 if (b0_base > b_cutoff || b1_base > b_cutoff || c_base > c_cutoff || fabsf(d0_base) > d_cutoff || fabsf(d1_base) > d_cutoff)
1363 {
1364 continue;
1365 }
1366
1367 float mode_scale = mode_scales[mode];
1368 float mode_rscale = mode_rscales[mode];
1369
1370 int b_intcutoff = 1 << mode_bits[mode][1];
1371 int c_intcutoff = 1 << mode_bits[mode][2];
1372 int d_intcutoff = 1 << (mode_bits[mode][3] - 1);
1373
1374 // Quantize and unquantize A, with the assumption that its high bits can be handled safely.
1375 int a_intval = astc::flt2int_rtn(a_base * mode_scale);
1376 int a_lowbits = a_intval & 0xFF;
1377
1378 int a_quantval = quant_color(quant_level, a_lowbits);
1379 int a_uquantval = a_quantval;
1380 a_intval = (a_intval & ~0xFF) | a_uquantval;
1381 float a_fval = static_cast<float>(a_intval) * mode_rscale;
1382
1383 // Recompute C, then quantize and unquantize it
1384 float c_fval = a_fval - color0.lane<0>();
1385 c_fval = astc::clamp(c_fval, 0.0f, 65535.0f);
1386
1387 int c_intval = astc::flt2int_rtn(c_fval * mode_scale);
1388
1389 if (c_intval >= c_intcutoff)
1390 {
1391 continue;
1392 }
1393
1394 int c_lowbits = c_intval & 0x3f;
1395
1396 c_lowbits |= (mode & 1) << 7;
1397 c_lowbits |= (a_intval & 0x100) >> 2;
1398
1399 uint8_t c_quantval;
1400
1401 quantize_and_unquantize_retain_top_two_bits(
1402 quant_level, static_cast<uint8_t>(c_lowbits), c_quantval);
1403
1404 c_intval = (c_intval & ~0x3F) | (c_quantval & 0x3F);
1405 c_fval = static_cast<float>(c_intval) * mode_rscale;
1406
1407 // Recompute B0 and B1, then quantize and unquantize them
1408 float b0_fval = a_fval - color1.lane<1>();
1409 float b1_fval = a_fval - color1.lane<2>();
1410
1411 b0_fval = astc::clamp(b0_fval, 0.0f, 65535.0f);
1412 b1_fval = astc::clamp(b1_fval, 0.0f, 65535.0f);
1413 int b0_intval = astc::flt2int_rtn(b0_fval * mode_scale);
1414 int b1_intval = astc::flt2int_rtn(b1_fval * mode_scale);
1415
1416 if (b0_intval >= b_intcutoff || b1_intval >= b_intcutoff)
1417 {
1418 continue;
1419 }
1420
1421 int b0_lowbits = b0_intval & 0x3f;
1422 int b1_lowbits = b1_intval & 0x3f;
1423
1424 int bit0 = 0;
1425 int bit1 = 0;
1426 switch (mode)
1427 {
1428 case 0:
1429 case 1:
1430 case 3:
1431 case 4:
1432 case 6:
1433 bit0 = (b0_intval >> 6) & 1;
1434 break;
1435 case 2:
1436 case 5:
1437 case 7:
1438 bit0 = (a_intval >> 9) & 1;
1439 break;
1440 }
1441
1442 switch (mode)
1443 {
1444 case 0:
1445 case 1:
1446 case 3:
1447 case 4:
1448 case 6:
1449 bit1 = (b1_intval >> 6) & 1;
1450 break;
1451 case 2:
1452 bit1 = (c_intval >> 6) & 1;
1453 break;
1454 case 5:
1455 case 7:
1456 bit1 = (a_intval >> 10) & 1;
1457 break;
1458 }
1459
1460 b0_lowbits |= bit0 << 6;
1461 b1_lowbits |= bit1 << 6;
1462
1463 b0_lowbits |= ((mode >> 1) & 1) << 7;
1464 b1_lowbits |= ((mode >> 2) & 1) << 7;
1465
1466 uint8_t b0_quantval;
1467 uint8_t b1_quantval;
1468
1469 quantize_and_unquantize_retain_top_two_bits(
1470 quant_level, static_cast<uint8_t>(b0_lowbits), b0_quantval);
1471 quantize_and_unquantize_retain_top_two_bits(
1472 quant_level, static_cast<uint8_t>(b1_lowbits), b1_quantval);
1473
1474 b0_intval = (b0_intval & ~0x3f) | (b0_quantval & 0x3f);
1475 b1_intval = (b1_intval & ~0x3f) | (b1_quantval & 0x3f);
1476 b0_fval = static_cast<float>(b0_intval) * mode_rscale;
1477 b1_fval = static_cast<float>(b1_intval) * mode_rscale;
1478
1479 // Recompute D0 and D1, then quantize and unquantize them
1480 float d0_fval = a_fval - b0_fval - c_fval - color0.lane<1>();
1481 float d1_fval = a_fval - b1_fval - c_fval - color0.lane<2>();
1482
1483 d0_fval = astc::clamp(d0_fval, -65535.0f, 65535.0f);
1484 d1_fval = astc::clamp(d1_fval, -65535.0f, 65535.0f);
1485
1486 int d0_intval = astc::flt2int_rtn(d0_fval * mode_scale);
1487 int d1_intval = astc::flt2int_rtn(d1_fval * mode_scale);
1488
1489 if (abs(d0_intval) >= d_intcutoff || abs(d1_intval) >= d_intcutoff)
1490 {
1491 continue;
1492 }
1493
1494 int d0_lowbits = d0_intval & 0x1f;
1495 int d1_lowbits = d1_intval & 0x1f;
1496
1497 int bit2 = 0;
1498 int bit3 = 0;
1499 int bit4;
1500 int bit5;
1501 switch (mode)
1502 {
1503 case 0:
1504 case 2:
1505 bit2 = (d0_intval >> 6) & 1;
1506 break;
1507 case 1:
1508 case 4:
1509 bit2 = (b0_intval >> 7) & 1;
1510 break;
1511 case 3:
1512 bit2 = (a_intval >> 9) & 1;
1513 break;
1514 case 5:
1515 bit2 = (c_intval >> 7) & 1;
1516 break;
1517 case 6:
1518 case 7:
1519 bit2 = (a_intval >> 11) & 1;
1520 break;
1521 }
1522 switch (mode)
1523 {
1524 case 0:
1525 case 2:
1526 bit3 = (d1_intval >> 6) & 1;
1527 break;
1528 case 1:
1529 case 4:
1530 bit3 = (b1_intval >> 7) & 1;
1531 break;
1532 case 3:
1533 case 5:
1534 case 6:
1535 case 7:
1536 bit3 = (c_intval >> 6) & 1;
1537 break;
1538 }
1539
1540 switch (mode)
1541 {
1542 case 4:
1543 case 6:
1544 bit4 = (a_intval >> 9) & 1;
1545 bit5 = (a_intval >> 10) & 1;
1546 break;
1547 default:
1548 bit4 = (d0_intval >> 5) & 1;
1549 bit5 = (d1_intval >> 5) & 1;
1550 break;
1551 }
1552
1553 d0_lowbits |= bit2 << 6;
1554 d1_lowbits |= bit3 << 6;
1555 d0_lowbits |= bit4 << 5;
1556 d1_lowbits |= bit5 << 5;
1557
1558 d0_lowbits |= (majcomp & 1) << 7;
1559 d1_lowbits |= ((majcomp >> 1) & 1) << 7;
1560
1561 uint8_t d0_quantval;
1562 uint8_t d1_quantval;
1563
1564 quantize_and_unquantize_retain_top_four_bits(
1565 quant_level, static_cast<uint8_t>(d0_lowbits), d0_quantval);
1566 quantize_and_unquantize_retain_top_four_bits(
1567 quant_level, static_cast<uint8_t>(d1_lowbits), d1_quantval);
1568
1569 output[0] = static_cast<uint8_t>(a_quantval);
1570 output[1] = c_quantval;
1571 output[2] = b0_quantval;
1572 output[3] = b1_quantval;
1573 output[4] = d0_quantval;
1574 output[5] = d1_quantval;
1575 return;
1576 }
1577
1578 // If neither of the modes fit we will use a flat representation for storing data, using 8 bits
1579 // for red and green, and 7 bits for blue. This gives color accuracy roughly similar to LDR
1580 // 4:4:3 which is not at all great but usable. This representation is used if the light color is
1581 // more than 4x the color value of the dark color.
1582 float vals[6];
1583 vals[0] = color0_bak.lane<0>();
1584 vals[1] = color1_bak.lane<0>();
1585 vals[2] = color0_bak.lane<1>();
1586 vals[3] = color1_bak.lane<1>();
1587 vals[4] = color0_bak.lane<2>();
1588 vals[5] = color1_bak.lane<2>();
1589
1590 for (int i = 0; i < 6; i++)
1591 {
1592 vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f);
1593 }
1594
1595 for (int i = 0; i < 4; i++)
1596 {
1597 int idx = astc::flt2int_rtn(vals[i] * 1.0f / 256.0f);
1598 output[i] = quant_color(quant_level, idx);
1599 }
1600
1601 for (int i = 4; i < 6; i++)
1602 {
1603 int idx = astc::flt2int_rtn(vals[i] * 1.0f / 512.0f) + 128;
1604 quantize_and_unquantize_retain_top_two_bits(
1605 quant_level, static_cast<uint8_t>(idx), output[i]);
1606 }
1607
1608 return;
1609 }
1610
1611 /**
1612 * @brief Quantize a HDR RGB + LDR A color using direct RGBA encoding.
1613 *
1614 * @param color0 The input unquantized color0 endpoint.
1615 * @param color1 The input unquantized color1 endpoint.
1616 * @param[out] output The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1617 * @param quant_level The quantization level to use.
1618 */
quantize_hdr_rgb_ldr_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[8],quant_method quant_level)1619 static void quantize_hdr_rgb_ldr_alpha(
1620 vfloat4 color0,
1621 vfloat4 color1,
1622 uint8_t output[8],
1623 quant_method quant_level
1624 ) {
1625 float scale = 1.0f / 257.0f;
1626
1627 float a0 = astc::clamp255f(color0.lane<3>() * scale);
1628 float a1 = astc::clamp255f(color1.lane<3>() * scale);
1629
1630 output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
1631 output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
1632
1633 quantize_hdr_rgb(color0, color1, output, quant_level);
1634 }
1635
1636 /**
1637 * @brief Quantize a HDR L color using the large range encoding.
1638 *
1639 * @param color0 The input unquantized color0 endpoint.
1640 * @param color1 The input unquantized color1 endpoint.
1641 * @param[out] output The output endpoints, returned as packed (l0, l1).
1642 * @param quant_level The quantization level to use.
1643 */
quantize_hdr_luminance_large_range(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)1644 static void quantize_hdr_luminance_large_range(
1645 vfloat4 color0,
1646 vfloat4 color1,
1647 uint8_t output[2],
1648 quant_method quant_level
1649 ) {
1650 float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
1651 float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
1652
1653 if (lum1 < lum0)
1654 {
1655 float avg = (lum0 + lum1) * 0.5f;
1656 lum0 = avg;
1657 lum1 = avg;
1658 }
1659
1660 int ilum1 = astc::flt2int_rtn(lum1);
1661 int ilum0 = astc::flt2int_rtn(lum0);
1662
1663 // Find the closest encodable point in the upper half of the code-point space
1664 int upper_v0 = (ilum0 + 128) >> 8;
1665 int upper_v1 = (ilum1 + 128) >> 8;
1666
1667 upper_v0 = astc::clamp(upper_v0, 0, 255);
1668 upper_v1 = astc::clamp(upper_v1, 0, 255);
1669
1670 // Find the closest encodable point in the lower half of the code-point space
1671 int lower_v0 = (ilum1 + 256) >> 8;
1672 int lower_v1 = ilum0 >> 8;
1673
1674 lower_v0 = astc::clamp(lower_v0, 0, 255);
1675 lower_v1 = astc::clamp(lower_v1, 0, 255);
1676
1677 // Determine the distance between the point in code-point space and the input value
1678 int upper0_dec = upper_v0 << 8;
1679 int upper1_dec = upper_v1 << 8;
1680 int lower0_dec = (lower_v1 << 8) + 128;
1681 int lower1_dec = (lower_v0 << 8) - 128;
1682
1683 int upper0_diff = upper0_dec - ilum0;
1684 int upper1_diff = upper1_dec - ilum1;
1685 int lower0_diff = lower0_dec - ilum0;
1686 int lower1_diff = lower1_dec - ilum1;
1687
1688 int upper_error = (upper0_diff * upper0_diff) + (upper1_diff * upper1_diff);
1689 int lower_error = (lower0_diff * lower0_diff) + (lower1_diff * lower1_diff);
1690
1691 int v0, v1;
1692 if (upper_error < lower_error)
1693 {
1694 v0 = upper_v0;
1695 v1 = upper_v1;
1696 }
1697 else
1698 {
1699 v0 = lower_v0;
1700 v1 = lower_v1;
1701 }
1702
1703 // OK; encode
1704 output[0] = quant_color(quant_level, v0);
1705 output[1] = quant_color(quant_level, v1);
1706 }
1707
1708 /**
1709 * @brief Quantize a HDR L color using the small range encoding.
1710 *
1711 * @param color0 The input unquantized color0 endpoint.
1712 * @param color1 The input unquantized color1 endpoint.
1713 * @param[out] output The output endpoints, returned as packed (l0, l1) with mode bits.
1714 * @param quant_level The quantization level to use.
1715 *
1716 * @return Returns @c false on failure, @c true on success.
1717 */
try_quantize_hdr_luminance_small_range(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)1718 static bool try_quantize_hdr_luminance_small_range(
1719 vfloat4 color0,
1720 vfloat4 color1,
1721 uint8_t output[2],
1722 quant_method quant_level
1723 ) {
1724 float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
1725 float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
1726
1727 if (lum1 < lum0)
1728 {
1729 float avg = (lum0 + lum1) * 0.5f;
1730 lum0 = avg;
1731 lum1 = avg;
1732 }
1733
1734 int ilum1 = astc::flt2int_rtn(lum1);
1735 int ilum0 = astc::flt2int_rtn(lum0);
1736
1737 // Difference of more than a factor-of-2 results in immediate failure
1738 if (ilum1 - ilum0 > 2048)
1739 {
1740 return false;
1741 }
1742
1743 int lowval, highval, diffval;
1744 int v0, v1;
1745 int v0e, v1e;
1746 int v0d, v1d;
1747
1748 // Try to encode the high-precision submode
1749 lowval = (ilum0 + 16) >> 5;
1750 highval = (ilum1 + 16) >> 5;
1751
1752 lowval = astc::clamp(lowval, 0, 2047);
1753 highval = astc::clamp(highval, 0, 2047);
1754
1755 v0 = lowval & 0x7F;
1756 v0e = quant_color(quant_level, v0);
1757 v0d = v0e;
1758
1759 if (v0d < 0x80)
1760 {
1761 lowval = (lowval & ~0x7F) | v0d;
1762 diffval = highval - lowval;
1763 if (diffval >= 0 && diffval <= 15)
1764 {
1765 v1 = ((lowval >> 3) & 0xF0) | diffval;
1766 v1e = quant_color(quant_level, v1);
1767 v1d = v1e;
1768 if ((v1d & 0xF0) == (v1 & 0xF0))
1769 {
1770 output[0] = static_cast<uint8_t>(v0e);
1771 output[1] = static_cast<uint8_t>(v1e);
1772 return true;
1773 }
1774 }
1775 }
1776
1777 // Try to encode the low-precision submode
1778 lowval = (ilum0 + 32) >> 6;
1779 highval = (ilum1 + 32) >> 6;
1780
1781 lowval = astc::clamp(lowval, 0, 1023);
1782 highval = astc::clamp(highval, 0, 1023);
1783
1784 v0 = (lowval & 0x7F) | 0x80;
1785 v0e = quant_color(quant_level, v0);
1786 v0d = v0e;
1787 if ((v0d & 0x80) == 0)
1788 {
1789 return false;
1790 }
1791
1792 lowval = (lowval & ~0x7F) | (v0d & 0x7F);
1793 diffval = highval - lowval;
1794 if (diffval < 0 || diffval > 31)
1795 {
1796 return false;
1797 }
1798
1799 v1 = ((lowval >> 2) & 0xE0) | diffval;
1800 v1e = quant_color(quant_level, v1);
1801 v1d = v1e;
1802 if ((v1d & 0xE0) != (v1 & 0xE0))
1803 {
1804 return false;
1805 }
1806
1807 output[0] = static_cast<uint8_t>(v0e);
1808 output[1] = static_cast<uint8_t>(v1e);
1809 return true;
1810 }
1811
1812 /**
1813 * @brief Quantize a HDR A color using either delta or direct RGBA encoding.
1814 *
1815 * @param alpha0 The input unquantized color0 endpoint.
1816 * @param alpha1 The input unquantized color1 endpoint.
1817 * @param[out] output The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1818 * @param quant_level The quantization level to use.
1819 */
quantize_hdr_alpha(float alpha0,float alpha1,uint8_t output[2],quant_method quant_level)1820 static void quantize_hdr_alpha(
1821 float alpha0,
1822 float alpha1,
1823 uint8_t output[2],
1824 quant_method quant_level
1825 ) {
1826 alpha0 = astc::clamp(alpha0, 0.0f, 65280.0f);
1827 alpha1 = astc::clamp(alpha1, 0.0f, 65280.0f);
1828
1829 int ialpha0 = astc::flt2int_rtn(alpha0);
1830 int ialpha1 = astc::flt2int_rtn(alpha1);
1831
1832 int val0, val1, diffval;
1833 int v6, v7;
1834 int v6e, v7e;
1835 int v6d, v7d;
1836
1837 // Try to encode one of the delta submodes, in decreasing-precision order
1838 for (int i = 2; i >= 0; i--)
1839 {
1840 val0 = (ialpha0 + (128 >> i)) >> (8 - i);
1841 val1 = (ialpha1 + (128 >> i)) >> (8 - i);
1842
1843 v6 = (val0 & 0x7F) | ((i & 1) << 7);
1844 v6e = quant_color(quant_level, v6);
1845 v6d = v6e;
1846
1847 if ((v6 ^ v6d) & 0x80)
1848 {
1849 continue;
1850 }
1851
1852 val0 = (val0 & ~0x7f) | (v6d & 0x7f);
1853 diffval = val1 - val0;
1854 int cutoff = 32 >> i;
1855 int mask = 2 * cutoff - 1;
1856
1857 if (diffval < -cutoff || diffval >= cutoff)
1858 {
1859 continue;
1860 }
1861
1862 v7 = ((i & 2) << 6) | ((val0 >> 7) << (6 - i)) | (diffval & mask);
1863 v7e = quant_color(quant_level, v7);
1864 v7d = v7e;
1865
1866 static const int testbits[3] { 0xE0, 0xF0, 0xF8 };
1867
1868 if ((v7 ^ v7d) & testbits[i])
1869 {
1870 continue;
1871 }
1872
1873 output[0] = static_cast<uint8_t>(v6e);
1874 output[1] = static_cast<uint8_t>(v7e);
1875 return;
1876 }
1877
1878 // Could not encode any of the delta modes; instead encode a flat value
1879 val0 = (ialpha0 + 256) >> 9;
1880 val1 = (ialpha1 + 256) >> 9;
1881 v6 = val0 | 0x80;
1882 v7 = val1 | 0x80;
1883
1884 output[0] = quant_color(quant_level, v6);
1885 output[1] = quant_color(quant_level, v7);
1886
1887 return;
1888 }
1889
1890 /**
1891 * @brief Quantize a HDR RGBA color using either delta or direct RGBA encoding.
1892 *
1893 * @param color0 The input unquantized color0 endpoint.
1894 * @param color1 The input unquantized color1 endpoint.
1895 * @param[out] output The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1896 * @param quant_level The quantization level to use.
1897 */
quantize_hdr_rgb_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[8],quant_method quant_level)1898 static void quantize_hdr_rgb_alpha(
1899 vfloat4 color0,
1900 vfloat4 color1,
1901 uint8_t output[8],
1902 quant_method quant_level
1903 ) {
1904 quantize_hdr_rgb(color0, color1, output, quant_level);
1905 quantize_hdr_alpha(color0.lane<3>(), color1.lane<3>(), output + 6, quant_level);
1906 }
1907
1908 /* See header for documentation. */
pack_color_endpoints(QualityProfile privateProfile,vfloat4 color0,vfloat4 color1,vfloat4 rgbs_color,vfloat4 rgbo_color,int format,uint8_t * output,quant_method quant_level)1909 uint8_t pack_color_endpoints(
1910 QualityProfile privateProfile,
1911 vfloat4 color0,
1912 vfloat4 color1,
1913 vfloat4 rgbs_color,
1914 vfloat4 rgbo_color,
1915 int format,
1916 uint8_t* output,
1917 quant_method quant_level
1918 ) {
1919 assert(QUANT_6 <= quant_level && quant_level <= QUANT_256);
1920
1921 // Clamp colors to a valid LDR range
1922 // Note that HDR has a lower max, handled in the conversion functions
1923 color0 = clamp(0.0f, 65535.0f, color0);
1924 color1 = clamp(0.0f, 65535.0f, color1);
1925
1926 // Pre-scale the LDR value we need to the 0-255 quantizable range
1927 vfloat4 color0_ldr = color0 * (1.0f / 257.0f);
1928 vfloat4 color1_ldr = color1 * (1.0f / 257.0f);
1929
1930 uint8_t retval = 0;
1931 float best_error = ERROR_CALC_DEFAULT;
1932 vint4 color0_out, color1_out;
1933 vint4 color0_out2, color1_out2;
1934
1935 switch (format)
1936 {
1937 case FMT_RGB:
1938 if (quant_level <= QUANT_160)
1939 {
1940 if (try_quantize_rgb_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level))
1941 {
1942 vint4 color0_unpack;
1943 vint4 color1_unpack;
1944 rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack);
1945
1946 retval = FMT_RGB_DELTA;
1947 best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1948 }
1949
1950 if (try_quantize_rgb_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
1951 {
1952 vint4 color0_unpack;
1953 vint4 color1_unpack;
1954 rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1955
1956 float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1957 if (error < best_error)
1958 {
1959 retval = FMT_RGB_DELTA;
1960 best_error = error;
1961 color0_out = color0_out2;
1962 color1_out = color1_out2;
1963 }
1964 }
1965 }
1966
1967 if (quant_level < QUANT_256)
1968 {
1969 if (try_quantize_rgb_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
1970 {
1971 vint4 color0_unpack;
1972 vint4 color1_unpack;
1973 rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1974
1975 float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1976 if (error < best_error)
1977 {
1978 retval = FMT_RGB;
1979 best_error = error;
1980 color0_out = color0_out2;
1981 color1_out = color1_out2;
1982 }
1983 }
1984 }
1985
1986 {
1987 quantize_rgb(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level);
1988
1989 vint4 color0_unpack;
1990 vint4 color1_unpack;
1991 rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1992
1993 float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1994 if (error < best_error)
1995 {
1996 retval = FMT_RGB;
1997 color0_out = color0_out2;
1998 color1_out = color1_out2;
1999 }
2000 }
2001
2002 // TODO: Can we vectorize this?
2003 output[0] = static_cast<uint8_t>(color0_out.lane<0>());
2004 output[1] = static_cast<uint8_t>(color1_out.lane<0>());
2005 output[2] = static_cast<uint8_t>(color0_out.lane<1>());
2006 output[3] = static_cast<uint8_t>(color1_out.lane<1>());
2007 output[4] = static_cast<uint8_t>(color0_out.lane<2>());
2008 output[5] = static_cast<uint8_t>(color1_out.lane<2>());
2009 break;
2010
2011 case FMT_RGBA:
2012 if ((privateProfile == HIGH_QUALITY_PROFILE) && (quant_level <= QUANT_160)) // only full quality profile to try
2013 {
2014 if (try_quantize_rgba_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level))
2015 {
2016 vint4 color0_unpack;
2017 vint4 color1_unpack;
2018 rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack);
2019
2020 retval = FMT_RGBA_DELTA;
2021 best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2022 }
2023
2024 if (try_quantize_rgba_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
2025 {
2026 vint4 color0_unpack;
2027 vint4 color1_unpack;
2028 rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2029
2030 float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2031 if (error < best_error)
2032 {
2033 retval = FMT_RGBA_DELTA;
2034 best_error = error;
2035 color0_out = color0_out2;
2036 color1_out = color1_out2;
2037 }
2038 }
2039 }
2040
2041 if (quant_level < QUANT_256)
2042 {
2043 if (try_quantize_rgba_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
2044 {
2045 vint4 color0_unpack;
2046 vint4 color1_unpack;
2047 rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2048
2049 float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2050 if (error < best_error)
2051 {
2052 retval = FMT_RGBA;
2053 best_error = error;
2054 color0_out = color0_out2;
2055 color1_out = color1_out2;
2056 }
2057 }
2058 }
2059
2060 {
2061 quantize_rgba(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level);
2062
2063 vint4 color0_unpack;
2064 vint4 color1_unpack;
2065 rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2066
2067 float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2068 if (error < best_error)
2069 {
2070 retval = FMT_RGBA;
2071 color0_out = color0_out2;
2072 color1_out = color1_out2;
2073 }
2074 }
2075
2076 // TODO: Can we vectorize this?
2077 output[0] = static_cast<uint8_t>(color0_out.lane<0>());
2078 output[1] = static_cast<uint8_t>(color1_out.lane<0>());
2079 output[2] = static_cast<uint8_t>(color0_out.lane<1>());
2080 output[3] = static_cast<uint8_t>(color1_out.lane<1>());
2081 output[4] = static_cast<uint8_t>(color0_out.lane<2>());
2082 output[5] = static_cast<uint8_t>(color1_out.lane<2>());
2083 output[6] = static_cast<uint8_t>(color0_out.lane<3>());
2084 output[7] = static_cast<uint8_t>(color1_out.lane<3>());
2085 break;
2086
2087 case FMT_RGB_SCALE:
2088 quantize_rgbs(rgbs_color, output, quant_level);
2089 retval = FMT_RGB_SCALE;
2090 break;
2091
2092 case FMT_HDR_RGB_SCALE:
2093 quantize_hdr_rgbo(rgbo_color, output, quant_level);
2094 retval = FMT_HDR_RGB_SCALE;
2095 break;
2096
2097 case FMT_HDR_RGB:
2098 quantize_hdr_rgb(color0, color1, output, quant_level);
2099 retval = FMT_HDR_RGB;
2100 break;
2101
2102 case FMT_RGB_SCALE_ALPHA:
2103 quantize_rgbs_alpha(color0_ldr, color1_ldr, rgbs_color, output, quant_level);
2104 retval = FMT_RGB_SCALE_ALPHA;
2105 break;
2106
2107 case FMT_HDR_LUMINANCE_SMALL_RANGE:
2108 case FMT_HDR_LUMINANCE_LARGE_RANGE:
2109 if (try_quantize_hdr_luminance_small_range(color0, color1, output, quant_level))
2110 {
2111 retval = FMT_HDR_LUMINANCE_SMALL_RANGE;
2112 break;
2113 }
2114 quantize_hdr_luminance_large_range(color0, color1, output, quant_level);
2115 retval = FMT_HDR_LUMINANCE_LARGE_RANGE;
2116 break;
2117
2118 case FMT_LUMINANCE:
2119 quantize_luminance(color0_ldr, color1_ldr, output, quant_level);
2120 retval = FMT_LUMINANCE;
2121 break;
2122
2123 case FMT_LUMINANCE_ALPHA:
2124 if (quant_level <= 18)
2125 {
2126 if (try_quantize_luminance_alpha_delta(color0_ldr, color1_ldr, output, quant_level))
2127 {
2128 retval = FMT_LUMINANCE_ALPHA_DELTA;
2129 break;
2130 }
2131 }
2132 quantize_luminance_alpha(color0_ldr, color1_ldr, output, quant_level);
2133 retval = FMT_LUMINANCE_ALPHA;
2134 break;
2135
2136 case FMT_HDR_RGB_LDR_ALPHA:
2137 quantize_hdr_rgb_ldr_alpha(color0, color1, output, quant_level);
2138 retval = FMT_HDR_RGB_LDR_ALPHA;
2139 break;
2140
2141 case FMT_HDR_RGBA:
2142 quantize_hdr_rgb_alpha(color0, color1, output, quant_level);
2143 retval = FMT_HDR_RGBA;
2144 break;
2145 }
2146
2147 return retval;
2148 }
2149
2150 #endif
2151