• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 #if !defined(ASTCENC_DECOMPRESS_ONLY)
19 
20 /**
21  * @brief Functions for color quantization.
22  *
23  * The design of the color quantization functionality requires the caller to use higher level error
24  * analysis to determine the base encoding that should be used. This earlier analysis will select
25  * the basic type of the endpoint that should be used:
26  *
27  *     * Mode: LDR or HDR
28  *     * Quantization level
29  *     * Channel count: L, LA, RGB, or RGBA
30  *     * Endpoint 2 type: Direct color encode, or scaled from endpoint 1.
31  *
32  * However, this leaves a number of decisions about exactly how to pack the endpoints open. In
33  * particular we need to determine if blue contraction can be used, and/or if delta encoding can be
34  * used. If they can be applied these will allow us to maintain higher precision in the endpoints
35  * without needing additional storage.
36  */
37 
38 #include <stdio.h>
39 #include <assert.h>
40 
41 #include "astcenc_internal.h"
42 
43 /**
44  * @brief Compute the error of an LDR RGB or RGBA encoding.
45  *
46  * @param uquant0    The original endpoint 0 color.
47  * @param uquant1    The original endpoint 1 color.
48  * @param quant0     The unpacked quantized endpoint 0 color.
49  * @param quant1     The unpacked quantized endpoint 1 color.
50  *
51  * @return The MSE of the encoding.
52  */
get_rgba_encoding_error(vfloat4 uquant0,vfloat4 uquant1,vint4 quant0,vint4 quant1)53 static float get_rgba_encoding_error(
54 	vfloat4 uquant0,
55 	vfloat4 uquant1,
56 	vint4 quant0,
57 	vint4 quant1
58 ) {
59 	vfloat4 error0 = uquant0 - int_to_float(quant0);
60 	vfloat4 error1 = uquant1 - int_to_float(quant1);
61 	return hadd_s(error0 * error0 + error1 * error1);
62 }
63 
64 /**
65  * @brief Determine the quantized value given a quantization level.
66  *
67  * @param quant_level   The quantization level to use.
68  * @param value         The value to convert. This must be in the 0-255 range.
69  *
70  * @return The unpacked quantized value, returned in 0-255 range.
71  */
quant_color(quant_method quant_level,int value)72 static inline uint8_t quant_color(
73 	quant_method quant_level,
74 	int value
75 ) {
76 	int index = value * 2 + 1;
77 	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
78 }
79 
80 /**
81  * @brief Determine the quantized value given a quantization level.
82  *
83  * @param quant_level   The quantization level to use.
84  * @param value         The value to convert. This must be in the 0-255 range.
85  *
86  * @return The unpacked quantized value, returned in 0-255 range.
87  */
quant_color3(quant_method quant_level,vint4 value)88 static inline vint4 quant_color3(
89 	quant_method quant_level,
90 	vint4 value
91 ) {
92 	vint4 index = value * 2 + 1;
93 	return vint4(
94 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()],
95 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()],
96 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()],
97 		0);
98 }
99 
100 /**
101  * @brief Determine the quantized value given a quantization level and residual.
102  *
103  * @param quant_level   The quantization level to use.
104  * @param value         The value to convert. This must be in the 0-255 range.
105  * @param valuef        The original value before rounding, used to compute a residual.
106  *
107  * @return The unpacked quantized value, returned in 0-255 range.
108  */
quant_color(quant_method quant_level,int value,float valuef)109 static inline uint8_t quant_color(
110 	quant_method quant_level,
111 	int value,
112 	float valuef
113 ) {
114 	int index = value * 2;
115 
116 	// Compute the residual to determine if we should round down or up ties.
117 	// Test should be residual >= 0, but empirical testing shows small bias helps.
118 	float residual = valuef - static_cast<float>(value);
119 	if (residual >= -0.1f)
120 	{
121 		index++;
122 	}
123 
124 	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
125 }
126 
127 /**
128  * @brief Determine the quantized value given a quantization level and residual.
129  *
130  * @param quant_level   The quantization level to use.
131  * @param value         The value to convert. This must be in the 0-255 range.
132  * @param valuef        The original value before rounding, used to compute a residual.
133  *
134  * @return The unpacked quantized value, returned in 0-255 range.
135  */
quant_color3(quant_method quant_level,vint4 value,vfloat4 valuef)136 static inline vint4 quant_color3(
137 	quant_method quant_level,
138 	vint4 value,
139 	vfloat4 valuef
140 ) {
141 	vint4 index = value * 2;
142 
143 	// Compute the residual to determine if we should round down or up ties.
144 	// Test should be residual >= 0, but empirical testing shows small bias helps.
145 	vfloat4 residual = valuef - int_to_float(value);
146 	vmask4 mask = residual >= vfloat4(-0.1f);
147 	index = select(index, index + 1, mask);
148 
149 	return vint4(
150 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()],
151 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()],
152 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()],
153 		0);
154 }
155 
156 /**
157  * @brief Quantize an LDR RGB color.
158  *
159  * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
160  * For this encoding @c color0 cannot be larger than @c color1. If @c color0 is actually larger
161  * than @c color1, @c color0 is reduced and @c color1 is increased until the constraint is met.
162  *
163  * @param      color0        The input unquantized color0 endpoint.
164  * @param      color1        The input unquantized color1 endpoint.
165  * @param[out] color0_out    The output quantized color0 endpoint.
166  * @param[out] color1_out    The output quantized color1 endpoint.
167  * @param      quant_level   The quantization level to use.
168  */
quantize_rgb(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)169 static void quantize_rgb(
170 	vfloat4 color0,
171 	vfloat4 color1,
172 	vint4& color0_out,
173 	vint4& color1_out,
174 	quant_method quant_level
175 ) {
176 	vint4 color0i, color1i;
177 	vfloat4 nudge(0.2f);
178 
179 	do
180 	{
181 		vint4 color0q = max(float_to_int_rtn(color0), vint4(0));
182 		color0i = quant_color3(quant_level, color0q, color0);
183 		color0 = color0 - nudge;
184 
185 		vint4 color1q = min(float_to_int_rtn(color1), vint4(255));
186 		color1i = quant_color3(quant_level, color1q, color1);
187 		color1 = color1 + nudge;
188 	} while (hadd_rgb_s(color0i) > hadd_rgb_s(color1i));
189 
190 	color0_out = color0i;
191 	color1_out = color1i;
192 }
193 
194 /**
195  * @brief Quantize an LDR RGBA color.
196  *
197  * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
198  * For this encoding @c color0.rgb cannot be larger than @c color1.rgb (this indicates blue
199  * contraction). If @c color0.rgb is actually larger than @c color1.rgb, @c color0.rgb is reduced
200  * and @c color1.rgb is increased until the constraint is met.
201  *
202  * @param      color0        The input unquantized color0 endpoint.
203  * @param      color1        The input unquantized color1 endpoint.
204  * @param[out] color0_out    The output quantized color0 endpoint.
205  * @param[out] color1_out    The output quantized color1 endpoint.
206  * @param      quant_level   The quantization level to use.
207  */
quantize_rgba(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)208 static void quantize_rgba(
209 	vfloat4 color0,
210 	vfloat4 color1,
211 	vint4& color0_out,
212 	vint4& color1_out,
213 	quant_method quant_level
214 ) {
215 	quantize_rgb(color0, color1, color0_out, color1_out, quant_level);
216 
217 	float a0 = color0.lane<3>();
218 	float a1 = color1.lane<3>();
219 
220 	color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0));
221 	color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1));
222 }
223 
224 /**
225  * @brief Try to quantize an LDR RGB color using blue-contraction.
226  *
227  * Blue-contraction is only usable if encoded color 1 is larger than color 0.
228  *
229  * @param      color0        The input unquantized color0 endpoint.
230  * @param      color1        The input unquantized color1 endpoint.
231  * @param[out] color0_out    The output quantized color0 endpoint.
232  * @param[out] color1_out    The output quantized color1 endpoint.
233  * @param      quant_level   The quantization level to use.
234  *
235  * @return Returns @c false on failure, @c true on success.
236  */
try_quantize_rgb_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)237 static bool try_quantize_rgb_blue_contract(
238 	vfloat4 color0,
239 	vfloat4 color1,
240 	vint4& color0_out,
241 	vint4& color1_out,
242 	quant_method quant_level
243 ) {
244 	// Apply inverse blue-contraction
245 	color0 += color0 - color0.swz<2, 2, 2, 3>();
246 	color1 += color1 - color1.swz<2, 2, 2, 3>();
247 
248 	// If anything overflows BC cannot be used
249 	vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f));
250 	vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f));
251 	if (any(color0_error | color1_error))
252 	{
253 		return false;
254 	}
255 
256 	// Quantize the inverse blue-contracted color
257 	vint4 color0i = quant_color3(quant_level, float_to_int_rtn(color0), color0);
258 	vint4 color1i = quant_color3(quant_level, float_to_int_rtn(color1), color1);
259 
260 	// If color #1 is not larger than color #0 then blue-contraction cannot be used
261 	// We must test afterwards because quantization can change the order
262 	if (hadd_rgb_s(color1i) <= hadd_rgb_s(color0i))
263 	{
264 		return false;
265 	}
266 
267 	color0_out = color1i;
268 	color1_out = color0i;
269 	return true;
270 }
271 
272 /**
273  * @brief Try to quantize an LDR RGBA color using blue-contraction.
274  *
275  * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
276  *
277  * @param      color0        The input unquantized color0 endpoint.
278  * @param      color1        The input unquantized color1 endpoint.
279  * @param[out] color0_out    The output quantized color0 endpoint.
280  * @param[out] color1_out    The output quantized color1 endpoint.
281  * @param      quant_level   The quantization level to use.
282  *
283  * @return Returns @c false on failure, @c true on success.
284  */
try_quantize_rgba_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)285 static bool try_quantize_rgba_blue_contract(
286 	vfloat4 color0,
287 	vfloat4 color1,
288 	vint4& color0_out,
289 	vint4& color1_out,
290 	quant_method quant_level
291 ) {
292 	if (try_quantize_rgb_blue_contract(color0, color1, color0_out, color1_out, quant_level))
293 	{
294 		float a0 = color0.lane<3>();
295 		float a1 = color1.lane<3>();
296 
297 		color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1));
298 		color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0));
299 
300 		return true;
301 	}
302 
303 	return false;
304 }
305 
306 /**
307  * @brief Try to quantize an LDR RGB color using delta encoding.
308  *
309  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
310  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
311  * non-negative, then we encode a regular delta.
312  *
313  * @param      color0        The input unquantized color0 endpoint.
314  * @param      color1        The input unquantized color1 endpoint.
315  * @param[out] color0_out    The output quantized color0 endpoint.
316  * @param[out] color1_out    The output quantized color1 endpoint.
317  * @param      quant_level   The quantization level to use.
318  *
319  * @return Returns @c false on failure, @c true on success.
320  */
try_quantize_rgb_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)321 static bool try_quantize_rgb_delta(
322 	vfloat4 color0,
323 	vfloat4 color1,
324 	vint4& color0_out,
325 	vint4& color1_out,
326 	quant_method quant_level
327 ) {
328 	// Transform color0 to unorm9
329 	vint4 color0a = float_to_int_rtn(color0);
330 	color0.set_lane<3>(0.0f);
331 	color0a = lsl<1>(color0a);
332 
333 	// Mask off the top bit
334 	vint4 color0b = color0a & 0xFF;
335 
336 	// Quantize then unquantize in order to get a value that we take differences against
337 	vint4 color0be = quant_color3(quant_level, color0b);
338 	color0b = color0be | (color0a & 0x100);
339 
340 	// Get hold of the second value
341 	vint4 color1d = float_to_int_rtn(color1);
342 	color1d = lsl<1>(color1d);
343 
344 	// ... and take differences
345 	color1d = color1d - color0b;
346 	color1d.set_lane<3>(0);
347 
348 	// Check if the difference is too large to be encodable
349 	if (any((color1d > vint4(63)) | (color1d < vint4(-64))))
350 	{
351 		return false;
352 	}
353 
354 	// Insert top bit of the base into the offset
355 	color1d = color1d & 0x7F;
356 	color1d = color1d | lsr<1>(color0b & 0x100);
357 
358 	// Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
359 	// since we have then corrupted either the top bit of the base or the sign bit of the offset
360 	vint4 color1de = quant_color3(quant_level, color1d);
361 
362 	vint4 color_flips = (color1d ^ color1de) & 0xC0;
363 	color_flips.set_lane<3>(0);
364 	if (any(color_flips != vint4::zero()))
365 	{
366 		return false;
367 	}
368 
369 	// If the sum of offsets triggers blue-contraction then encoding fails
370 	vint4 ep0 = color0be;
371 	vint4 ep1 = color1de;
372 	bit_transfer_signed(ep1, ep0);
373 	if (hadd_rgb_s(ep1) < 0)
374 	{
375 		return false;
376 	}
377 
378 	// Check that the offsets produce legitimate sums as well
379 	ep0 = ep0 + ep1;
380 	if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
381 	{
382 		return false;
383 	}
384 
385 	color0_out = color0be;
386 	color1_out = color1de;
387 	return true;
388 }
389 
390 /**
391  * @brief Try to quantize an LDR RGB color using delta encoding and blue-contraction.
392  *
393  * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
394  *
395  * @param      color0        The input unquantized color0 endpoint.
396  * @param      color1        The input unquantized color1 endpoint.
397  * @param[out] color0_out    The output quantized color0 endpoint.
398  * @param[out] color1_out    The output quantized color1 endpoint.
399  * @param      quant_level   The quantization level to use.
400  *
401  * @return Returns @c false on failure, @c true on success.
402  */
try_quantize_rgb_delta_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)403 static bool try_quantize_rgb_delta_blue_contract(
404 	vfloat4 color0,
405 	vfloat4 color1,
406 	vint4& color0_out,
407 	vint4& color1_out,
408 	quant_method quant_level
409 ) {
410 	// Note: Switch around endpoint colors already at start
411 	std::swap(color0, color1);
412 
413 	// Apply inverse blue-contraction
414 	color0 += color0 - color0.swz<2, 2, 2, 3>();
415 	color1 += color1 - color1.swz<2, 2, 2, 3>();
416 
417 	// If anything overflows BC cannot be used
418 	vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f));
419 	vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f));
420 	if (any(color0_error | color1_error))
421 	{
422 		return false;
423 	}
424 
425 	// Transform color0 to unorm9
426 	vint4 color0a = float_to_int_rtn(color0);
427 	color0.set_lane<3>(0.0f);
428 	color0a = lsl<1>(color0a);
429 
430 	// Mask off the top bit
431 	vint4 color0b = color0a & 0xFF;
432 
433 	// Quantize then unquantize in order to get a value that we take differences against
434 	vint4 color0be = quant_color3(quant_level, color0b);
435 	color0b = color0be | (color0a & 0x100);
436 
437 	// Get hold of the second value
438 	vint4 color1d = float_to_int_rtn(color1);
439 	color1d = lsl<1>(color1d);
440 
441 	// ... and take differences
442 	color1d = color1d - color0b;
443 	color1d.set_lane<3>(0);
444 
445 	// Check if the difference is too large to be encodable
446 	if (any((color1d > vint4(63)) | (color1d < vint4(-64))))
447 	{
448 		return false;
449 	}
450 
451 	// Insert top bit of the base into the offset
452 	color1d = color1d & 0x7F;
453 	color1d = color1d | lsr<1>(color0b & 0x100);
454 
455 	// Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
456 	// since we have then corrupted either the top bit of the base or the sign bit of the offset
457 	vint4 color1de = quant_color3(quant_level, color1d);
458 
459 	vint4 color_flips = (color1d ^ color1de) & 0xC0;
460 	color_flips.set_lane<3>(0);
461 	if (any(color_flips != vint4::zero()))
462 	{
463 		return false;
464 	}
465 
466 	// If the sum of offsets does not trigger blue-contraction then encoding fails
467 	vint4 ep0 = color0be;
468 	vint4 ep1 = color1de;
469 	bit_transfer_signed(ep1, ep0);
470 	if (hadd_rgb_s(ep1) >= 0)
471 	{
472 		return false;
473 	}
474 
475 	// Check that the offsets produce legitimate sums as well
476 	ep0 = ep0 + ep1;
477 	if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
478 	{
479 		return false;
480 	}
481 
482 	color0_out = color0be;
483 	color1_out = color1de;
484 	return true;
485 }
486 
487 /**
488  * @brief Try to quantize an LDR A color using delta encoding.
489  *
490  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
491  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
492  * non-negative, then we encode a regular delta.
493  *
494  * This function only compressed the alpha - the other elements in the output array are not touched.
495  *
496  * @param      color0        The input unquantized color0 endpoint.
497  * @param      color1        The input unquantized color1 endpoint.
498  * @param[out] color0_out    The output quantized color0 endpoint; must preserve lane 0/1/2.
499  * @param[out] color1_out    The output quantized color1 endpoint; must preserve lane 0/1/2.
500  * @param      quant_level   The quantization level to use.
501  *
502  * @return Returns @c false on failure, @c true on success.
503  */
try_quantize_alpha_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)504 static bool try_quantize_alpha_delta(
505 	vfloat4 color0,
506 	vfloat4 color1,
507 	vint4& color0_out,
508 	vint4& color1_out,
509 	quant_method quant_level
510 ) {
511 	float a0 = color0.lane<3>();
512 	float a1 = color1.lane<3>();
513 
514 	int a0a = astc::flt2int_rtn(a0);
515 	a0a <<= 1;
516 	int a0b = a0a & 0xFF;
517 	int a0be = quant_color(quant_level, a0b);
518 	a0b = a0be;
519 	a0b |= a0a & 0x100;
520 	int a1d = astc::flt2int_rtn(a1);
521 	a1d <<= 1;
522 	a1d -= a0b;
523 
524 	if (a1d > 63 || a1d < -64)
525 	{
526 		return false;
527 	}
528 
529 	a1d &= 0x7F;
530 	a1d |= (a0b & 0x100) >> 1;
531 
532 	int a1de = quant_color(quant_level, a1d);
533 	int a1du = a1de;
534 	if ((a1d ^ a1du) & 0xC0)
535 	{
536 		return false;
537 	}
538 
539 	a1du &= 0x7F;
540 	if (a1du & 0x40)
541 	{
542 		a1du -= 0x80;
543 	}
544 
545 	a1du += a0b;
546 	if (a1du < 0 || a1du > 0x1FF)
547 	{
548 		return false;
549 	}
550 
551 	color0_out.set_lane<3>(a0be);
552 	color1_out.set_lane<3>(a1de);
553 
554 	return true;
555 }
556 
557 /**
558  * @brief Try to quantize an LDR LA color using delta encoding.
559  *
560  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
561  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
562  * non-negative, then we encode a regular delta.
563  *
564  * This function only compressed the alpha - the other elements in the output array are not touched.
565  *
566  * @param      color0        The input unquantized color0 endpoint.
567  * @param      color1        The input unquantized color1 endpoint.
568  * @param[out] output        The output endpoints, returned as (l0, l1, a0, a1).
569  * @param      quant_level   The quantization level to use.
570  *
571  * @return Returns @c false on failure, @c true on success.
572  */
try_quantize_luminance_alpha_delta(vfloat4 color0,vfloat4 color1,uint8_t output[4],quant_method quant_level)573 static bool try_quantize_luminance_alpha_delta(
574 	vfloat4 color0,
575 	vfloat4 color1,
576 	uint8_t output[4],
577 	quant_method quant_level
578 ) {
579 	float l0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
580 	float l1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
581 
582 	float a0 = color0.lane<3>();
583 	float a1 = color1.lane<3>();
584 
585 	int l0a = astc::flt2int_rtn(l0);
586 	int a0a = astc::flt2int_rtn(a0);
587 	l0a <<= 1;
588 	a0a <<= 1;
589 
590 	int l0b = l0a & 0xFF;
591 	int a0b = a0a & 0xFF;
592 	int l0be = quant_color(quant_level, l0b);
593 	int a0be = quant_color(quant_level, a0b);
594 	l0b = l0be;
595 	a0b = a0be;
596 	l0b |= l0a & 0x100;
597 	a0b |= a0a & 0x100;
598 
599 	int l1d = astc::flt2int_rtn(l1);
600 	int a1d = astc::flt2int_rtn(a1);
601 	l1d <<= 1;
602 	a1d <<= 1;
603 	l1d -= l0b;
604 	a1d -= a0b;
605 
606 	if (l1d > 63 || l1d < -64)
607 	{
608 		return false;
609 	}
610 
611 	if (a1d > 63 || a1d < -64)
612 	{
613 		return false;
614 	}
615 
616 	l1d &= 0x7F;
617 	a1d &= 0x7F;
618 	l1d |= (l0b & 0x100) >> 1;
619 	a1d |= (a0b & 0x100) >> 1;
620 
621 	int l1de = quant_color(quant_level, l1d);
622 	int a1de = quant_color(quant_level, a1d);
623 	int l1du = l1de;
624 	int a1du = a1de;
625 
626 	if ((l1d ^ l1du) & 0xC0)
627 	{
628 		return false;
629 	}
630 
631 	if ((a1d ^ a1du) & 0xC0)
632 	{
633 		return false;
634 	}
635 
636 	l1du &= 0x7F;
637 	a1du &= 0x7F;
638 
639 	if (l1du & 0x40)
640 	{
641 		l1du -= 0x80;
642 	}
643 
644 	if (a1du & 0x40)
645 	{
646 		a1du -= 0x80;
647 	}
648 
649 	l1du += l0b;
650 	a1du += a0b;
651 
652 	if (l1du < 0 || l1du > 0x1FF)
653 	{
654 		return false;
655 	}
656 
657 	if (a1du < 0 || a1du > 0x1FF)
658 	{
659 		return false;
660 	}
661 
662 	output[0] = static_cast<uint8_t>(l0be);
663 	output[1] = static_cast<uint8_t>(l1de);
664 	output[2] = static_cast<uint8_t>(a0be);
665 	output[3] = static_cast<uint8_t>(a1de);
666 
667 	return true;
668 }
669 
670 /**
671  * @brief Try to quantize an LDR RGBA color using delta encoding.
672  *
673  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
674  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
675  * non-negative, then we encode a regular delta.
676  *
677  * This function only compressed the alpha - the other elements in the output array are not touched.
678  *
679  * @param      color0        The input unquantized color0 endpoint.
680  * @param      color1        The input unquantized color1 endpoint.
681  * @param[out] color0_out   The output quantized color0 endpoint
682  * @param[out] color1_out   The output quantized color1 endpoint
683  * @param      quant_level   The quantization level to use.
684  *
685  * @return Returns @c false on failure, @c true on success.
686  */
try_quantize_rgba_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)687 static bool try_quantize_rgba_delta(
688 	vfloat4 color0,
689 	vfloat4 color1,
690 	vint4& color0_out,
691 	vint4& color1_out,
692 	quant_method quant_level
693 ) {
694 	return try_quantize_rgb_delta(color0, color1, color0_out, color1_out, quant_level) &&
695 	       try_quantize_alpha_delta(color0, color1, color0_out, color1_out, quant_level);
696 }
697 
698 /**
699  * @brief Try to quantize an LDR RGBA color using delta and blue contract encoding.
700  *
701  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
702  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
703  * non-negative, then we encode a regular delta.
704  *
705  * This function only compressed the alpha - the other elements in the output array are not touched.
706  *
707  * @param      color0       The input unquantized color0 endpoint.
708  * @param      color1       The input unquantized color1 endpoint.
709  * @param[out] color0_out   The output quantized color0 endpoint
710  * @param[out] color1_out   The output quantized color1 endpoint
711  * @param      quant_level  The quantization level to use.
712  *
713  * @return Returns @c false on failure, @c true on success.
714  */
try_quantize_rgba_delta_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)715 static bool try_quantize_rgba_delta_blue_contract(
716 	vfloat4 color0,
717 	vfloat4 color1,
718 	vint4& color0_out,
719 	vint4& color1_out,
720 	quant_method quant_level
721 ) {
722 	// Note that we swap the color0 and color1 ordering for alpha to match RGB blue-contract
723 	return try_quantize_rgb_delta_blue_contract(color0, color1, color0_out, color1_out, quant_level) &&
724 	       try_quantize_alpha_delta(color1, color0, color0_out, color1_out, quant_level);
725 }
726 
727 /**
728  * @brief Quantize an LDR RGB color using scale encoding.
729  *
730  * @param      color         The input unquantized color endpoint and scale factor.
731  * @param[out] output        The output endpoints, returned as (r0, g0, b0, s).
732  * @param      quant_level   The quantization level to use.
733  */
quantize_rgbs(vfloat4 color,uint8_t output[4],quant_method quant_level)734 static void quantize_rgbs(
735 	vfloat4 color,
736 	uint8_t output[4],
737 	quant_method quant_level
738 ) {
739 	float scale = 1.0f / 257.0f;
740 
741 	float r = astc::clamp255f(color.lane<0>() * scale);
742 	float g = astc::clamp255f(color.lane<1>() * scale);
743 	float b = astc::clamp255f(color.lane<2>() * scale);
744 
745 	int ri = quant_color(quant_level, astc::flt2int_rtn(r), r);
746 	int gi = quant_color(quant_level, astc::flt2int_rtn(g), g);
747 	int bi = quant_color(quant_level, astc::flt2int_rtn(b), b);
748 
749 	float oldcolorsum = hadd_rgb_s(color) * scale;
750 	float newcolorsum = static_cast<float>(ri + gi + bi);
751 
752 	float scalea = astc::clamp1f(color.lane<3>() * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f));
753 	int scale_idx = astc::flt2int_rtn(scalea * 256.0f);
754 	scale_idx = astc::clamp(scale_idx, 0, 255);
755 
756 	output[0] = static_cast<uint8_t>(ri);
757 	output[1] = static_cast<uint8_t>(gi);
758 	output[2] = static_cast<uint8_t>(bi);
759 	output[3] = quant_color(quant_level, scale_idx);
760 }
761 
762 /**
763  * @brief Quantize an LDR RGBA color using scale encoding.
764  *
765  * @param      color0       The input unquantized color0 alpha endpoint.
766  * @param      color1       The input unquantized color1 alpha endpoint.
767  * @param      color        The input unquantized color endpoint and scale factor.
768  * @param[out] output       The output endpoints, returned as (r0, g0, b0, s, a0, a1).
769  * @param      quant_level  The quantization level to use.
770  */
quantize_rgbs_alpha(vfloat4 color0,vfloat4 color1,vfloat4 color,uint8_t output[6],quant_method quant_level)771 static void quantize_rgbs_alpha(
772 	vfloat4 color0,
773 	vfloat4 color1,
774 	vfloat4 color,
775 	uint8_t output[6],
776 	quant_method quant_level
777 ) {
778 	float a0 = color0.lane<3>();
779 	float a1 = color1.lane<3>();
780 
781 	output[4] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
782 	output[5] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
783 
784 	quantize_rgbs(color, output, quant_level);
785 }
786 
787 /**
788  * @brief Quantize a LDR L color.
789  *
790  * @param      color0        The input unquantized color0 endpoint.
791  * @param      color1        The input unquantized color1 endpoint.
792  * @param[out] output        The output endpoints, returned as (l0, l1).
793  * @param      quant_level   The quantization level to use.
794  */
quantize_luminance(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)795 static void quantize_luminance(
796 	vfloat4 color0,
797 	vfloat4 color1,
798 	uint8_t output[2],
799 	quant_method quant_level
800 ) {
801 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
802 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
803 
804 	if (lum0 > lum1)
805 	{
806 		float avg = (lum0 + lum1) * 0.5f;
807 		lum0 = avg;
808 		lum1 = avg;
809 	}
810 
811 	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
812 	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
813 }
814 
815 /**
816  * @brief Quantize a LDR LA color.
817  *
818  * @param      color0        The input unquantized color0 endpoint.
819  * @param      color1        The input unquantized color1 endpoint.
820  * @param[out] output        The output endpoints, returned as (l0, l1, a0, a1).
821  * @param      quant_level   The quantization level to use.
822  */
quantize_luminance_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[4],quant_method quant_level)823 static void quantize_luminance_alpha(
824 	vfloat4 color0,
825 	vfloat4 color1,
826 	uint8_t output[4],
827 	quant_method quant_level
828 ) {
829 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
830 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
831 
832 	float a0 = color0.lane<3>();
833 	float a1 = color1.lane<3>();
834 
835 	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
836 	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
837 	output[2] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
838 	output[3] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
839 }
840 
841 /**
842  * @brief Quantize and unquantize a value ensuring top two bits are the same.
843  *
844  * @param      quant_level     The quantization level to use.
845  * @param      value           The input unquantized value.
846  * @param[out] quant_value     The quantized value.
847  */
quantize_and_unquantize_retain_top_two_bits(quant_method quant_level,uint8_t value,uint8_t & quant_value)848 static inline void quantize_and_unquantize_retain_top_two_bits(
849 	quant_method quant_level,
850 	uint8_t value,
851 	uint8_t& quant_value
852 ) {
853 	int perform_loop;
854 	uint8_t quantval;
855 
856 	do
857 	{
858 		quantval = quant_color(quant_level, value);
859 
860 		// Perform looping if the top two bits were modified by quant/unquant
861 		perform_loop = (value & 0xC0) != (quantval & 0xC0);
862 
863 		if ((quantval & 0xC0) > (value & 0xC0))
864 		{
865 			// Quant/unquant rounded UP so that the top two bits changed;
866 			// decrement the input in hopes that this will avoid rounding up.
867 			value--;
868 		}
869 		else if ((quantval & 0xC0) < (value & 0xC0))
870 		{
871 			// Quant/unquant rounded DOWN so that the top two bits changed;
872 			// decrement the input in hopes that this will avoid rounding down.
873 			value--;
874 		}
875 	} while (perform_loop);
876 
877 	quant_value = quantval;
878 }
879 
880 /**
881  * @brief Quantize and unquantize a value ensuring top four bits are the same.
882  *
883  * @param      quant_level     The quantization level to use.
884  * @param      value           The input unquantized value.
885  * @param[out] quant_value     The quantized value in 0-255 range.
886  */
quantize_and_unquantize_retain_top_four_bits(quant_method quant_level,uint8_t value,uint8_t & quant_value)887 static inline void quantize_and_unquantize_retain_top_four_bits(
888 	quant_method quant_level,
889 	uint8_t value,
890 	uint8_t& quant_value
891 ) {
892 	uint8_t perform_loop;
893 	uint8_t quantval;
894 
895 	do
896 	{
897 		quantval = quant_color(quant_level, value);
898 		// Perform looping if the top four bits were modified by quant/unquant
899 		perform_loop = (value & 0xF0) != (quantval & 0xF0);
900 
901 		if ((quantval & 0xF0) > (value & 0xF0))
902 		{
903 			// Quant/unquant rounded UP so that the top four bits changed;
904 			// decrement the input value in hopes that this will avoid rounding up.
905 			value--;
906 		}
907 		else if ((quantval & 0xF0) < (value & 0xF0))
908 		{
909 			// Quant/unquant rounded DOWN so that the top four bits changed;
910 			// decrement the input value in hopes that this will avoid rounding down.
911 			value--;
912 		}
913 	} while (perform_loop);
914 
915 	quant_value = quantval;
916 }
917 
918 /**
919  * @brief Quantize a HDR RGB color using RGB + offset.
920  *
921  * @param      color         The input unquantized color endpoint and offset.
922  * @param[out] output        The output endpoints, returned as packed RGBS with some mode bits.
923  * @param      quant_level   The quantization level to use.
924  */
quantize_hdr_rgbo(vfloat4 color,uint8_t output[4],quant_method quant_level)925 static void quantize_hdr_rgbo(
926 	vfloat4 color,
927 	uint8_t output[4],
928 	quant_method quant_level
929 ) {
930 	color.set_lane<0>(color.lane<0>() + color.lane<3>());
931 	color.set_lane<1>(color.lane<1>() + color.lane<3>());
932 	color.set_lane<2>(color.lane<2>() + color.lane<3>());
933 
934 	color = clamp(0.0f, 65535.0f, color);
935 
936 	vfloat4 color_bak = color;
937 
938 	int majcomp;
939 	if (color.lane<0>() > color.lane<1>() && color.lane<0>() > color.lane<2>())
940 	{
941 		majcomp = 0;			// red is largest component
942 	}
943 	else if (color.lane<1>() > color.lane<2>())
944 	{
945 		majcomp = 1;			// green is largest component
946 	}
947 	else
948 	{
949 		majcomp = 2;			// blue is largest component
950 	}
951 
952 	// swap around the red component and the largest component.
953 	switch (majcomp)
954 	{
955 	case 1:
956 		color = color.swz<1, 0, 2, 3>();
957 		break;
958 	case 2:
959 		color = color.swz<2, 1, 0, 3>();
960 		break;
961 	default:
962 		break;
963 	}
964 
965 	static const int mode_bits[5][3] {
966 		{11, 5, 7},
967 		{11, 6, 5},
968 		{10, 5, 8},
969 		{9, 6, 7},
970 		{8, 7, 6}
971 	};
972 
973 	static const float mode_cutoffs[5][2] {
974 		{1024, 4096},
975 		{2048, 1024},
976 		{2048, 16384},
977 		{8192, 16384},
978 		{32768, 16384}
979 	};
980 
981 	static const float mode_rscales[5] {
982 		32.0f,
983 		32.0f,
984 		64.0f,
985 		128.0f,
986 		256.0f,
987 	};
988 
989 	static const float mode_scales[5] {
990 		1.0f / 32.0f,
991 		1.0f / 32.0f,
992 		1.0f / 64.0f,
993 		1.0f / 128.0f,
994 		1.0f / 256.0f,
995 	};
996 
997 	float r_base = color.lane<0>();
998 	float g_base = color.lane<0>() - color.lane<1>() ;
999 	float b_base = color.lane<0>() - color.lane<2>() ;
1000 	float s_base = color.lane<3>() ;
1001 
1002 	for (int mode = 0; mode < 5; mode++)
1003 	{
1004 		if (g_base > mode_cutoffs[mode][0] || b_base > mode_cutoffs[mode][0] || s_base > mode_cutoffs[mode][1])
1005 		{
1006 			continue;
1007 		}
1008 
1009 		// Encode the mode into a 4-bit vector
1010 		int mode_enc = mode < 4 ? (mode | (majcomp << 2)) : (majcomp | 0xC);
1011 
1012 		float mode_scale = mode_scales[mode];
1013 		float mode_rscale = mode_rscales[mode];
1014 
1015 		int gb_intcutoff = 1 << mode_bits[mode][1];
1016 		int s_intcutoff = 1 << mode_bits[mode][2];
1017 
1018 		// Quantize and unquantize R
1019 		int r_intval = astc::flt2int_rtn(r_base * mode_scale);
1020 
1021 		int r_lowbits = r_intval & 0x3f;
1022 
1023 		r_lowbits |= (mode_enc & 3) << 6;
1024 
1025 		uint8_t r_quantval;
1026 		quantize_and_unquantize_retain_top_two_bits(
1027 		    quant_level, static_cast<uint8_t>(r_lowbits), r_quantval);
1028 
1029 		r_intval = (r_intval & ~0x3f) | (r_quantval & 0x3f);
1030 		float r_fval = static_cast<float>(r_intval) * mode_rscale;
1031 
1032 		// Recompute G and B, then quantize and unquantize them
1033 		float g_fval = r_fval - color.lane<1>() ;
1034 		float b_fval = r_fval - color.lane<2>() ;
1035 
1036 		g_fval = astc::clamp(g_fval, 0.0f, 65535.0f);
1037 		b_fval = astc::clamp(b_fval, 0.0f, 65535.0f);
1038 
1039 		int g_intval = astc::flt2int_rtn(g_fval * mode_scale);
1040 		int b_intval = astc::flt2int_rtn(b_fval * mode_scale);
1041 
1042 		if (g_intval >= gb_intcutoff || b_intval >= gb_intcutoff)
1043 		{
1044 			continue;
1045 		}
1046 
1047 		int g_lowbits = g_intval & 0x1f;
1048 		int b_lowbits = b_intval & 0x1f;
1049 
1050 		int bit0 = 0;
1051 		int bit1 = 0;
1052 		int bit2 = 0;
1053 		int bit3 = 0;
1054 
1055 		switch (mode)
1056 		{
1057 		case 0:
1058 		case 2:
1059 			bit0 = (r_intval >> 9) & 1;
1060 			break;
1061 		case 1:
1062 		case 3:
1063 			bit0 = (r_intval >> 8) & 1;
1064 			break;
1065 		case 4:
1066 		case 5:
1067 			bit0 = (g_intval >> 6) & 1;
1068 			break;
1069 		}
1070 
1071 		switch (mode)
1072 		{
1073 		case 0:
1074 		case 1:
1075 		case 2:
1076 		case 3:
1077 			bit2 = (r_intval >> 7) & 1;
1078 			break;
1079 		case 4:
1080 		case 5:
1081 			bit2 = (b_intval >> 6) & 1;
1082 			break;
1083 		}
1084 
1085 		switch (mode)
1086 		{
1087 		case 0:
1088 		case 2:
1089 			bit1 = (r_intval >> 8) & 1;
1090 			break;
1091 		case 1:
1092 		case 3:
1093 		case 4:
1094 		case 5:
1095 			bit1 = (g_intval >> 5) & 1;
1096 			break;
1097 		}
1098 
1099 		switch (mode)
1100 		{
1101 		case 0:
1102 			bit3 = (r_intval >> 10) & 1;
1103 			break;
1104 		case 2:
1105 			bit3 = (r_intval >> 6) & 1;
1106 			break;
1107 		case 1:
1108 		case 3:
1109 		case 4:
1110 		case 5:
1111 			bit3 = (b_intval >> 5) & 1;
1112 			break;
1113 		}
1114 
1115 		g_lowbits |= (mode_enc & 0x4) << 5;
1116 		b_lowbits |= (mode_enc & 0x8) << 4;
1117 
1118 		g_lowbits |= bit0 << 6;
1119 		g_lowbits |= bit1 << 5;
1120 		b_lowbits |= bit2 << 6;
1121 		b_lowbits |= bit3 << 5;
1122 
1123 		uint8_t g_quantval;
1124 		uint8_t b_quantval;
1125 
1126 		quantize_and_unquantize_retain_top_four_bits(
1127 		    quant_level, static_cast<uint8_t>(g_lowbits), g_quantval);
1128 		quantize_and_unquantize_retain_top_four_bits(
1129 		    quant_level, static_cast<uint8_t>(b_lowbits), b_quantval);
1130 
1131 		g_intval = (g_intval & ~0x1f) | (g_quantval & 0x1f);
1132 		b_intval = (b_intval & ~0x1f) | (b_quantval & 0x1f);
1133 
1134 		g_fval = static_cast<float>(g_intval) * mode_rscale;
1135 		b_fval = static_cast<float>(b_intval) * mode_rscale;
1136 
1137 		// Recompute the scale value, based on the errors introduced to red, green and blue
1138 
1139 		// If the error is positive, then the R,G,B errors combined have raised the color
1140 		// value overall; as such, the scale value needs to be increased.
1141 		float rgb_errorsum = (r_fval - color.lane<0>() ) + (r_fval - g_fval - color.lane<1>() ) + (r_fval - b_fval - color.lane<2>() );
1142 
1143 		float s_fval = s_base + rgb_errorsum * (1.0f / 3.0f);
1144 		s_fval = astc::clamp(s_fval, 0.0f, 1e9f);
1145 
1146 		int s_intval = astc::flt2int_rtn(s_fval * mode_scale);
1147 
1148 		if (s_intval >= s_intcutoff)
1149 		{
1150 			continue;
1151 		}
1152 
1153 		int s_lowbits = s_intval & 0x1f;
1154 
1155 		int bit4;
1156 		int bit5;
1157 		int bit6;
1158 		switch (mode)
1159 		{
1160 		case 1:
1161 			bit6 = (r_intval >> 9) & 1;
1162 			break;
1163 		default:
1164 			bit6 = (s_intval >> 5) & 1;
1165 			break;
1166 		}
1167 
1168 		switch (mode)
1169 		{
1170 		case 4:
1171 			bit5 = (r_intval >> 7) & 1;
1172 			break;
1173 		case 1:
1174 			bit5 = (r_intval >> 10) & 1;
1175 			break;
1176 		default:
1177 			bit5 = (s_intval >> 6) & 1;
1178 			break;
1179 		}
1180 
1181 		switch (mode)
1182 		{
1183 		case 2:
1184 			bit4 = (s_intval >> 7) & 1;
1185 			break;
1186 		default:
1187 			bit4 = (r_intval >> 6) & 1;
1188 			break;
1189 		}
1190 
1191 		s_lowbits |= bit6 << 5;
1192 		s_lowbits |= bit5 << 6;
1193 		s_lowbits |= bit4 << 7;
1194 
1195 		uint8_t s_quantval;
1196 
1197 		quantize_and_unquantize_retain_top_four_bits(
1198 		    quant_level, static_cast<uint8_t>(s_lowbits), s_quantval);
1199 
1200 		output[0] = r_quantval;
1201 		output[1] = g_quantval;
1202 		output[2] = b_quantval;
1203 		output[3] = s_quantval;
1204 		return;
1205 	}
1206 
1207 	// Failed to encode any of the modes above? In that case encode using mode #5
1208 	float vals[4];
1209 	vals[0] = color_bak.lane<0>();
1210 	vals[1] = color_bak.lane<1>();
1211 	vals[2] = color_bak.lane<2>();
1212 	vals[3] = color_bak.lane<3>();
1213 
1214 	int ivals[4];
1215 	float cvals[3];
1216 
1217 	for (int i = 0; i < 3; i++)
1218 	{
1219 		vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f);
1220 		ivals[i] = astc::flt2int_rtn(vals[i] * (1.0f / 512.0f));
1221 		cvals[i] = static_cast<float>(ivals[i]) * 512.0f;
1222 	}
1223 
1224 	float rgb_errorsum = (cvals[0] - vals[0]) + (cvals[1] - vals[1]) + (cvals[2] - vals[2]);
1225 	vals[3] += rgb_errorsum * (1.0f / 3.0f);
1226 
1227 	vals[3] = astc::clamp(vals[3], 0.0f, 65020.0f);
1228 	ivals[3] = astc::flt2int_rtn(vals[3] * (1.0f / 512.0f));
1229 
1230 	int encvals[4];
1231 	encvals[0] = (ivals[0] & 0x3f) | 0xC0;
1232 	encvals[1] = (ivals[1] & 0x7f) | 0x80;
1233 	encvals[2] = (ivals[2] & 0x7f) | 0x80;
1234 	encvals[3] = (ivals[3] & 0x7f) | ((ivals[0] & 0x40) << 1);
1235 
1236 	for (uint8_t i = 0; i < 4; i++)
1237 	{
1238 		quantize_and_unquantize_retain_top_four_bits(
1239 		    quant_level, static_cast<uint8_t>(encvals[i]), output[i]);
1240 	}
1241 
1242 	return;
1243 }
1244 
1245 /**
1246  * @brief Quantize a HDR RGB color using direct RGB encoding.
1247  *
1248  * @param      color0        The input unquantized color0 endpoint.
1249  * @param      color1        The input unquantized color1 endpoint.
1250  * @param[out] output        The output endpoints, returned as packed RGB+RGB pairs with mode bits.
1251  * @param      quant_level   The quantization level to use.
1252  */
quantize_hdr_rgb(vfloat4 color0,vfloat4 color1,uint8_t output[6],quant_method quant_level)1253 static void quantize_hdr_rgb(
1254 	vfloat4 color0,
1255 	vfloat4 color1,
1256 	uint8_t output[6],
1257 	quant_method quant_level
1258 ) {
1259 	// Note: color*.lane<3> is not used so we can ignore it
1260 	color0 = clamp(0.0f, 65535.0f, color0);
1261 	color1 = clamp(0.0f, 65535.0f, color1);
1262 
1263 	vfloat4 color0_bak = color0;
1264 	vfloat4 color1_bak = color1;
1265 
1266 	int majcomp;
1267 	if (color1.lane<0>() > color1.lane<1>() && color1.lane<0>() > color1.lane<2>())
1268 	{
1269 		majcomp = 0;
1270 	}
1271 	else if (color1.lane<1>() > color1.lane<2>())
1272 	{
1273 		majcomp = 1;
1274 	}
1275 	else
1276 	{
1277 		majcomp = 2;
1278 	}
1279 
1280 	// Swizzle the components
1281 	switch (majcomp)
1282 	{
1283 	case 1:  // red-green swap
1284 		color0 = color0.swz<1, 0, 2, 3>();
1285 		color1 = color1.swz<1, 0, 2, 3>();
1286 		break;
1287 	case 2:  // red-blue swap
1288 		color0 = color0.swz<2, 1, 0, 3>();
1289 		color1 = color1.swz<2, 1, 0, 3>();
1290 		break;
1291 	default:
1292 		break;
1293 	}
1294 
1295 	float a_base = color1.lane<0>();
1296 	a_base = astc::clamp(a_base, 0.0f, 65535.0f);
1297 
1298 	float b0_base = a_base - color1.lane<1>();
1299 	float b1_base = a_base - color1.lane<2>();
1300 	float c_base = a_base - color0.lane<0>();
1301 	float d0_base = a_base - b0_base - c_base - color0.lane<1>();
1302 	float d1_base = a_base - b1_base - c_base - color0.lane<2>();
1303 
1304 	// Number of bits in the various fields in the various modes
1305 	static const int mode_bits[8][4] {
1306 		{9, 7, 6, 7},
1307 		{9, 8, 6, 6},
1308 		{10, 6, 7, 7},
1309 		{10, 7, 7, 6},
1310 		{11, 8, 6, 5},
1311 		{11, 6, 8, 6},
1312 		{12, 7, 7, 5},
1313 		{12, 6, 7, 6}
1314 	};
1315 
1316 	// Cutoffs to use for the computed values of a,b,c,d, assuming the
1317 	// range 0..65535 are LNS values corresponding to fp16.
1318 	static const float mode_cutoffs[8][4] {
1319 		{16384, 8192, 8192, 8},	// mode 0: 9,7,6,7
1320 		{32768, 8192, 4096, 8},	// mode 1: 9,8,6,6
1321 		{4096, 8192, 4096, 4},	// mode 2: 10,6,7,7
1322 		{8192, 8192, 2048, 4},	// mode 3: 10,7,7,6
1323 		{8192, 2048, 512, 2},	// mode 4: 11,8,6,5
1324 		{2048, 8192, 1024, 2},	// mode 5: 11,6,8,6
1325 		{2048, 2048, 256, 1},	// mode 6: 12,7,7,5
1326 		{1024, 2048, 512, 1},	// mode 7: 12,6,7,6
1327 	};
1328 
1329 	static const float mode_scales[8] {
1330 		1.0f / 128.0f,
1331 		1.0f / 128.0f,
1332 		1.0f / 64.0f,
1333 		1.0f / 64.0f,
1334 		1.0f / 32.0f,
1335 		1.0f / 32.0f,
1336 		1.0f / 16.0f,
1337 		1.0f / 16.0f,
1338 	};
1339 
1340 	// Scaling factors when going from what was encoded in the mode to 16 bits.
1341 	static const float mode_rscales[8] {
1342 		128.0f,
1343 		128.0f,
1344 		64.0f,
1345 		64.0f,
1346 		32.0f,
1347 		32.0f,
1348 		16.0f,
1349 		16.0f
1350 	};
1351 
1352 	// Try modes one by one, with the highest-precision mode first.
1353 	for (int mode = 7; mode >= 0; mode--)
1354 	{
1355 		// For each mode, test if we can in fact accommodate the computed b, c, and d values.
1356 		// If we clearly can't, then we skip to the next mode.
1357 
1358 		float b_cutoff = mode_cutoffs[mode][0];
1359 		float c_cutoff = mode_cutoffs[mode][1];
1360 		float d_cutoff = mode_cutoffs[mode][2];
1361 
1362 		if (b0_base > b_cutoff || b1_base > b_cutoff || c_base > c_cutoff || fabsf(d0_base) > d_cutoff || fabsf(d1_base) > d_cutoff)
1363 		{
1364 			continue;
1365 		}
1366 
1367 		float mode_scale = mode_scales[mode];
1368 		float mode_rscale = mode_rscales[mode];
1369 
1370 		int b_intcutoff = 1 << mode_bits[mode][1];
1371 		int c_intcutoff = 1 << mode_bits[mode][2];
1372 		int d_intcutoff = 1 << (mode_bits[mode][3] - 1);
1373 
1374 		// Quantize and unquantize A, with the assumption that its high bits can be handled safely.
1375 		int a_intval = astc::flt2int_rtn(a_base * mode_scale);
1376 		int a_lowbits = a_intval & 0xFF;
1377 
1378 		int a_quantval = quant_color(quant_level, a_lowbits);
1379 		int a_uquantval = a_quantval;
1380 		a_intval = (a_intval & ~0xFF) | a_uquantval;
1381 		float a_fval = static_cast<float>(a_intval) * mode_rscale;
1382 
1383 		// Recompute C, then quantize and unquantize it
1384 		float c_fval = a_fval - color0.lane<0>();
1385 		c_fval = astc::clamp(c_fval, 0.0f, 65535.0f);
1386 
1387 		int c_intval = astc::flt2int_rtn(c_fval * mode_scale);
1388 
1389 		if (c_intval >= c_intcutoff)
1390 		{
1391 			continue;
1392 		}
1393 
1394 		int c_lowbits = c_intval & 0x3f;
1395 
1396 		c_lowbits |= (mode & 1) << 7;
1397 		c_lowbits |= (a_intval & 0x100) >> 2;
1398 
1399 		uint8_t c_quantval;
1400 
1401 		quantize_and_unquantize_retain_top_two_bits(
1402 		    quant_level, static_cast<uint8_t>(c_lowbits), c_quantval);
1403 
1404 		c_intval = (c_intval & ~0x3F) | (c_quantval & 0x3F);
1405 		c_fval = static_cast<float>(c_intval) * mode_rscale;
1406 
1407 		// Recompute B0 and B1, then quantize and unquantize them
1408 		float b0_fval = a_fval - color1.lane<1>();
1409 		float b1_fval = a_fval - color1.lane<2>();
1410 
1411 		b0_fval = astc::clamp(b0_fval, 0.0f, 65535.0f);
1412 		b1_fval = astc::clamp(b1_fval, 0.0f, 65535.0f);
1413 		int b0_intval = astc::flt2int_rtn(b0_fval * mode_scale);
1414 		int b1_intval = astc::flt2int_rtn(b1_fval * mode_scale);
1415 
1416 		if (b0_intval >= b_intcutoff || b1_intval >= b_intcutoff)
1417 		{
1418 			continue;
1419 		}
1420 
1421 		int b0_lowbits = b0_intval & 0x3f;
1422 		int b1_lowbits = b1_intval & 0x3f;
1423 
1424 		int bit0 = 0;
1425 		int bit1 = 0;
1426 		switch (mode)
1427 		{
1428 		case 0:
1429 		case 1:
1430 		case 3:
1431 		case 4:
1432 		case 6:
1433 			bit0 = (b0_intval >> 6) & 1;
1434 			break;
1435 		case 2:
1436 		case 5:
1437 		case 7:
1438 			bit0 = (a_intval >> 9) & 1;
1439 			break;
1440 		}
1441 
1442 		switch (mode)
1443 		{
1444 		case 0:
1445 		case 1:
1446 		case 3:
1447 		case 4:
1448 		case 6:
1449 			bit1 = (b1_intval >> 6) & 1;
1450 			break;
1451 		case 2:
1452 			bit1 = (c_intval >> 6) & 1;
1453 			break;
1454 		case 5:
1455 		case 7:
1456 			bit1 = (a_intval >> 10) & 1;
1457 			break;
1458 		}
1459 
1460 		b0_lowbits |= bit0 << 6;
1461 		b1_lowbits |= bit1 << 6;
1462 
1463 		b0_lowbits |= ((mode >> 1) & 1) << 7;
1464 		b1_lowbits |= ((mode >> 2) & 1) << 7;
1465 
1466 		uint8_t b0_quantval;
1467 		uint8_t b1_quantval;
1468 
1469 		quantize_and_unquantize_retain_top_two_bits(
1470 		    quant_level, static_cast<uint8_t>(b0_lowbits), b0_quantval);
1471 		quantize_and_unquantize_retain_top_two_bits(
1472 		    quant_level, static_cast<uint8_t>(b1_lowbits), b1_quantval);
1473 
1474 		b0_intval = (b0_intval & ~0x3f) | (b0_quantval & 0x3f);
1475 		b1_intval = (b1_intval & ~0x3f) | (b1_quantval & 0x3f);
1476 		b0_fval = static_cast<float>(b0_intval) * mode_rscale;
1477 		b1_fval = static_cast<float>(b1_intval) * mode_rscale;
1478 
1479 		// Recompute D0 and D1, then quantize and unquantize them
1480 		float d0_fval = a_fval - b0_fval - c_fval - color0.lane<1>();
1481 		float d1_fval = a_fval - b1_fval - c_fval - color0.lane<2>();
1482 
1483 		d0_fval = astc::clamp(d0_fval, -65535.0f, 65535.0f);
1484 		d1_fval = astc::clamp(d1_fval, -65535.0f, 65535.0f);
1485 
1486 		int d0_intval = astc::flt2int_rtn(d0_fval * mode_scale);
1487 		int d1_intval = astc::flt2int_rtn(d1_fval * mode_scale);
1488 
1489 		if (abs(d0_intval) >= d_intcutoff || abs(d1_intval) >= d_intcutoff)
1490 		{
1491 			continue;
1492 		}
1493 
1494 		int d0_lowbits = d0_intval & 0x1f;
1495 		int d1_lowbits = d1_intval & 0x1f;
1496 
1497 		int bit2 = 0;
1498 		int bit3 = 0;
1499 		int bit4;
1500 		int bit5;
1501 		switch (mode)
1502 		{
1503 		case 0:
1504 		case 2:
1505 			bit2 = (d0_intval >> 6) & 1;
1506 			break;
1507 		case 1:
1508 		case 4:
1509 			bit2 = (b0_intval >> 7) & 1;
1510 			break;
1511 		case 3:
1512 			bit2 = (a_intval >> 9) & 1;
1513 			break;
1514 		case 5:
1515 			bit2 = (c_intval >> 7) & 1;
1516 			break;
1517 		case 6:
1518 		case 7:
1519 			bit2 = (a_intval >> 11) & 1;
1520 			break;
1521 		}
1522 		switch (mode)
1523 		{
1524 		case 0:
1525 		case 2:
1526 			bit3 = (d1_intval >> 6) & 1;
1527 			break;
1528 		case 1:
1529 		case 4:
1530 			bit3 = (b1_intval >> 7) & 1;
1531 			break;
1532 		case 3:
1533 		case 5:
1534 		case 6:
1535 		case 7:
1536 			bit3 = (c_intval >> 6) & 1;
1537 			break;
1538 		}
1539 
1540 		switch (mode)
1541 		{
1542 		case 4:
1543 		case 6:
1544 			bit4 = (a_intval >> 9) & 1;
1545 			bit5 = (a_intval >> 10) & 1;
1546 			break;
1547 		default:
1548 			bit4 = (d0_intval >> 5) & 1;
1549 			bit5 = (d1_intval >> 5) & 1;
1550 			break;
1551 		}
1552 
1553 		d0_lowbits |= bit2 << 6;
1554 		d1_lowbits |= bit3 << 6;
1555 		d0_lowbits |= bit4 << 5;
1556 		d1_lowbits |= bit5 << 5;
1557 
1558 		d0_lowbits |= (majcomp & 1) << 7;
1559 		d1_lowbits |= ((majcomp >> 1) & 1) << 7;
1560 
1561 		uint8_t d0_quantval;
1562 		uint8_t d1_quantval;
1563 
1564 		quantize_and_unquantize_retain_top_four_bits(
1565 		    quant_level, static_cast<uint8_t>(d0_lowbits), d0_quantval);
1566 		quantize_and_unquantize_retain_top_four_bits(
1567 		    quant_level, static_cast<uint8_t>(d1_lowbits), d1_quantval);
1568 
1569 		output[0] = static_cast<uint8_t>(a_quantval);
1570 		output[1] = c_quantval;
1571 		output[2] = b0_quantval;
1572 		output[3] = b1_quantval;
1573 		output[4] = d0_quantval;
1574 		output[5] = d1_quantval;
1575 		return;
1576 	}
1577 
1578 	// If neither of the modes fit we will use a flat representation for storing data, using 8 bits
1579 	// for red and green, and 7 bits for blue. This gives color accuracy roughly similar to LDR
1580 	// 4:4:3 which is not at all great but usable. This representation is used if the light color is
1581 	// more than 4x the color value of the dark color.
1582 	float vals[6];
1583 	vals[0] = color0_bak.lane<0>();
1584 	vals[1] = color1_bak.lane<0>();
1585 	vals[2] = color0_bak.lane<1>();
1586 	vals[3] = color1_bak.lane<1>();
1587 	vals[4] = color0_bak.lane<2>();
1588 	vals[5] = color1_bak.lane<2>();
1589 
1590 	for (int i = 0; i < 6; i++)
1591 	{
1592 		vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f);
1593 	}
1594 
1595 	for (int i = 0; i < 4; i++)
1596 	{
1597 		int idx = astc::flt2int_rtn(vals[i] * 1.0f / 256.0f);
1598 		output[i] = quant_color(quant_level, idx);
1599 	}
1600 
1601 	for (int i = 4; i < 6; i++)
1602 	{
1603 		int idx = astc::flt2int_rtn(vals[i] * 1.0f / 512.0f) + 128;
1604 		quantize_and_unquantize_retain_top_two_bits(
1605 		    quant_level, static_cast<uint8_t>(idx), output[i]);
1606 	}
1607 
1608 	return;
1609 }
1610 
1611 /**
1612  * @brief Quantize a HDR RGB + LDR A color using direct RGBA encoding.
1613  *
1614  * @param      color0        The input unquantized color0 endpoint.
1615  * @param      color1        The input unquantized color1 endpoint.
1616  * @param[out] output        The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1617  * @param      quant_level   The quantization level to use.
1618  */
quantize_hdr_rgb_ldr_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[8],quant_method quant_level)1619 static void quantize_hdr_rgb_ldr_alpha(
1620 	vfloat4 color0,
1621 	vfloat4 color1,
1622 	uint8_t output[8],
1623 	quant_method quant_level
1624 ) {
1625 	float scale = 1.0f / 257.0f;
1626 
1627 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
1628 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
1629 
1630 	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
1631 	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
1632 
1633 	quantize_hdr_rgb(color0, color1, output, quant_level);
1634 }
1635 
1636 /**
1637  * @brief Quantize a HDR L color using the large range encoding.
1638  *
1639  * @param      color0        The input unquantized color0 endpoint.
1640  * @param      color1        The input unquantized color1 endpoint.
1641  * @param[out] output        The output endpoints, returned as packed (l0, l1).
1642  * @param      quant_level   The quantization level to use.
1643  */
quantize_hdr_luminance_large_range(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)1644 static void quantize_hdr_luminance_large_range(
1645 	vfloat4 color0,
1646 	vfloat4 color1,
1647 	uint8_t output[2],
1648 	quant_method quant_level
1649 ) {
1650 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
1651 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
1652 
1653 	if (lum1 < lum0)
1654 	{
1655 		float avg = (lum0 + lum1) * 0.5f;
1656 		lum0 = avg;
1657 		lum1 = avg;
1658 	}
1659 
1660 	int ilum1 = astc::flt2int_rtn(lum1);
1661 	int ilum0 = astc::flt2int_rtn(lum0);
1662 
1663 	// Find the closest encodable point in the upper half of the code-point space
1664 	int upper_v0 = (ilum0 + 128) >> 8;
1665 	int upper_v1 = (ilum1 + 128) >> 8;
1666 
1667 	upper_v0 = astc::clamp(upper_v0, 0, 255);
1668 	upper_v1 = astc::clamp(upper_v1, 0, 255);
1669 
1670 	// Find the closest encodable point in the lower half of the code-point space
1671 	int lower_v0 = (ilum1 + 256) >> 8;
1672 	int lower_v1 = ilum0 >> 8;
1673 
1674 	lower_v0 = astc::clamp(lower_v0, 0, 255);
1675 	lower_v1 = astc::clamp(lower_v1, 0, 255);
1676 
1677 	// Determine the distance between the point in code-point space and the input value
1678 	int upper0_dec = upper_v0 << 8;
1679 	int upper1_dec = upper_v1 << 8;
1680 	int lower0_dec = (lower_v1 << 8) + 128;
1681 	int lower1_dec = (lower_v0 << 8) - 128;
1682 
1683 	int upper0_diff = upper0_dec - ilum0;
1684 	int upper1_diff = upper1_dec - ilum1;
1685 	int lower0_diff = lower0_dec - ilum0;
1686 	int lower1_diff = lower1_dec - ilum1;
1687 
1688 	int upper_error = (upper0_diff * upper0_diff) + (upper1_diff * upper1_diff);
1689 	int lower_error = (lower0_diff * lower0_diff) + (lower1_diff * lower1_diff);
1690 
1691 	int v0, v1;
1692 	if (upper_error < lower_error)
1693 	{
1694 		v0 = upper_v0;
1695 		v1 = upper_v1;
1696 	}
1697 	else
1698 	{
1699 		v0 = lower_v0;
1700 		v1 = lower_v1;
1701 	}
1702 
1703 	// OK; encode
1704 	output[0] = quant_color(quant_level, v0);
1705 	output[1] = quant_color(quant_level, v1);
1706 }
1707 
1708 /**
1709  * @brief Quantize a HDR L color using the small range encoding.
1710  *
1711  * @param      color0        The input unquantized color0 endpoint.
1712  * @param      color1        The input unquantized color1 endpoint.
1713  * @param[out] output        The output endpoints, returned as packed (l0, l1) with mode bits.
1714  * @param      quant_level   The quantization level to use.
1715  *
1716  * @return Returns @c false on failure, @c true on success.
1717  */
try_quantize_hdr_luminance_small_range(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)1718 static bool try_quantize_hdr_luminance_small_range(
1719 	vfloat4 color0,
1720 	vfloat4 color1,
1721 	uint8_t output[2],
1722 	quant_method quant_level
1723 ) {
1724 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
1725 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
1726 
1727 	if (lum1 < lum0)
1728 	{
1729 		float avg = (lum0 + lum1) * 0.5f;
1730 		lum0 = avg;
1731 		lum1 = avg;
1732 	}
1733 
1734 	int ilum1 = astc::flt2int_rtn(lum1);
1735 	int ilum0 = astc::flt2int_rtn(lum0);
1736 
1737 	// Difference of more than a factor-of-2 results in immediate failure
1738 	if (ilum1 - ilum0 > 2048)
1739 	{
1740 		return false;
1741 	}
1742 
1743 	int lowval, highval, diffval;
1744 	int v0, v1;
1745 	int v0e, v1e;
1746 	int v0d, v1d;
1747 
1748 	// Try to encode the high-precision submode
1749 	lowval = (ilum0 + 16) >> 5;
1750 	highval = (ilum1 + 16) >> 5;
1751 
1752 	lowval = astc::clamp(lowval, 0, 2047);
1753 	highval = astc::clamp(highval, 0, 2047);
1754 
1755 	v0 = lowval & 0x7F;
1756 	v0e = quant_color(quant_level, v0);
1757 	v0d = v0e;
1758 
1759 	if (v0d < 0x80)
1760 	{
1761 		lowval = (lowval & ~0x7F) | v0d;
1762 		diffval = highval - lowval;
1763 		if (diffval >= 0 && diffval <= 15)
1764 		{
1765 			v1 = ((lowval >> 3) & 0xF0) | diffval;
1766 			v1e = quant_color(quant_level, v1);
1767 			v1d = v1e;
1768 			if ((v1d & 0xF0) == (v1 & 0xF0))
1769 			{
1770 				output[0] = static_cast<uint8_t>(v0e);
1771 				output[1] = static_cast<uint8_t>(v1e);
1772 				return true;
1773 			}
1774 		}
1775 	}
1776 
1777 	// Try to encode the low-precision submode
1778 	lowval = (ilum0 + 32) >> 6;
1779 	highval = (ilum1 + 32) >> 6;
1780 
1781 	lowval = astc::clamp(lowval, 0, 1023);
1782 	highval = astc::clamp(highval, 0, 1023);
1783 
1784 	v0 = (lowval & 0x7F) | 0x80;
1785 	v0e = quant_color(quant_level, v0);
1786 	v0d = v0e;
1787 	if ((v0d & 0x80) == 0)
1788 	{
1789 		return false;
1790 	}
1791 
1792 	lowval = (lowval & ~0x7F) | (v0d & 0x7F);
1793 	diffval = highval - lowval;
1794 	if (diffval < 0 || diffval > 31)
1795 	{
1796 		return false;
1797 	}
1798 
1799 	v1 = ((lowval >> 2) & 0xE0) | diffval;
1800 	v1e = quant_color(quant_level, v1);
1801 	v1d = v1e;
1802 	if ((v1d & 0xE0) != (v1 & 0xE0))
1803 	{
1804 		return false;
1805 	}
1806 
1807 	output[0] = static_cast<uint8_t>(v0e);
1808 	output[1] = static_cast<uint8_t>(v1e);
1809 	return true;
1810 }
1811 
1812 /**
1813  * @brief Quantize a HDR A color using either delta or direct RGBA encoding.
1814  *
1815  * @param      alpha0        The input unquantized color0 endpoint.
1816  * @param      alpha1        The input unquantized color1 endpoint.
1817  * @param[out] output        The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1818  * @param      quant_level   The quantization level to use.
1819  */
quantize_hdr_alpha(float alpha0,float alpha1,uint8_t output[2],quant_method quant_level)1820 static void quantize_hdr_alpha(
1821 	float alpha0,
1822 	float alpha1,
1823 	uint8_t output[2],
1824 	quant_method quant_level
1825 ) {
1826 	alpha0 = astc::clamp(alpha0, 0.0f, 65280.0f);
1827 	alpha1 = astc::clamp(alpha1, 0.0f, 65280.0f);
1828 
1829 	int ialpha0 = astc::flt2int_rtn(alpha0);
1830 	int ialpha1 = astc::flt2int_rtn(alpha1);
1831 
1832 	int val0, val1, diffval;
1833 	int v6, v7;
1834 	int v6e, v7e;
1835 	int v6d, v7d;
1836 
1837 	// Try to encode one of the delta submodes, in decreasing-precision order
1838 	for (int i = 2; i >= 0; i--)
1839 	{
1840 		val0 = (ialpha0 + (128 >> i)) >> (8 - i);
1841 		val1 = (ialpha1 + (128 >> i)) >> (8 - i);
1842 
1843 		v6 = (val0 & 0x7F) | ((i & 1) << 7);
1844 		v6e = quant_color(quant_level, v6);
1845 		v6d = v6e;
1846 
1847 		if ((v6 ^ v6d) & 0x80)
1848 		{
1849 			continue;
1850 		}
1851 
1852 		val0 = (val0 & ~0x7f) | (v6d & 0x7f);
1853 		diffval = val1 - val0;
1854 		int cutoff = 32 >> i;
1855 		int mask = 2 * cutoff - 1;
1856 
1857 		if (diffval < -cutoff || diffval >= cutoff)
1858 		{
1859 			continue;
1860 		}
1861 
1862 		v7 = ((i & 2) << 6) | ((val0 >> 7) << (6 - i)) | (diffval & mask);
1863 		v7e = quant_color(quant_level, v7);
1864 		v7d = v7e;
1865 
1866 		static const int testbits[3] { 0xE0, 0xF0, 0xF8 };
1867 
1868 		if ((v7 ^ v7d) & testbits[i])
1869 		{
1870 			continue;
1871 		}
1872 
1873 		output[0] = static_cast<uint8_t>(v6e);
1874 		output[1] = static_cast<uint8_t>(v7e);
1875 		return;
1876 	}
1877 
1878 	// Could not encode any of the delta modes; instead encode a flat value
1879 	val0 = (ialpha0 + 256) >> 9;
1880 	val1 = (ialpha1 + 256) >> 9;
1881 	v6 = val0 | 0x80;
1882 	v7 = val1 | 0x80;
1883 
1884 	output[0] = quant_color(quant_level, v6);
1885 	output[1] = quant_color(quant_level, v7);
1886 
1887 	return;
1888 }
1889 
1890 /**
1891  * @brief Quantize a HDR RGBA color using either delta or direct RGBA encoding.
1892  *
1893  * @param      color0        The input unquantized color0 endpoint.
1894  * @param      color1        The input unquantized color1 endpoint.
1895  * @param[out] output        The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1896  * @param      quant_level   The quantization level to use.
1897  */
quantize_hdr_rgb_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[8],quant_method quant_level)1898 static void quantize_hdr_rgb_alpha(
1899 	vfloat4 color0,
1900 	vfloat4 color1,
1901 	uint8_t output[8],
1902 	quant_method quant_level
1903 ) {
1904 	quantize_hdr_rgb(color0, color1, output, quant_level);
1905 	quantize_hdr_alpha(color0.lane<3>(), color1.lane<3>(), output + 6, quant_level);
1906 }
1907 
1908 /* See header for documentation. */
pack_color_endpoints(QualityProfile privateProfile,vfloat4 color0,vfloat4 color1,vfloat4 rgbs_color,vfloat4 rgbo_color,int format,uint8_t * output,quant_method quant_level)1909 uint8_t pack_color_endpoints(
1910 	QualityProfile privateProfile,
1911 	vfloat4 color0,
1912 	vfloat4 color1,
1913 	vfloat4 rgbs_color,
1914 	vfloat4 rgbo_color,
1915 	int format,
1916 	uint8_t* output,
1917 	quant_method quant_level
1918 ) {
1919 	assert(QUANT_6 <= quant_level && quant_level <= QUANT_256);
1920 
1921 	// Clamp colors to a valid LDR range
1922 	// Note that HDR has a lower max, handled in the conversion functions
1923 	color0 = clamp(0.0f, 65535.0f, color0);
1924 	color1 = clamp(0.0f, 65535.0f, color1);
1925 
1926 	// Pre-scale the LDR value we need to the 0-255 quantizable range
1927 	vfloat4 color0_ldr = color0 * (1.0f  / 257.0f);
1928 	vfloat4 color1_ldr = color1 * (1.0f  / 257.0f);
1929 
1930 	uint8_t retval = 0;
1931 	float best_error = ERROR_CALC_DEFAULT;
1932 	vint4 color0_out, color1_out;
1933 	vint4 color0_out2, color1_out2;
1934 
1935 	switch (format)
1936 	{
1937 	case FMT_RGB:
1938 		if (quant_level <= QUANT_160)
1939 		{
1940 			if (try_quantize_rgb_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level))
1941 			{
1942 				vint4 color0_unpack;
1943 				vint4 color1_unpack;
1944 				rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack);
1945 
1946 				retval = FMT_RGB_DELTA;
1947 				best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1948 			}
1949 
1950 			if (try_quantize_rgb_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
1951 			{
1952 				vint4 color0_unpack;
1953 				vint4 color1_unpack;
1954 				rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1955 
1956 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1957 				if (error < best_error)
1958 				{
1959 					retval = FMT_RGB_DELTA;
1960 					best_error = error;
1961 					color0_out = color0_out2;
1962 					color1_out = color1_out2;
1963 				}
1964 			}
1965 		}
1966 
1967 		if (quant_level < QUANT_256)
1968 		{
1969 			if (try_quantize_rgb_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
1970 			{
1971 				vint4 color0_unpack;
1972 				vint4 color1_unpack;
1973 				rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1974 
1975 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1976 				if (error < best_error)
1977 				{
1978 					retval = FMT_RGB;
1979 					best_error = error;
1980 					color0_out = color0_out2;
1981 					color1_out = color1_out2;
1982 				}
1983 			}
1984 		}
1985 
1986 		{
1987 			quantize_rgb(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level);
1988 
1989 			vint4 color0_unpack;
1990 			vint4 color1_unpack;
1991 			rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1992 
1993 			float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1994 			if (error < best_error)
1995 			{
1996 				retval =  FMT_RGB;
1997 				color0_out = color0_out2;
1998 				color1_out = color1_out2;
1999 			}
2000 		}
2001 
2002 		// TODO: Can we vectorize this?
2003 		output[0] = static_cast<uint8_t>(color0_out.lane<0>());
2004 		output[1] = static_cast<uint8_t>(color1_out.lane<0>());
2005 		output[2] = static_cast<uint8_t>(color0_out.lane<1>());
2006 		output[3] = static_cast<uint8_t>(color1_out.lane<1>());
2007 		output[4] = static_cast<uint8_t>(color0_out.lane<2>());
2008 		output[5] = static_cast<uint8_t>(color1_out.lane<2>());
2009 		break;
2010 
2011 	case FMT_RGBA:
2012 		if ((privateProfile == HIGH_QUALITY_PROFILE) && (quant_level <= QUANT_160)) // only full quality profile to try
2013 		{
2014 			if (try_quantize_rgba_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level))
2015 			{
2016 				vint4 color0_unpack;
2017 				vint4 color1_unpack;
2018 				rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack);
2019 
2020 				retval = FMT_RGBA_DELTA;
2021 				best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2022 			}
2023 
2024 			if (try_quantize_rgba_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
2025 			{
2026 				vint4 color0_unpack;
2027 				vint4 color1_unpack;
2028 				rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2029 
2030 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2031 				if (error < best_error)
2032 				{
2033 					retval = FMT_RGBA_DELTA;
2034 					best_error = error;
2035 					color0_out = color0_out2;
2036 					color1_out = color1_out2;
2037 				}
2038 			}
2039 		}
2040 
2041 		if (quant_level < QUANT_256)
2042 		{
2043 			if (try_quantize_rgba_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
2044 			{
2045 				vint4 color0_unpack;
2046 				vint4 color1_unpack;
2047 				rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2048 
2049 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2050 				if (error < best_error)
2051 				{
2052 					retval = FMT_RGBA;
2053 					best_error = error;
2054 					color0_out = color0_out2;
2055 					color1_out = color1_out2;
2056 				}
2057 			}
2058 		}
2059 
2060 		{
2061 			quantize_rgba(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level);
2062 
2063 			vint4 color0_unpack;
2064 			vint4 color1_unpack;
2065 			rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2066 
2067 			float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2068 			if (error < best_error)
2069 			{
2070 				retval =  FMT_RGBA;
2071 				color0_out = color0_out2;
2072 				color1_out = color1_out2;
2073 			}
2074 		}
2075 
2076 		// TODO: Can we vectorize this?
2077 		output[0] = static_cast<uint8_t>(color0_out.lane<0>());
2078 		output[1] = static_cast<uint8_t>(color1_out.lane<0>());
2079 		output[2] = static_cast<uint8_t>(color0_out.lane<1>());
2080 		output[3] = static_cast<uint8_t>(color1_out.lane<1>());
2081 		output[4] = static_cast<uint8_t>(color0_out.lane<2>());
2082 		output[5] = static_cast<uint8_t>(color1_out.lane<2>());
2083 		output[6] = static_cast<uint8_t>(color0_out.lane<3>());
2084 		output[7] = static_cast<uint8_t>(color1_out.lane<3>());
2085 		break;
2086 
2087 	case FMT_RGB_SCALE:
2088 		quantize_rgbs(rgbs_color, output, quant_level);
2089 		retval = FMT_RGB_SCALE;
2090 		break;
2091 
2092 	case FMT_HDR_RGB_SCALE:
2093 		quantize_hdr_rgbo(rgbo_color, output, quant_level);
2094 		retval = FMT_HDR_RGB_SCALE;
2095 		break;
2096 
2097 	case FMT_HDR_RGB:
2098 		quantize_hdr_rgb(color0, color1, output, quant_level);
2099 		retval = FMT_HDR_RGB;
2100 		break;
2101 
2102 	case FMT_RGB_SCALE_ALPHA:
2103 		quantize_rgbs_alpha(color0_ldr, color1_ldr, rgbs_color, output, quant_level);
2104 		retval = FMT_RGB_SCALE_ALPHA;
2105 		break;
2106 
2107 	case FMT_HDR_LUMINANCE_SMALL_RANGE:
2108 	case FMT_HDR_LUMINANCE_LARGE_RANGE:
2109 		if (try_quantize_hdr_luminance_small_range(color0, color1, output, quant_level))
2110 		{
2111 			retval = FMT_HDR_LUMINANCE_SMALL_RANGE;
2112 			break;
2113 		}
2114 		quantize_hdr_luminance_large_range(color0, color1, output, quant_level);
2115 		retval = FMT_HDR_LUMINANCE_LARGE_RANGE;
2116 		break;
2117 
2118 	case FMT_LUMINANCE:
2119 		quantize_luminance(color0_ldr, color1_ldr, output, quant_level);
2120 		retval = FMT_LUMINANCE;
2121 		break;
2122 
2123 	case FMT_LUMINANCE_ALPHA:
2124 		if (quant_level <= 18)
2125 		{
2126 			if (try_quantize_luminance_alpha_delta(color0_ldr, color1_ldr, output, quant_level))
2127 			{
2128 				retval = FMT_LUMINANCE_ALPHA_DELTA;
2129 				break;
2130 			}
2131 		}
2132 		quantize_luminance_alpha(color0_ldr, color1_ldr, output, quant_level);
2133 		retval = FMT_LUMINANCE_ALPHA;
2134 		break;
2135 
2136 	case FMT_HDR_RGB_LDR_ALPHA:
2137 		quantize_hdr_rgb_ldr_alpha(color0, color1, output, quant_level);
2138 		retval = FMT_HDR_RGB_LDR_ALPHA;
2139 		break;
2140 
2141 	case FMT_HDR_RGBA:
2142 		quantize_hdr_rgb_alpha(color0, color1, output, quant_level);
2143 		retval = FMT_HDR_RGBA;
2144 		break;
2145 	}
2146 
2147 	return retval;
2148 }
2149 
2150 #endif
2151