• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2023 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 #if !defined(ASTCENC_DECOMPRESS_ONLY)
19 
20 /**
21  * @brief Functions for color quantization.
22  *
23  * The design of the color quantization functionality requires the caller to use higher level error
24  * analysis to determine the base encoding that should be used. This earlier analysis will select
25  * the basic type of the endpoint that should be used:
26  *
27  *     * Mode: LDR or HDR
28  *     * Quantization level
29  *     * Channel count: L, LA, RGB, or RGBA
30  *     * Endpoint 2 type: Direct color encoding, or scaled from endpoint 1.
31  *
32  * However, this leaves a number of decisions about exactly how to pack the endpoints open. In
33  * particular we need to determine if blue contraction can be used, and/or if delta encoding can be
34  * used. If they can be applied these will allow us to maintain higher precision in the endpoints
35  * without needing additional storage.
36  */
37 
38 #include <stdio.h>
39 #include <assert.h>
40 
41 #include "astcenc_internal.h"
42 
43 /**
44  * @brief Compute the error of an LDR RGB or RGBA encoding.
45  *
46  * @param uquant0    The original endpoint 0 color.
47  * @param uquant1    The original endpoint 1 color.
48  * @param quant0     The unpacked quantized endpoint 0 color.
49  * @param quant1     The unpacked quantized endpoint 1 color.
50  *
51  * @return The MSE of the encoding.
52  */
get_rgba_encoding_error(vfloat4 uquant0,vfloat4 uquant1,vint4 quant0,vint4 quant1)53 static float get_rgba_encoding_error(
54 	vfloat4 uquant0,
55 	vfloat4 uquant1,
56 	vint4 quant0,
57 	vint4 quant1
58 ) {
59 	vfloat4 error0 = uquant0 - int_to_float(quant0);
60 	vfloat4 error1 = uquant1 - int_to_float(quant1);
61 	return hadd_s(error0 * error0 + error1 * error1);
62 }
63 
64 /**
65  * @brief Determine the quantized value given a quantization level.
66  *
67  * @param quant_level   The quantization level to use.
68  * @param value         The value to convert. This must be in the 0-255 range.
69  *
70  * @return The unpacked quantized value, returned in 0-255 range.
71  */
quant_color(quant_method quant_level,int value)72 static inline uint8_t quant_color(
73 	quant_method quant_level,
74 	int value
75 ) {
76     value = astc::clamp(value, 0, 255); // 255: maximum value
77 	int index = value * 2 + 1;
78 	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
79 }
80 
81 /**
82  * @brief Determine the quantized value given a quantization level.
83  *
84  * @param quant_level   The quantization level to use.
85  * @param value         The value to convert. This must be in the 0-255 range.
86  *
87  * @return The unpacked quantized value, returned in 0-255 range.
88  */
quant_color3(quant_method quant_level,vint4 value)89 static inline vint4 quant_color3(
90 	quant_method quant_level,
91 	vint4 value
92 ) {
93 	vint4 index = value * 2 + 1;
94 	return vint4(
95 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()],
96 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()],
97 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()],
98 		0);
99 }
100 
101 /**
102  * @brief Determine the quantized value given a quantization level and residual.
103  *
104  * @param quant_level   The quantization level to use.
105  * @param value         The value to convert. This must be in the 0-255 range.
106  * @param valuef        The original value before rounding, used to compute a residual.
107  *
108  * @return The unpacked quantized value, returned in 0-255 range.
109  */
quant_color(quant_method quant_level,int value,float valuef)110 static inline uint8_t quant_color(
111 	quant_method quant_level,
112 	int value,
113 	float valuef
114 ) {
115 	int index = value * 2;
116 
117 	// Compute the residual to determine if we should round down or up ties.
118 	// Test should be residual >= 0, but empirical testing shows small bias helps.
119 	float residual = valuef - static_cast<float>(value);
120 	if (residual >= -0.1f)
121 	{
122 		index++;
123 	}
124 
125 	return color_unquant_to_uquant_tables[quant_level - QUANT_6][index];
126 }
127 
128 /**
129  * @brief Determine the quantized value given a quantization level and residual.
130  *
131  * @param quant_level   The quantization level to use.
132  * @param value         The value to convert. This must be in the 0-255 range.
133  * @param valuef        The original value before rounding, used to compute a residual.
134  *
135  * @return The unpacked quantized value, returned in 0-255 range.
136  */
quant_color3(quant_method quant_level,vint4 value,vfloat4 valuef)137 static inline vint4 quant_color3(
138 	quant_method quant_level,
139 	vint4 value,
140 	vfloat4 valuef
141 ) {
142 	vint4 index = value * 2;
143 
144 	// Compute the residual to determine if we should round down or up ties.
145 	// Test should be residual >= 0, but empirical testing shows small bias helps.
146 	vfloat4 residual = valuef - int_to_float(value);
147 	vmask4 mask = residual >= vfloat4(-0.1f);
148 	index = select(index, index + 1, mask);
149 
150 	return vint4(
151 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<0>()],
152 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<1>()],
153 		color_unquant_to_uquant_tables[quant_level - QUANT_6][index.lane<2>()],
154 		0);
155 }
156 
157 /**
158  * @brief Quantize an LDR RGB color.
159  *
160  * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
161  * For this encoding @c color0 cannot be larger than @c color1. If @c color0 is actually larger
162  * than @c color1, @c color0 is reduced and @c color1 is increased until the constraint is met.
163  *
164  * @param      color0        The input unquantized color0 endpoint.
165  * @param      color1        The input unquantized color1 endpoint.
166  * @param[out] color0_out    The output quantized color0 endpoint.
167  * @param[out] color1_out    The output quantized color1 endpoint.
168  * @param      quant_level   The quantization level to use.
169  */
quantize_rgb(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)170 static void quantize_rgb(
171 	vfloat4 color0,
172 	vfloat4 color1,
173 	vint4& color0_out,
174 	vint4& color1_out,
175 	quant_method quant_level
176 ) {
177 	vint4 color0i, color1i;
178 	vfloat4 nudge(0.2f);
179 
180 	do
181 	{
182 		vint4 color0q = max(float_to_int_rtn(color0), vint4(0));
183 		color0i = quant_color3(quant_level, color0q, color0);
184 		color0 = color0 - nudge;
185 
186 		vint4 color1q = min(float_to_int_rtn(color1), vint4(255));
187 		color1i = quant_color3(quant_level, color1q, color1);
188 		color1 = color1 + nudge;
189 	} while (hadd_rgb_s(color0i) > hadd_rgb_s(color1i));
190 
191 	color0_out = color0i;
192 	color1_out = color1i;
193 }
194 
195 /**
196  * @brief Quantize an LDR RGBA color.
197  *
198  * Since this is a fall-back encoding, we cannot actually fail but must produce a sensible result.
199  * For this encoding @c color0.rgb cannot be larger than @c color1.rgb (this indicates blue
200  * contraction). If @c color0.rgb is actually larger than @c color1.rgb, @c color0.rgb is reduced
201  * and @c color1.rgb is increased until the constraint is met.
202  *
203  * @param      color0        The input unquantized color0 endpoint.
204  * @param      color1        The input unquantized color1 endpoint.
205  * @param[out] color0_out    The output quantized color0 endpoint.
206  * @param[out] color1_out    The output quantized color1 endpoint.
207  * @param      quant_level   The quantization level to use.
208  */
quantize_rgba(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)209 static void quantize_rgba(
210 	vfloat4 color0,
211 	vfloat4 color1,
212 	vint4& color0_out,
213 	vint4& color1_out,
214 	quant_method quant_level
215 ) {
216 	quantize_rgb(color0, color1, color0_out, color1_out, quant_level);
217 
218 	float a0 = color0.lane<3>();
219 	float a1 = color1.lane<3>();
220 
221 	color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0));
222 	color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1));
223 }
224 
225 /**
226  * @brief Try to quantize an LDR RGB color using blue-contraction.
227  *
228  * Blue-contraction is only usable if encoded color 1 is larger than color 0.
229  *
230  * @param      color0        The input unquantized color0 endpoint.
231  * @param      color1        The input unquantized color1 endpoint.
232  * @param[out] color0_out    The output quantized color0 endpoint.
233  * @param[out] color1_out    The output quantized color1 endpoint.
234  * @param      quant_level   The quantization level to use.
235  *
236  * @return Returns @c false on failure, @c true on success.
237  */
try_quantize_rgb_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)238 static bool try_quantize_rgb_blue_contract(
239 	vfloat4 color0,
240 	vfloat4 color1,
241 	vint4& color0_out,
242 	vint4& color1_out,
243 	quant_method quant_level
244 ) {
245 	// Apply inverse blue-contraction
246 	color0 += color0 - color0.swz<2, 2, 2, 3>();
247 	color1 += color1 - color1.swz<2, 2, 2, 3>();
248 
249 	// If anything overflows BC cannot be used
250 	vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f));
251 	vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f));
252 	if (any(color0_error | color1_error))
253 	{
254 		return false;
255 	}
256 
257 	// Quantize the inverse blue-contracted color
258 	vint4 color0i = quant_color3(quant_level, float_to_int_rtn(color0), color0);
259 	vint4 color1i = quant_color3(quant_level, float_to_int_rtn(color1), color1);
260 
261 	// If color #1 is not larger than color #0 then blue-contraction cannot be used
262 	// We must test afterwards because quantization can change the order
263 	if (hadd_rgb_s(color1i) <= hadd_rgb_s(color0i))
264 	{
265 		return false;
266 	}
267 
268 	color0_out = color1i;
269 	color1_out = color0i;
270 	return true;
271 }
272 
273 /**
274  * @brief Try to quantize an LDR RGBA color using blue-contraction.
275  *
276  * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
277  *
278  * @param      color0        The input unquantized color0 endpoint.
279  * @param      color1        The input unquantized color1 endpoint.
280  * @param[out] color0_out    The output quantized color0 endpoint.
281  * @param[out] color1_out    The output quantized color1 endpoint.
282  * @param      quant_level   The quantization level to use.
283  *
284  * @return Returns @c false on failure, @c true on success.
285  */
try_quantize_rgba_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)286 static bool try_quantize_rgba_blue_contract(
287 	vfloat4 color0,
288 	vfloat4 color1,
289 	vint4& color0_out,
290 	vint4& color1_out,
291 	quant_method quant_level
292 ) {
293 	if (try_quantize_rgb_blue_contract(color0, color1, color0_out, color1_out, quant_level))
294 	{
295 		float a0 = color0.lane<3>();
296 		float a1 = color1.lane<3>();
297 
298 		color0_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a1), a1));
299 		color1_out.set_lane<3>(quant_color(quant_level, astc::flt2int_rtn(a0), a0));
300 
301 		return true;
302 	}
303 
304 	return false;
305 }
306 
307 /**
308  * @brief Try to quantize an LDR RGB color using delta encoding.
309  *
310  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
311  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
312  * non-negative, then we encode a regular delta.
313  *
314  * @param      color0        The input unquantized color0 endpoint.
315  * @param      color1        The input unquantized color1 endpoint.
316  * @param[out] color0_out    The output quantized color0 endpoint.
317  * @param[out] color1_out    The output quantized color1 endpoint.
318  * @param      quant_level   The quantization level to use.
319  *
320  * @return Returns @c false on failure, @c true on success.
321  */
try_quantize_rgb_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)322 static bool try_quantize_rgb_delta(
323 	vfloat4 color0,
324 	vfloat4 color1,
325 	vint4& color0_out,
326 	vint4& color1_out,
327 	quant_method quant_level
328 ) {
329 	// Transform color0 to unorm9
330 	vint4 color0a = float_to_int_rtn(color0);
331 	color0.set_lane<3>(0.0f);
332 	color0a = lsl<1>(color0a);
333 
334 	// Mask off the top bit
335 	vint4 color0b = color0a & 0xFF;
336 
337 	// Quantize then unquantize in order to get a value that we take differences against
338 	vint4 color0be = quant_color3(quant_level, color0b);
339 	color0b = color0be | (color0a & 0x100);
340 
341 	// Get hold of the second value
342 	vint4 color1d = float_to_int_rtn(color1);
343 	color1d = lsl<1>(color1d);
344 
345 	// ... and take differences
346 	color1d = color1d - color0b;
347 	color1d.set_lane<3>(0);
348 
349 	// Check if the difference is too large to be encodable
350 	if (any((color1d > vint4(63)) | (color1d < vint4(-64))))
351 	{
352 		return false;
353 	}
354 
355 	// Insert top bit of the base into the offset
356 	color1d = color1d & 0x7F;
357 	color1d = color1d | lsr<1>(color0b & 0x100);
358 
359 	// Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
360 	// since we have then corrupted either the top bit of the base or the sign bit of the offset
361 	vint4 color1de = quant_color3(quant_level, color1d);
362 
363 	vint4 color_flips = (color1d ^ color1de) & 0xC0;
364 	color_flips.set_lane<3>(0);
365 	if (any(color_flips != vint4::zero()))
366 	{
367 		return false;
368 	}
369 
370 	// If the sum of offsets triggers blue-contraction then encoding fails
371 	vint4 ep0 = color0be;
372 	vint4 ep1 = color1de;
373 	bit_transfer_signed(ep1, ep0);
374 	if (hadd_rgb_s(ep1) < 0)
375 	{
376 		return false;
377 	}
378 
379 	// Check that the offsets produce legitimate sums as well
380 	ep0 = ep0 + ep1;
381 	if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
382 	{
383 		return false;
384 	}
385 
386 	color0_out = color0be;
387 	color1_out = color1de;
388 	return true;
389 }
390 
391 /**
392  * @brief Try to quantize an LDR RGB color using delta encoding and blue-contraction.
393  *
394  * Blue-contraction is only usable if encoded color 1 RGB is larger than color 0 RGB.
395  *
396  * @param      color0        The input unquantized color0 endpoint.
397  * @param      color1        The input unquantized color1 endpoint.
398  * @param[out] color0_out    The output quantized color0 endpoint.
399  * @param[out] color1_out    The output quantized color1 endpoint.
400  * @param      quant_level   The quantization level to use.
401  *
402  * @return Returns @c false on failure, @c true on success.
403  */
try_quantize_rgb_delta_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)404 static bool try_quantize_rgb_delta_blue_contract(
405 	vfloat4 color0,
406 	vfloat4 color1,
407 	vint4& color0_out,
408 	vint4& color1_out,
409 	quant_method quant_level
410 ) {
411 	// Note: Switch around endpoint colors already at start
412 	std::swap(color0, color1);
413 
414 	// Apply inverse blue-contraction
415 	color0 += color0 - color0.swz<2, 2, 2, 3>();
416 	color1 += color1 - color1.swz<2, 2, 2, 3>();
417 
418 	// If anything overflows BC cannot be used
419 	vmask4 color0_error = (color0 < vfloat4(0.0f)) | (color0 > vfloat4(255.0f));
420 	vmask4 color1_error = (color1 < vfloat4(0.0f)) | (color1 > vfloat4(255.0f));
421 	if (any(color0_error | color1_error))
422 	{
423 		return false;
424 	}
425 
426 	// Transform color0 to unorm9
427 	vint4 color0a = float_to_int_rtn(color0);
428 	color0.set_lane<3>(0.0f);
429 	color0a = lsl<1>(color0a);
430 
431 	// Mask off the top bit
432 	vint4 color0b = color0a & 0xFF;
433 
434 	// Quantize then unquantize in order to get a value that we take differences against
435 	vint4 color0be = quant_color3(quant_level, color0b);
436 	color0b = color0be | (color0a & 0x100);
437 
438 	// Get hold of the second value
439 	vint4 color1d = float_to_int_rtn(color1);
440 	color1d = lsl<1>(color1d);
441 
442 	// ... and take differences
443 	color1d = color1d - color0b;
444 	color1d.set_lane<3>(0);
445 
446 	// Check if the difference is too large to be encodable
447 	if (any((color1d > vint4(63)) | (color1d < vint4(-64))))
448 	{
449 		return false;
450 	}
451 
452 	// Insert top bit of the base into the offset
453 	color1d = color1d & 0x7F;
454 	color1d = color1d | lsr<1>(color0b & 0x100);
455 
456 	// Then quantize and unquantize; if this causes either top two bits to flip, then encoding fails
457 	// since we have then corrupted either the top bit of the base or the sign bit of the offset
458 	vint4 color1de = quant_color3(quant_level, color1d);
459 
460 	vint4 color_flips = (color1d ^ color1de) & 0xC0;
461 	color_flips.set_lane<3>(0);
462 	if (any(color_flips != vint4::zero()))
463 	{
464 		return false;
465 	}
466 
467 	// If the sum of offsets does not trigger blue-contraction then encoding fails
468 	vint4 ep0 = color0be;
469 	vint4 ep1 = color1de;
470 	bit_transfer_signed(ep1, ep0);
471 	if (hadd_rgb_s(ep1) >= 0)
472 	{
473 		return false;
474 	}
475 
476 	// Check that the offsets produce legitimate sums as well
477 	ep0 = ep0 + ep1;
478 	if (any((ep0 < vint4(0)) | (ep0 > vint4(0xFF))))
479 	{
480 		return false;
481 	}
482 
483 	color0_out = color0be;
484 	color1_out = color1de;
485 	return true;
486 }
487 
488 /**
489  * @brief Try to quantize an LDR A color using delta encoding.
490  *
491  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
492  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
493  * non-negative, then we encode a regular delta.
494  *
495  * This function only compressed the alpha - the other elements in the output array are not touched.
496  *
497  * @param      color0        The input unquantized color0 endpoint.
498  * @param      color1        The input unquantized color1 endpoint.
499  * @param[out] color0_out    The output quantized color0 endpoint; must preserve lane 0/1/2.
500  * @param[out] color1_out    The output quantized color1 endpoint; must preserve lane 0/1/2.
501  * @param      quant_level   The quantization level to use.
502  *
503  * @return Returns @c false on failure, @c true on success.
504  */
try_quantize_alpha_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)505 static bool try_quantize_alpha_delta(
506 	vfloat4 color0,
507 	vfloat4 color1,
508 	vint4& color0_out,
509 	vint4& color1_out,
510 	quant_method quant_level
511 ) {
512 	float a0 = color0.lane<3>();
513 	float a1 = color1.lane<3>();
514 
515 	int a0a = astc::flt2int_rtn(a0);
516 	a0a <<= 1;
517 	int a0b = a0a & 0xFF;
518 	int a0be = quant_color(quant_level, a0b);
519 	a0b = a0be;
520 	a0b |= a0a & 0x100;
521 	int a1d = astc::flt2int_rtn(a1);
522 	a1d <<= 1;
523 	a1d -= a0b;
524 
525 	if (a1d > 63 || a1d < -64)
526 	{
527 		return false;
528 	}
529 
530 	a1d &= 0x7F;
531 	a1d |= (a0b & 0x100) >> 1;
532 
533 	int a1de = quant_color(quant_level, a1d);
534 	int a1du = a1de;
535 	if ((a1d ^ a1du) & 0xC0)
536 	{
537 		return false;
538 	}
539 
540 	a1du &= 0x7F;
541 	if (a1du & 0x40)
542 	{
543 		a1du -= 0x80;
544 	}
545 
546 	a1du += a0b;
547 	if (a1du < 0 || a1du > 0x1FF)
548 	{
549 		return false;
550 	}
551 
552 	color0_out.set_lane<3>(a0be);
553 	color1_out.set_lane<3>(a1de);
554 
555 	return true;
556 }
557 
558 /**
559  * @brief Try to quantize an LDR LA color using delta encoding.
560  *
561  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
562  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
563  * non-negative, then we encode a regular delta.
564  *
565  * This function only compressed the alpha - the other elements in the output array are not touched.
566  *
567  * @param      color0        The input unquantized color0 endpoint.
568  * @param      color1        The input unquantized color1 endpoint.
569  * @param[out] output        The output endpoints, returned as (l0, l1, a0, a1).
570  * @param      quant_level   The quantization level to use.
571  *
572  * @return Returns @c false on failure, @c true on success.
573  */
try_quantize_luminance_alpha_delta(vfloat4 color0,vfloat4 color1,uint8_t output[4],quant_method quant_level)574 static bool try_quantize_luminance_alpha_delta(
575 	vfloat4 color0,
576 	vfloat4 color1,
577 	uint8_t output[4],
578 	quant_method quant_level
579 ) {
580 	float l0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
581 	float l1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
582 
583 	float a0 = color0.lane<3>();
584 	float a1 = color1.lane<3>();
585 
586 	int l0a = astc::flt2int_rtn(l0);
587 	int a0a = astc::flt2int_rtn(a0);
588 	l0a <<= 1;
589 	a0a <<= 1;
590 
591 	int l0b = l0a & 0xFF;
592 	int a0b = a0a & 0xFF;
593 	int l0be = quant_color(quant_level, l0b);
594 	int a0be = quant_color(quant_level, a0b);
595 	l0b = l0be;
596 	a0b = a0be;
597 	l0b |= l0a & 0x100;
598 	a0b |= a0a & 0x100;
599 
600 	int l1d = astc::flt2int_rtn(l1);
601 	int a1d = astc::flt2int_rtn(a1);
602 	l1d <<= 1;
603 	a1d <<= 1;
604 	l1d -= l0b;
605 	a1d -= a0b;
606 
607 	if (l1d > 63 || l1d < -64)
608 	{
609 		return false;
610 	}
611 
612 	if (a1d > 63 || a1d < -64)
613 	{
614 		return false;
615 	}
616 
617 	l1d &= 0x7F;
618 	a1d &= 0x7F;
619 	l1d |= (l0b & 0x100) >> 1;
620 	a1d |= (a0b & 0x100) >> 1;
621 
622 	int l1de = quant_color(quant_level, l1d);
623 	int a1de = quant_color(quant_level, a1d);
624 	int l1du = l1de;
625 	int a1du = a1de;
626 
627 	if ((l1d ^ l1du) & 0xC0)
628 	{
629 		return false;
630 	}
631 
632 	if ((a1d ^ a1du) & 0xC0)
633 	{
634 		return false;
635 	}
636 
637 	l1du &= 0x7F;
638 	a1du &= 0x7F;
639 
640 	if (l1du & 0x40)
641 	{
642 		l1du -= 0x80;
643 	}
644 
645 	if (a1du & 0x40)
646 	{
647 		a1du -= 0x80;
648 	}
649 
650 	l1du += l0b;
651 	a1du += a0b;
652 
653 	if (l1du < 0 || l1du > 0x1FF)
654 	{
655 		return false;
656 	}
657 
658 	if (a1du < 0 || a1du > 0x1FF)
659 	{
660 		return false;
661 	}
662 
663 	output[0] = static_cast<uint8_t>(l0be);
664 	output[1] = static_cast<uint8_t>(l1de);
665 	output[2] = static_cast<uint8_t>(a0be);
666 	output[3] = static_cast<uint8_t>(a1de);
667 
668 	return true;
669 }
670 
671 /**
672  * @brief Try to quantize an LDR RGBA color using delta encoding.
673  *
674  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
675  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
676  * non-negative, then we encode a regular delta.
677  *
678  * This function only compressed the alpha - the other elements in the output array are not touched.
679  *
680  * @param      color0        The input unquantized color0 endpoint.
681  * @param      color1        The input unquantized color1 endpoint.
682  * @param[out] color0_out   The output quantized color0 endpoint
683  * @param[out] color1_out   The output quantized color1 endpoint
684  * @param      quant_level   The quantization level to use.
685  *
686  * @return Returns @c false on failure, @c true on success.
687  */
try_quantize_rgba_delta(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)688 static bool try_quantize_rgba_delta(
689 	vfloat4 color0,
690 	vfloat4 color1,
691 	vint4& color0_out,
692 	vint4& color1_out,
693 	quant_method quant_level
694 ) {
695 	return try_quantize_rgb_delta(color0, color1, color0_out, color1_out, quant_level) &&
696 	       try_quantize_alpha_delta(color0, color1, color0_out, color1_out, quant_level);
697 }
698 
699 /**
700  * @brief Try to quantize an LDR RGBA color using delta and blue contract encoding.
701  *
702  * At decode time we move one bit from the offset to the base and seize another bit as a sign bit;
703  * we then unquantize both values as if they contain one extra bit. If the sum of the offsets is
704  * non-negative, then we encode a regular delta.
705  *
706  * This function only compressed the alpha - the other elements in the output array are not touched.
707  *
708  * @param      color0       The input unquantized color0 endpoint.
709  * @param      color1       The input unquantized color1 endpoint.
710  * @param[out] color0_out   The output quantized color0 endpoint
711  * @param[out] color1_out   The output quantized color1 endpoint
712  * @param      quant_level  The quantization level to use.
713  *
714  * @return Returns @c false on failure, @c true on success.
715  */
try_quantize_rgba_delta_blue_contract(vfloat4 color0,vfloat4 color1,vint4 & color0_out,vint4 & color1_out,quant_method quant_level)716 static bool try_quantize_rgba_delta_blue_contract(
717 	vfloat4 color0,
718 	vfloat4 color1,
719 	vint4& color0_out,
720 	vint4& color1_out,
721 	quant_method quant_level
722 ) {
723 	// Note that we swap the color0 and color1 ordering for alpha to match RGB blue-contract
724 	return try_quantize_rgb_delta_blue_contract(color0, color1, color0_out, color1_out, quant_level) &&
725 	       try_quantize_alpha_delta(color1, color0, color0_out, color1_out, quant_level);
726 }
727 
728 /**
729  * @brief Quantize an LDR RGB color using scale encoding.
730  *
731  * @param      color         The input unquantized color endpoint and scale factor.
732  * @param[out] output        The output endpoints, returned as (r0, g0, b0, s).
733  * @param      quant_level   The quantization level to use.
734  */
quantize_rgbs(vfloat4 color,uint8_t output[4],quant_method quant_level)735 static void quantize_rgbs(
736 	vfloat4 color,
737 	uint8_t output[4],
738 	quant_method quant_level
739 ) {
740 	float scale = 1.0f / 257.0f;
741 
742 	float r = astc::clamp255f(color.lane<0>() * scale);
743 	float g = astc::clamp255f(color.lane<1>() * scale);
744 	float b = astc::clamp255f(color.lane<2>() * scale);
745 
746 	int ri = quant_color(quant_level, astc::flt2int_rtn(r), r);
747 	int gi = quant_color(quant_level, astc::flt2int_rtn(g), g);
748 	int bi = quant_color(quant_level, astc::flt2int_rtn(b), b);
749 
750 	float oldcolorsum = hadd_rgb_s(color) * scale;
751 	float newcolorsum = static_cast<float>(ri + gi + bi);
752 
753 	float scalea = astc::clamp1f(color.lane<3>() * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f));
754 	int scale_idx = astc::flt2int_rtn(scalea * 256.0f);
755 	scale_idx = astc::clamp(scale_idx, 0, 255);
756 
757 	output[0] = static_cast<uint8_t>(ri);
758 	output[1] = static_cast<uint8_t>(gi);
759 	output[2] = static_cast<uint8_t>(bi);
760 	output[3] = quant_color(quant_level, scale_idx);
761 }
762 
763 /**
764  * @brief Quantize an LDR RGBA color using scale encoding.
765  *
766  * @param      color0       The input unquantized color0 alpha endpoint.
767  * @param      color1       The input unquantized color1 alpha endpoint.
768  * @param      color        The input unquantized color endpoint and scale factor.
769  * @param[out] output       The output endpoints, returned as (r0, g0, b0, s, a0, a1).
770  * @param      quant_level  The quantization level to use.
771  */
quantize_rgbs_alpha(vfloat4 color0,vfloat4 color1,vfloat4 color,uint8_t output[6],quant_method quant_level)772 static void quantize_rgbs_alpha(
773 	vfloat4 color0,
774 	vfloat4 color1,
775 	vfloat4 color,
776 	uint8_t output[6],
777 	quant_method quant_level
778 ) {
779 	float a0 = color0.lane<3>();
780 	float a1 = color1.lane<3>();
781 
782 	output[4] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
783 	output[5] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
784 
785 	quantize_rgbs(color, output, quant_level);
786 }
787 
788 /**
789  * @brief Quantize a LDR L color.
790  *
791  * @param      color0        The input unquantized color0 endpoint.
792  * @param      color1        The input unquantized color1 endpoint.
793  * @param[out] output        The output endpoints, returned as (l0, l1).
794  * @param      quant_level   The quantization level to use.
795  */
quantize_luminance(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)796 static void quantize_luminance(
797 	vfloat4 color0,
798 	vfloat4 color1,
799 	uint8_t output[2],
800 	quant_method quant_level
801 ) {
802 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
803 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
804 
805 	if (lum0 > lum1)
806 	{
807 		float avg = (lum0 + lum1) * 0.5f;
808 		lum0 = avg;
809 		lum1 = avg;
810 	}
811 
812 	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
813 	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
814 }
815 
816 /**
817  * @brief Quantize a LDR LA color.
818  *
819  * @param      color0        The input unquantized color0 endpoint.
820  * @param      color1        The input unquantized color1 endpoint.
821  * @param[out] output        The output endpoints, returned as (l0, l1, a0, a1).
822  * @param      quant_level   The quantization level to use.
823  */
quantize_luminance_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[4],quant_method quant_level)824 static void quantize_luminance_alpha(
825 	vfloat4 color0,
826 	vfloat4 color1,
827 	uint8_t output[4],
828 	quant_method quant_level
829 ) {
830 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
831 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
832 
833 	float a0 = color0.lane<3>();
834 	float a1 = color1.lane<3>();
835 
836 	output[0] = quant_color(quant_level, astc::flt2int_rtn(lum0), lum0);
837 	output[1] = quant_color(quant_level, astc::flt2int_rtn(lum1), lum1);
838 	output[2] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
839 	output[3] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
840 }
841 
842 /**
843  * @brief Quantize and unquantize a value ensuring top two bits are the same.
844  *
845  * @param      quant_level     The quantization level to use.
846  * @param      value           The input unquantized value.
847  * @param[out] quant_value     The quantized value.
848  */
quantize_and_unquantize_retain_top_two_bits(quant_method quant_level,uint8_t value,uint8_t & quant_value)849 static inline void quantize_and_unquantize_retain_top_two_bits(
850 	quant_method quant_level,
851 	uint8_t value,
852 	uint8_t& quant_value
853 ) {
854 	int perform_loop;
855 	uint8_t quantval;
856 
857 	do
858 	{
859 		quantval = quant_color(quant_level, value);
860 
861 		// Perform looping if the top two bits were modified by quant/unquant
862 		perform_loop = (value & 0xC0) != (quantval & 0xC0);
863 
864 		if ((quantval & 0xC0) > (value & 0xC0))
865 		{
866 			// Quant/unquant rounded UP so that the top two bits changed;
867 			// decrement the input in hopes that this will avoid rounding up.
868 			value--;
869 		}
870 		else if ((quantval & 0xC0) < (value & 0xC0))
871 		{
872 			// Quant/unquant rounded DOWN so that the top two bits changed;
873 			// decrement the input in hopes that this will avoid rounding down.
874 			value--;
875 		}
876 	} while (perform_loop);
877 
878 	quant_value = quantval;
879 }
880 
881 /**
882  * @brief Quantize and unquantize a value ensuring top four bits are the same.
883  *
884  * @param      quant_level     The quantization level to use.
885  * @param      value           The input unquantized value.
886  * @param[out] quant_value     The quantized value in 0-255 range.
887  */
quantize_and_unquantize_retain_top_four_bits(quant_method quant_level,uint8_t value,uint8_t & quant_value)888 static inline void quantize_and_unquantize_retain_top_four_bits(
889 	quant_method quant_level,
890 	uint8_t value,
891 	uint8_t& quant_value
892 ) {
893 	uint8_t perform_loop;
894 	uint8_t quantval;
895 
896 	do
897 	{
898 		quantval = quant_color(quant_level, value);
899 		// Perform looping if the top four bits were modified by quant/unquant
900 		perform_loop = (value & 0xF0) != (quantval & 0xF0);
901 
902 		if ((quantval & 0xF0) > (value & 0xF0))
903 		{
904 			// Quant/unquant rounded UP so that the top four bits changed;
905 			// decrement the input value in hopes that this will avoid rounding up.
906 			value--;
907 		}
908 		else if ((quantval & 0xF0) < (value & 0xF0))
909 		{
910 			// Quant/unquant rounded DOWN so that the top four bits changed;
911 			// decrement the input value in hopes that this will avoid rounding down.
912 			value--;
913 		}
914 	} while (perform_loop);
915 
916 	quant_value = quantval;
917 }
918 
919 /**
920  * @brief Quantize a HDR RGB color using RGB + offset.
921  *
922  * @param      color         The input unquantized color endpoint and offset.
923  * @param[out] output        The output endpoints, returned as packed RGBS with some mode bits.
924  * @param      quant_level   The quantization level to use.
925  */
quantize_hdr_rgbo(vfloat4 color,uint8_t output[4],quant_method quant_level)926 static void quantize_hdr_rgbo(
927 	vfloat4 color,
928 	uint8_t output[4],
929 	quant_method quant_level
930 ) {
931 	color.set_lane<0>(color.lane<0>() + color.lane<3>());
932 	color.set_lane<1>(color.lane<1>() + color.lane<3>());
933 	color.set_lane<2>(color.lane<2>() + color.lane<3>());
934 
935 	color = clamp(0.0f, 65535.0f, color);
936 
937 	vfloat4 color_bak = color;
938 
939 	int majcomp;
940 	if (color.lane<0>() > color.lane<1>() && color.lane<0>() > color.lane<2>())
941 	{
942 		majcomp = 0;			// red is largest component
943 	}
944 	else if (color.lane<1>() > color.lane<2>())
945 	{
946 		majcomp = 1;			// green is largest component
947 	}
948 	else
949 	{
950 		majcomp = 2;			// blue is largest component
951 	}
952 
953 	// swap around the red component and the largest component.
954 	switch (majcomp)
955 	{
956 	case 1:
957 		color = color.swz<1, 0, 2, 3>();
958 		break;
959 	case 2:
960 		color = color.swz<2, 1, 0, 3>();
961 		break;
962 	default:
963 		break;
964 	}
965 
966 	static const int mode_bits[5][3] {
967 		{11, 5, 7},
968 		{11, 6, 5},
969 		{10, 5, 8},
970 		{9, 6, 7},
971 		{8, 7, 6}
972 	};
973 
974 	static const float mode_cutoffs[5][2] {
975 		{1024, 4096},
976 		{2048, 1024},
977 		{2048, 16384},
978 		{8192, 16384},
979 		{32768, 16384}
980 	};
981 
982 	static const float mode_rscales[5] {
983 		32.0f,
984 		32.0f,
985 		64.0f,
986 		128.0f,
987 		256.0f,
988 	};
989 
990 	static const float mode_scales[5] {
991 		1.0f / 32.0f,
992 		1.0f / 32.0f,
993 		1.0f / 64.0f,
994 		1.0f / 128.0f,
995 		1.0f / 256.0f,
996 	};
997 
998 	float r_base = color.lane<0>();
999 	float g_base = color.lane<0>() - color.lane<1>() ;
1000 	float b_base = color.lane<0>() - color.lane<2>() ;
1001 	float s_base = color.lane<3>() ;
1002 
1003 	for (int mode = 0; mode < 5; mode++)
1004 	{
1005 		if (g_base > mode_cutoffs[mode][0] || b_base > mode_cutoffs[mode][0] || s_base > mode_cutoffs[mode][1])
1006 		{
1007 			continue;
1008 		}
1009 
1010 		// Encode the mode into a 4-bit vector
1011 		int mode_enc = mode < 4 ? (mode | (majcomp << 2)) : (majcomp | 0xC);
1012 
1013 		float mode_scale = mode_scales[mode];
1014 		float mode_rscale = mode_rscales[mode];
1015 
1016 		int gb_intcutoff = 1 << mode_bits[mode][1];
1017 		int s_intcutoff = 1 << mode_bits[mode][2];
1018 
1019 		// Quantize and unquantize R
1020 		int r_intval = astc::flt2int_rtn(r_base * mode_scale);
1021 
1022 		int r_lowbits = r_intval & 0x3f;
1023 
1024 		r_lowbits |= (mode_enc & 3) << 6;
1025 
1026 		uint8_t r_quantval;
1027 		quantize_and_unquantize_retain_top_two_bits(
1028 		    quant_level, static_cast<uint8_t>(r_lowbits), r_quantval);
1029 
1030 		r_intval = (r_intval & ~0x3f) | (r_quantval & 0x3f);
1031 		float r_fval = static_cast<float>(r_intval) * mode_rscale;
1032 
1033 		// Recompute G and B, then quantize and unquantize them
1034 		float g_fval = r_fval - color.lane<1>() ;
1035 		float b_fval = r_fval - color.lane<2>() ;
1036 
1037 		g_fval = astc::clamp(g_fval, 0.0f, 65535.0f);
1038 		b_fval = astc::clamp(b_fval, 0.0f, 65535.0f);
1039 
1040 		int g_intval = astc::flt2int_rtn(g_fval * mode_scale);
1041 		int b_intval = astc::flt2int_rtn(b_fval * mode_scale);
1042 
1043 		if (g_intval >= gb_intcutoff || b_intval >= gb_intcutoff)
1044 		{
1045 			continue;
1046 		}
1047 
1048 		int g_lowbits = g_intval & 0x1f;
1049 		int b_lowbits = b_intval & 0x1f;
1050 
1051 		int bit0 = 0;
1052 		int bit1 = 0;
1053 		int bit2 = 0;
1054 		int bit3 = 0;
1055 
1056 		switch (mode)
1057 		{
1058 		case 0:
1059 		case 2:
1060 			bit0 = (r_intval >> 9) & 1;
1061 			break;
1062 		case 1:
1063 		case 3:
1064 			bit0 = (r_intval >> 8) & 1;
1065 			break;
1066 		case 4:
1067 		case 5:
1068 			bit0 = (g_intval >> 6) & 1;
1069 			break;
1070 		}
1071 
1072 		switch (mode)
1073 		{
1074 		case 0:
1075 		case 1:
1076 		case 2:
1077 		case 3:
1078 			bit2 = (r_intval >> 7) & 1;
1079 			break;
1080 		case 4:
1081 		case 5:
1082 			bit2 = (b_intval >> 6) & 1;
1083 			break;
1084 		}
1085 
1086 		switch (mode)
1087 		{
1088 		case 0:
1089 		case 2:
1090 			bit1 = (r_intval >> 8) & 1;
1091 			break;
1092 		case 1:
1093 		case 3:
1094 		case 4:
1095 		case 5:
1096 			bit1 = (g_intval >> 5) & 1;
1097 			break;
1098 		}
1099 
1100 		switch (mode)
1101 		{
1102 		case 0:
1103 			bit3 = (r_intval >> 10) & 1;
1104 			break;
1105 		case 2:
1106 			bit3 = (r_intval >> 6) & 1;
1107 			break;
1108 		case 1:
1109 		case 3:
1110 		case 4:
1111 		case 5:
1112 			bit3 = (b_intval >> 5) & 1;
1113 			break;
1114 		}
1115 
1116 		g_lowbits |= (mode_enc & 0x4) << 5;
1117 		b_lowbits |= (mode_enc & 0x8) << 4;
1118 
1119 		g_lowbits |= bit0 << 6;
1120 		g_lowbits |= bit1 << 5;
1121 		b_lowbits |= bit2 << 6;
1122 		b_lowbits |= bit3 << 5;
1123 
1124 		uint8_t g_quantval;
1125 		uint8_t b_quantval;
1126 
1127 		quantize_and_unquantize_retain_top_four_bits(
1128 		    quant_level, static_cast<uint8_t>(g_lowbits), g_quantval);
1129 		quantize_and_unquantize_retain_top_four_bits(
1130 		    quant_level, static_cast<uint8_t>(b_lowbits), b_quantval);
1131 
1132 		g_intval = (g_intval & ~0x1f) | (g_quantval & 0x1f);
1133 		b_intval = (b_intval & ~0x1f) | (b_quantval & 0x1f);
1134 
1135 		g_fval = static_cast<float>(g_intval) * mode_rscale;
1136 		b_fval = static_cast<float>(b_intval) * mode_rscale;
1137 
1138 		// Recompute the scale value, based on the errors introduced to red, green and blue
1139 
1140 		// If the error is positive, then the R,G,B errors combined have raised the color
1141 		// value overall; as such, the scale value needs to be increased.
1142 		float rgb_errorsum = (r_fval - color.lane<0>() ) + (r_fval - g_fval - color.lane<1>() ) + (r_fval - b_fval - color.lane<2>() );
1143 
1144 		float s_fval = s_base + rgb_errorsum * (1.0f / 3.0f);
1145 		s_fval = astc::clamp(s_fval, 0.0f, 1e9f);
1146 
1147 		int s_intval = astc::flt2int_rtn(s_fval * mode_scale);
1148 
1149 		if (s_intval >= s_intcutoff)
1150 		{
1151 			continue;
1152 		}
1153 
1154 		int s_lowbits = s_intval & 0x1f;
1155 
1156 		int bit4;
1157 		int bit5;
1158 		int bit6;
1159 		switch (mode)
1160 		{
1161 		case 1:
1162 			bit6 = (r_intval >> 9) & 1;
1163 			break;
1164 		default:
1165 			bit6 = (s_intval >> 5) & 1;
1166 			break;
1167 		}
1168 
1169 		switch (mode)
1170 		{
1171 		case 4:
1172 			bit5 = (r_intval >> 7) & 1;
1173 			break;
1174 		case 1:
1175 			bit5 = (r_intval >> 10) & 1;
1176 			break;
1177 		default:
1178 			bit5 = (s_intval >> 6) & 1;
1179 			break;
1180 		}
1181 
1182 		switch (mode)
1183 		{
1184 		case 2:
1185 			bit4 = (s_intval >> 7) & 1;
1186 			break;
1187 		default:
1188 			bit4 = (r_intval >> 6) & 1;
1189 			break;
1190 		}
1191 
1192 		s_lowbits |= bit6 << 5;
1193 		s_lowbits |= bit5 << 6;
1194 		s_lowbits |= bit4 << 7;
1195 
1196 		uint8_t s_quantval;
1197 
1198 		quantize_and_unquantize_retain_top_four_bits(
1199 		    quant_level, static_cast<uint8_t>(s_lowbits), s_quantval);
1200 
1201 		output[0] = r_quantval;
1202 		output[1] = g_quantval;
1203 		output[2] = b_quantval;
1204 		output[3] = s_quantval;
1205 		return;
1206 	}
1207 
1208 	// Failed to encode any of the modes above? In that case encode using mode #5
1209 	float vals[4];
1210 	vals[0] = color_bak.lane<0>();
1211 	vals[1] = color_bak.lane<1>();
1212 	vals[2] = color_bak.lane<2>();
1213 	vals[3] = color_bak.lane<3>();
1214 
1215 	int ivals[4];
1216 	float cvals[3];
1217 
1218 	for (int i = 0; i < 3; i++)
1219 	{
1220 		vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f);
1221 		ivals[i] = astc::flt2int_rtn(vals[i] * (1.0f / 512.0f));
1222 		cvals[i] = static_cast<float>(ivals[i]) * 512.0f;
1223 	}
1224 
1225 	float rgb_errorsum = (cvals[0] - vals[0]) + (cvals[1] - vals[1]) + (cvals[2] - vals[2]);
1226 	vals[3] += rgb_errorsum * (1.0f / 3.0f);
1227 
1228 	vals[3] = astc::clamp(vals[3], 0.0f, 65020.0f);
1229 	ivals[3] = astc::flt2int_rtn(vals[3] * (1.0f / 512.0f));
1230 
1231 	int encvals[4];
1232 	encvals[0] = (ivals[0] & 0x3f) | 0xC0;
1233 	encvals[1] = (ivals[1] & 0x7f) | 0x80;
1234 	encvals[2] = (ivals[2] & 0x7f) | 0x80;
1235 	encvals[3] = (ivals[3] & 0x7f) | ((ivals[0] & 0x40) << 1);
1236 
1237 	for (uint8_t i = 0; i < 4; i++)
1238 	{
1239 		quantize_and_unquantize_retain_top_four_bits(
1240 		    quant_level, static_cast<uint8_t>(encvals[i]), output[i]);
1241 	}
1242 
1243 	return;
1244 }
1245 
1246 /**
1247  * @brief Quantize a HDR RGB color using direct RGB encoding.
1248  *
1249  * @param      color0        The input unquantized color0 endpoint.
1250  * @param      color1        The input unquantized color1 endpoint.
1251  * @param[out] output        The output endpoints, returned as packed RGB+RGB pairs with mode bits.
1252  * @param      quant_level   The quantization level to use.
1253  */
quantize_hdr_rgb(vfloat4 color0,vfloat4 color1,uint8_t output[6],quant_method quant_level)1254 static void quantize_hdr_rgb(
1255 	vfloat4 color0,
1256 	vfloat4 color1,
1257 	uint8_t output[6],
1258 	quant_method quant_level
1259 ) {
1260 	// Note: color*.lane<3> is not used so we can ignore it
1261 	color0 = clamp(0.0f, 65535.0f, color0);
1262 	color1 = clamp(0.0f, 65535.0f, color1);
1263 
1264 	vfloat4 color0_bak = color0;
1265 	vfloat4 color1_bak = color1;
1266 
1267 	int majcomp;
1268 	if (color1.lane<0>() > color1.lane<1>() && color1.lane<0>() > color1.lane<2>())
1269 	{
1270 		majcomp = 0;
1271 	}
1272 	else if (color1.lane<1>() > color1.lane<2>())
1273 	{
1274 		majcomp = 1;
1275 	}
1276 	else
1277 	{
1278 		majcomp = 2;
1279 	}
1280 
1281 	// Swizzle the components
1282 	switch (majcomp)
1283 	{
1284 	case 1:  // red-green swap
1285 		color0 = color0.swz<1, 0, 2, 3>();
1286 		color1 = color1.swz<1, 0, 2, 3>();
1287 		break;
1288 	case 2:  // red-blue swap
1289 		color0 = color0.swz<2, 1, 0, 3>();
1290 		color1 = color1.swz<2, 1, 0, 3>();
1291 		break;
1292 	default:
1293 		break;
1294 	}
1295 
1296 	float a_base = color1.lane<0>();
1297 	a_base = astc::clamp(a_base, 0.0f, 65535.0f);
1298 
1299 	float b0_base = a_base - color1.lane<1>();
1300 	float b1_base = a_base - color1.lane<2>();
1301 	float c_base = a_base - color0.lane<0>();
1302 	float d0_base = a_base - b0_base - c_base - color0.lane<1>();
1303 	float d1_base = a_base - b1_base - c_base - color0.lane<2>();
1304 
1305 	// Number of bits in the various fields in the various modes
1306 	static const int mode_bits[8][4] {
1307 		{9, 7, 6, 7},
1308 		{9, 8, 6, 6},
1309 		{10, 6, 7, 7},
1310 		{10, 7, 7, 6},
1311 		{11, 8, 6, 5},
1312 		{11, 6, 8, 6},
1313 		{12, 7, 7, 5},
1314 		{12, 6, 7, 6}
1315 	};
1316 
1317 	// Cutoffs to use for the computed values of a,b,c,d, assuming the
1318 	// range 0..65535 are LNS values corresponding to fp16.
1319 	static const float mode_cutoffs[8][4] {
1320 		{16384, 8192, 8192, 8},	// mode 0: 9,7,6,7
1321 		{32768, 8192, 4096, 8},	// mode 1: 9,8,6,6
1322 		{4096, 8192, 4096, 4},	// mode 2: 10,6,7,7
1323 		{8192, 8192, 2048, 4},	// mode 3: 10,7,7,6
1324 		{8192, 2048, 512, 2},	// mode 4: 11,8,6,5
1325 		{2048, 8192, 1024, 2},	// mode 5: 11,6,8,6
1326 		{2048, 2048, 256, 1},	// mode 6: 12,7,7,5
1327 		{1024, 2048, 512, 1},	// mode 7: 12,6,7,6
1328 	};
1329 
1330 	static const float mode_scales[8] {
1331 		1.0f / 128.0f,
1332 		1.0f / 128.0f,
1333 		1.0f / 64.0f,
1334 		1.0f / 64.0f,
1335 		1.0f / 32.0f,
1336 		1.0f / 32.0f,
1337 		1.0f / 16.0f,
1338 		1.0f / 16.0f,
1339 	};
1340 
1341 	// Scaling factors when going from what was encoded in the mode to 16 bits.
1342 	static const float mode_rscales[8] {
1343 		128.0f,
1344 		128.0f,
1345 		64.0f,
1346 		64.0f,
1347 		32.0f,
1348 		32.0f,
1349 		16.0f,
1350 		16.0f
1351 	};
1352 
1353 	// Try modes one by one, with the highest-precision mode first.
1354 	for (int mode = 7; mode >= 0; mode--)
1355 	{
1356 		// For each mode, test if we can in fact accommodate the computed b, c, and d values.
1357 		// If we clearly can't, then we skip to the next mode.
1358 
1359 		float b_cutoff = mode_cutoffs[mode][0];
1360 		float c_cutoff = mode_cutoffs[mode][1];
1361 		float d_cutoff = mode_cutoffs[mode][2];
1362 
1363 		if (b0_base > b_cutoff || b1_base > b_cutoff || c_base > c_cutoff || fabsf(d0_base) > d_cutoff || fabsf(d1_base) > d_cutoff)
1364 		{
1365 			continue;
1366 		}
1367 
1368 		float mode_scale = mode_scales[mode];
1369 		float mode_rscale = mode_rscales[mode];
1370 
1371 		int b_intcutoff = 1 << mode_bits[mode][1];
1372 		int c_intcutoff = 1 << mode_bits[mode][2];
1373 		int d_intcutoff = 1 << (mode_bits[mode][3] - 1);
1374 
1375 		// Quantize and unquantize A, with the assumption that its high bits can be handled safely.
1376 		int a_intval = astc::flt2int_rtn(a_base * mode_scale);
1377 		int a_lowbits = a_intval & 0xFF;
1378 
1379 		int a_quantval = quant_color(quant_level, a_lowbits);
1380 		int a_uquantval = a_quantval;
1381 		a_intval = (a_intval & ~0xFF) | a_uquantval;
1382 		float a_fval = static_cast<float>(a_intval) * mode_rscale;
1383 
1384 		// Recompute C, then quantize and unquantize it
1385 		float c_fval = a_fval - color0.lane<0>();
1386 		c_fval = astc::clamp(c_fval, 0.0f, 65535.0f);
1387 
1388 		int c_intval = astc::flt2int_rtn(c_fval * mode_scale);
1389 
1390 		if (c_intval >= c_intcutoff)
1391 		{
1392 			continue;
1393 		}
1394 
1395 		int c_lowbits = c_intval & 0x3f;
1396 
1397 		c_lowbits |= (mode & 1) << 7;
1398 		c_lowbits |= (a_intval & 0x100) >> 2;
1399 
1400 		uint8_t c_quantval;
1401 
1402 		quantize_and_unquantize_retain_top_two_bits(
1403 		    quant_level, static_cast<uint8_t>(c_lowbits), c_quantval);
1404 
1405 		c_intval = (c_intval & ~0x3F) | (c_quantval & 0x3F);
1406 		c_fval = static_cast<float>(c_intval) * mode_rscale;
1407 
1408 		// Recompute B0 and B1, then quantize and unquantize them
1409 		float b0_fval = a_fval - color1.lane<1>();
1410 		float b1_fval = a_fval - color1.lane<2>();
1411 
1412 		b0_fval = astc::clamp(b0_fval, 0.0f, 65535.0f);
1413 		b1_fval = astc::clamp(b1_fval, 0.0f, 65535.0f);
1414 		int b0_intval = astc::flt2int_rtn(b0_fval * mode_scale);
1415 		int b1_intval = astc::flt2int_rtn(b1_fval * mode_scale);
1416 
1417 		if (b0_intval >= b_intcutoff || b1_intval >= b_intcutoff)
1418 		{
1419 			continue;
1420 		}
1421 
1422 		int b0_lowbits = b0_intval & 0x3f;
1423 		int b1_lowbits = b1_intval & 0x3f;
1424 
1425 		int bit0 = 0;
1426 		int bit1 = 0;
1427 		switch (mode)
1428 		{
1429 		case 0:
1430 		case 1:
1431 		case 3:
1432 		case 4:
1433 		case 6:
1434 			bit0 = (b0_intval >> 6) & 1;
1435 			break;
1436 		case 2:
1437 		case 5:
1438 		case 7:
1439 			bit0 = (a_intval >> 9) & 1;
1440 			break;
1441 		}
1442 
1443 		switch (mode)
1444 		{
1445 		case 0:
1446 		case 1:
1447 		case 3:
1448 		case 4:
1449 		case 6:
1450 			bit1 = (b1_intval >> 6) & 1;
1451 			break;
1452 		case 2:
1453 			bit1 = (c_intval >> 6) & 1;
1454 			break;
1455 		case 5:
1456 		case 7:
1457 			bit1 = (a_intval >> 10) & 1;
1458 			break;
1459 		}
1460 
1461 		b0_lowbits |= bit0 << 6;
1462 		b1_lowbits |= bit1 << 6;
1463 
1464 		b0_lowbits |= ((mode >> 1) & 1) << 7;
1465 		b1_lowbits |= ((mode >> 2) & 1) << 7;
1466 
1467 		uint8_t b0_quantval;
1468 		uint8_t b1_quantval;
1469 
1470 		quantize_and_unquantize_retain_top_two_bits(
1471 		    quant_level, static_cast<uint8_t>(b0_lowbits), b0_quantval);
1472 		quantize_and_unquantize_retain_top_two_bits(
1473 		    quant_level, static_cast<uint8_t>(b1_lowbits), b1_quantval);
1474 
1475 		b0_intval = (b0_intval & ~0x3f) | (b0_quantval & 0x3f);
1476 		b1_intval = (b1_intval & ~0x3f) | (b1_quantval & 0x3f);
1477 		b0_fval = static_cast<float>(b0_intval) * mode_rscale;
1478 		b1_fval = static_cast<float>(b1_intval) * mode_rscale;
1479 
1480 		// Recompute D0 and D1, then quantize and unquantize them
1481 		float d0_fval = a_fval - b0_fval - c_fval - color0.lane<1>();
1482 		float d1_fval = a_fval - b1_fval - c_fval - color0.lane<2>();
1483 
1484 		d0_fval = astc::clamp(d0_fval, -65535.0f, 65535.0f);
1485 		d1_fval = astc::clamp(d1_fval, -65535.0f, 65535.0f);
1486 
1487 		int d0_intval = astc::flt2int_rtn(d0_fval * mode_scale);
1488 		int d1_intval = astc::flt2int_rtn(d1_fval * mode_scale);
1489 
1490 		if (abs(d0_intval) >= d_intcutoff || abs(d1_intval) >= d_intcutoff)
1491 		{
1492 			continue;
1493 		}
1494 
1495 		int d0_lowbits = d0_intval & 0x1f;
1496 		int d1_lowbits = d1_intval & 0x1f;
1497 
1498 		int bit2 = 0;
1499 		int bit3 = 0;
1500 		int bit4;
1501 		int bit5;
1502 		switch (mode)
1503 		{
1504 		case 0:
1505 		case 2:
1506 			bit2 = (d0_intval >> 6) & 1;
1507 			break;
1508 		case 1:
1509 		case 4:
1510 			bit2 = (b0_intval >> 7) & 1;
1511 			break;
1512 		case 3:
1513 			bit2 = (a_intval >> 9) & 1;
1514 			break;
1515 		case 5:
1516 			bit2 = (c_intval >> 7) & 1;
1517 			break;
1518 		case 6:
1519 		case 7:
1520 			bit2 = (a_intval >> 11) & 1;
1521 			break;
1522 		}
1523 		switch (mode)
1524 		{
1525 		case 0:
1526 		case 2:
1527 			bit3 = (d1_intval >> 6) & 1;
1528 			break;
1529 		case 1:
1530 		case 4:
1531 			bit3 = (b1_intval >> 7) & 1;
1532 			break;
1533 		case 3:
1534 		case 5:
1535 		case 6:
1536 		case 7:
1537 			bit3 = (c_intval >> 6) & 1;
1538 			break;
1539 		}
1540 
1541 		switch (mode)
1542 		{
1543 		case 4:
1544 		case 6:
1545 			bit4 = (a_intval >> 9) & 1;
1546 			bit5 = (a_intval >> 10) & 1;
1547 			break;
1548 		default:
1549 			bit4 = (d0_intval >> 5) & 1;
1550 			bit5 = (d1_intval >> 5) & 1;
1551 			break;
1552 		}
1553 
1554 		d0_lowbits |= bit2 << 6;
1555 		d1_lowbits |= bit3 << 6;
1556 		d0_lowbits |= bit4 << 5;
1557 		d1_lowbits |= bit5 << 5;
1558 
1559 		d0_lowbits |= (majcomp & 1) << 7;
1560 		d1_lowbits |= ((majcomp >> 1) & 1) << 7;
1561 
1562 		uint8_t d0_quantval;
1563 		uint8_t d1_quantval;
1564 
1565 		quantize_and_unquantize_retain_top_four_bits(
1566 		    quant_level, static_cast<uint8_t>(d0_lowbits), d0_quantval);
1567 		quantize_and_unquantize_retain_top_four_bits(
1568 		    quant_level, static_cast<uint8_t>(d1_lowbits), d1_quantval);
1569 
1570 		output[0] = static_cast<uint8_t>(a_quantval);
1571 		output[1] = c_quantval;
1572 		output[2] = b0_quantval;
1573 		output[3] = b1_quantval;
1574 		output[4] = d0_quantval;
1575 		output[5] = d1_quantval;
1576 		return;
1577 	}
1578 
1579 	// If neither of the modes fit we will use a flat representation for storing data, using 8 bits
1580 	// for red and green, and 7 bits for blue. This gives color accuracy roughly similar to LDR
1581 	// 4:4:3 which is not at all great but usable. This representation is used if the light color is
1582 	// more than 4x the color value of the dark color.
1583 	float vals[6];
1584 	vals[0] = color0_bak.lane<0>();
1585 	vals[1] = color1_bak.lane<0>();
1586 	vals[2] = color0_bak.lane<1>();
1587 	vals[3] = color1_bak.lane<1>();
1588 	vals[4] = color0_bak.lane<2>();
1589 	vals[5] = color1_bak.lane<2>();
1590 
1591 	for (int i = 0; i < 6; i++)
1592 	{
1593 		vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f);
1594 	}
1595 
1596 	for (int i = 0; i < 4; i++)
1597 	{
1598 		int idx = astc::flt2int_rtn(vals[i] * 1.0f / 256.0f);
1599 		output[i] = quant_color(quant_level, idx);
1600 	}
1601 
1602 	for (int i = 4; i < 6; i++)
1603 	{
1604 		int idx = astc::flt2int_rtn(vals[i] * 1.0f / 512.0f) + 128;
1605 		quantize_and_unquantize_retain_top_two_bits(
1606 		    quant_level, static_cast<uint8_t>(idx), output[i]);
1607 	}
1608 
1609 	return;
1610 }
1611 
1612 /**
1613  * @brief Quantize a HDR RGB + LDR A color using direct RGBA encoding.
1614  *
1615  * @param      color0        The input unquantized color0 endpoint.
1616  * @param      color1        The input unquantized color1 endpoint.
1617  * @param[out] output        The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1618  * @param      quant_level   The quantization level to use.
1619  */
quantize_hdr_rgb_ldr_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[8],quant_method quant_level)1620 static void quantize_hdr_rgb_ldr_alpha(
1621 	vfloat4 color0,
1622 	vfloat4 color1,
1623 	uint8_t output[8],
1624 	quant_method quant_level
1625 ) {
1626 	float scale = 1.0f / 257.0f;
1627 
1628 	float a0 = astc::clamp255f(color0.lane<3>() * scale);
1629 	float a1 = astc::clamp255f(color1.lane<3>() * scale);
1630 
1631 	output[6] = quant_color(quant_level, astc::flt2int_rtn(a0), a0);
1632 	output[7] = quant_color(quant_level, astc::flt2int_rtn(a1), a1);
1633 
1634 	quantize_hdr_rgb(color0, color1, output, quant_level);
1635 }
1636 
1637 /**
1638  * @brief Quantize a HDR L color using the large range encoding.
1639  *
1640  * @param      color0        The input unquantized color0 endpoint.
1641  * @param      color1        The input unquantized color1 endpoint.
1642  * @param[out] output        The output endpoints, returned as packed (l0, l1).
1643  * @param      quant_level   The quantization level to use.
1644  */
quantize_hdr_luminance_large_range(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)1645 static void quantize_hdr_luminance_large_range(
1646 	vfloat4 color0,
1647 	vfloat4 color1,
1648 	uint8_t output[2],
1649 	quant_method quant_level
1650 ) {
1651 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
1652 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
1653 
1654 	if (lum1 < lum0)
1655 	{
1656 		float avg = (lum0 + lum1) * 0.5f;
1657 		lum0 = avg;
1658 		lum1 = avg;
1659 	}
1660 
1661 	int ilum1 = astc::flt2int_rtn(lum1);
1662 	int ilum0 = astc::flt2int_rtn(lum0);
1663 
1664 	// Find the closest encodable point in the upper half of the code-point space
1665 	int upper_v0 = (ilum0 + 128) >> 8;
1666 	int upper_v1 = (ilum1 + 128) >> 8;
1667 
1668 	upper_v0 = astc::clamp(upper_v0, 0, 255);
1669 	upper_v1 = astc::clamp(upper_v1, 0, 255);
1670 
1671 	// Find the closest encodable point in the lower half of the code-point space
1672 	int lower_v0 = (ilum1 + 256) >> 8;
1673 	int lower_v1 = ilum0 >> 8;
1674 
1675 	lower_v0 = astc::clamp(lower_v0, 0, 255);
1676 	lower_v1 = astc::clamp(lower_v1, 0, 255);
1677 
1678 	// Determine the distance between the point in code-point space and the input value
1679 	int upper0_dec = upper_v0 << 8;
1680 	int upper1_dec = upper_v1 << 8;
1681 	int lower0_dec = (lower_v1 << 8) + 128;
1682 	int lower1_dec = (lower_v0 << 8) - 128;
1683 
1684 	int upper0_diff = upper0_dec - ilum0;
1685 	int upper1_diff = upper1_dec - ilum1;
1686 	int lower0_diff = lower0_dec - ilum0;
1687 	int lower1_diff = lower1_dec - ilum1;
1688 
1689 	int upper_error = (upper0_diff * upper0_diff) + (upper1_diff * upper1_diff);
1690 	int lower_error = (lower0_diff * lower0_diff) + (lower1_diff * lower1_diff);
1691 
1692 	int v0, v1;
1693 	if (upper_error < lower_error)
1694 	{
1695 		v0 = upper_v0;
1696 		v1 = upper_v1;
1697 	}
1698 	else
1699 	{
1700 		v0 = lower_v0;
1701 		v1 = lower_v1;
1702 	}
1703 
1704 	// OK; encode
1705 	output[0] = quant_color(quant_level, v0);
1706 	output[1] = quant_color(quant_level, v1);
1707 }
1708 
1709 /**
1710  * @brief Quantize a HDR L color using the small range encoding.
1711  *
1712  * @param      color0        The input unquantized color0 endpoint.
1713  * @param      color1        The input unquantized color1 endpoint.
1714  * @param[out] output        The output endpoints, returned as packed (l0, l1) with mode bits.
1715  * @param      quant_level   The quantization level to use.
1716  *
1717  * @return Returns @c false on failure, @c true on success.
1718  */
try_quantize_hdr_luminance_small_range(vfloat4 color0,vfloat4 color1,uint8_t output[2],quant_method quant_level)1719 static bool try_quantize_hdr_luminance_small_range(
1720 	vfloat4 color0,
1721 	vfloat4 color1,
1722 	uint8_t output[2],
1723 	quant_method quant_level
1724 ) {
1725 	float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f);
1726 	float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f);
1727 
1728 	if (lum1 < lum0)
1729 	{
1730 		float avg = (lum0 + lum1) * 0.5f;
1731 		lum0 = avg;
1732 		lum1 = avg;
1733 	}
1734 
1735 	int ilum1 = astc::flt2int_rtn(lum1);
1736 	int ilum0 = astc::flt2int_rtn(lum0);
1737 
1738 	// Difference of more than a factor-of-2 results in immediate failure
1739 	if (ilum1 - ilum0 > 2048)
1740 	{
1741 		return false;
1742 	}
1743 
1744 	int lowval, highval, diffval;
1745 	int v0, v1;
1746 	int v0e, v1e;
1747 	int v0d, v1d;
1748 
1749 	// Try to encode the high-precision submode
1750 	lowval = (ilum0 + 16) >> 5;
1751 	highval = (ilum1 + 16) >> 5;
1752 
1753 	lowval = astc::clamp(lowval, 0, 2047);
1754 	highval = astc::clamp(highval, 0, 2047);
1755 
1756 	v0 = lowval & 0x7F;
1757 	v0e = quant_color(quant_level, v0);
1758 	v0d = v0e;
1759 
1760 	if (v0d < 0x80)
1761 	{
1762 		lowval = (lowval & ~0x7F) | v0d;
1763 		diffval = highval - lowval;
1764 		if (diffval >= 0 && diffval <= 15)
1765 		{
1766 			v1 = ((lowval >> 3) & 0xF0) | diffval;
1767 			v1e = quant_color(quant_level, v1);
1768 			v1d = v1e;
1769 			if ((v1d & 0xF0) == (v1 & 0xF0))
1770 			{
1771 				output[0] = static_cast<uint8_t>(v0e);
1772 				output[1] = static_cast<uint8_t>(v1e);
1773 				return true;
1774 			}
1775 		}
1776 	}
1777 
1778 	// Try to encode the low-precision submode
1779 	lowval = (ilum0 + 32) >> 6;
1780 	highval = (ilum1 + 32) >> 6;
1781 
1782 	lowval = astc::clamp(lowval, 0, 1023);
1783 	highval = astc::clamp(highval, 0, 1023);
1784 
1785 	v0 = (lowval & 0x7F) | 0x80;
1786 	v0e = quant_color(quant_level, v0);
1787 	v0d = v0e;
1788 	if ((v0d & 0x80) == 0)
1789 	{
1790 		return false;
1791 	}
1792 
1793 	lowval = (lowval & ~0x7F) | (v0d & 0x7F);
1794 	diffval = highval - lowval;
1795 	if (diffval < 0 || diffval > 31)
1796 	{
1797 		return false;
1798 	}
1799 
1800 	v1 = ((lowval >> 2) & 0xE0) | diffval;
1801 	v1e = quant_color(quant_level, v1);
1802 	v1d = v1e;
1803 	if ((v1d & 0xE0) != (v1 & 0xE0))
1804 	{
1805 		return false;
1806 	}
1807 
1808 	output[0] = static_cast<uint8_t>(v0e);
1809 	output[1] = static_cast<uint8_t>(v1e);
1810 	return true;
1811 }
1812 
1813 /**
1814  * @brief Quantize a HDR A color using either delta or direct RGBA encoding.
1815  *
1816  * @param      alpha0        The input unquantized color0 endpoint.
1817  * @param      alpha1        The input unquantized color1 endpoint.
1818  * @param[out] output        The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1819  * @param      quant_level   The quantization level to use.
1820  */
quantize_hdr_alpha(float alpha0,float alpha1,uint8_t output[2],quant_method quant_level)1821 static void quantize_hdr_alpha(
1822 	float alpha0,
1823 	float alpha1,
1824 	uint8_t output[2],
1825 	quant_method quant_level
1826 ) {
1827 	alpha0 = astc::clamp(alpha0, 0.0f, 65280.0f);
1828 	alpha1 = astc::clamp(alpha1, 0.0f, 65280.0f);
1829 
1830 	int ialpha0 = astc::flt2int_rtn(alpha0);
1831 	int ialpha1 = astc::flt2int_rtn(alpha1);
1832 
1833 	int val0, val1, diffval;
1834 	int v6, v7;
1835 	int v6e, v7e;
1836 	int v6d, v7d;
1837 
1838 	// Try to encode one of the delta submodes, in decreasing-precision order
1839 	for (int i = 2; i >= 0; i--)
1840 	{
1841 		val0 = (ialpha0 + (128 >> i)) >> (8 - i);
1842 		val1 = (ialpha1 + (128 >> i)) >> (8 - i);
1843 
1844 		v6 = (val0 & 0x7F) | ((i & 1) << 7);
1845 		v6e = quant_color(quant_level, v6);
1846 		v6d = v6e;
1847 
1848 		if ((v6 ^ v6d) & 0x80)
1849 		{
1850 			continue;
1851 		}
1852 
1853 		val0 = (val0 & ~0x7f) | (v6d & 0x7f);
1854 		diffval = val1 - val0;
1855 		int cutoff = 32 >> i;
1856 		int mask = 2 * cutoff - 1;
1857 
1858 		if (diffval < -cutoff || diffval >= cutoff)
1859 		{
1860 			continue;
1861 		}
1862 
1863 		v7 = ((i & 2) << 6) | ((val0 >> 7) << (6 - i)) | (diffval & mask);
1864 		v7e = quant_color(quant_level, v7);
1865 		v7d = v7e;
1866 
1867 		static const int testbits[3] { 0xE0, 0xF0, 0xF8 };
1868 
1869 		if ((v7 ^ v7d) & testbits[i])
1870 		{
1871 			continue;
1872 		}
1873 
1874 		output[0] = static_cast<uint8_t>(v6e);
1875 		output[1] = static_cast<uint8_t>(v7e);
1876 		return;
1877 	}
1878 
1879 	// Could not encode any of the delta modes; instead encode a flat value
1880 	val0 = (ialpha0 + 256) >> 9;
1881 	val1 = (ialpha1 + 256) >> 9;
1882 	v6 = val0 | 0x80;
1883 	v7 = val1 | 0x80;
1884 
1885 	output[0] = quant_color(quant_level, v6);
1886 	output[1] = quant_color(quant_level, v7);
1887 
1888 	return;
1889 }
1890 
1891 /**
1892  * @brief Quantize a HDR RGBA color using either delta or direct RGBA encoding.
1893  *
1894  * @param      color0        The input unquantized color0 endpoint.
1895  * @param      color1        The input unquantized color1 endpoint.
1896  * @param[out] output        The output endpoints, returned as packed RGBA+RGBA pairs with mode bits.
1897  * @param      quant_level   The quantization level to use.
1898  */
quantize_hdr_rgb_alpha(vfloat4 color0,vfloat4 color1,uint8_t output[8],quant_method quant_level)1899 static void quantize_hdr_rgb_alpha(
1900 	vfloat4 color0,
1901 	vfloat4 color1,
1902 	uint8_t output[8],
1903 	quant_method quant_level
1904 ) {
1905 	quantize_hdr_rgb(color0, color1, output, quant_level);
1906 	quantize_hdr_alpha(color0.lane<3>(), color1.lane<3>(), output + 6, quant_level);
1907 }
1908 
1909 /* See header for documentation. */
pack_color_endpoints(QualityProfile privateProfile,vfloat4 color0,vfloat4 color1,vfloat4 rgbs_color,vfloat4 rgbo_color,int format,uint8_t * output,quant_method quant_level)1910 uint8_t pack_color_endpoints(
1911 	QualityProfile privateProfile,
1912 	vfloat4 color0,
1913 	vfloat4 color1,
1914 	vfloat4 rgbs_color,
1915 	vfloat4 rgbo_color,
1916 	int format,
1917 	uint8_t* output,
1918 	quant_method quant_level
1919 ) {
1920 	assert(QUANT_6 <= quant_level && quant_level <= QUANT_256);
1921 
1922 	// Clamp colors to a valid LDR range
1923 	// Note that HDR has a lower max, handled in the conversion functions
1924 	color0 = clamp(0.0f, 65535.0f, color0);
1925 	color1 = clamp(0.0f, 65535.0f, color1);
1926 
1927 	// Pre-scale the LDR value we need to the 0-255 quantizable range
1928 	vfloat4 color0_ldr = color0 * (1.0f  / 257.0f);
1929 	vfloat4 color1_ldr = color1 * (1.0f  / 257.0f);
1930 
1931 	uint8_t retval = 0;
1932 	float best_error = ERROR_CALC_DEFAULT;
1933 	vint4 color0_out, color1_out;
1934 	vint4 color0_out2, color1_out2;
1935 
1936 	switch (format)
1937 	{
1938 	case FMT_RGB:
1939 		if (quant_level <= QUANT_160)
1940 		{
1941 			if (try_quantize_rgb_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level))
1942 			{
1943 				vint4 color0_unpack;
1944 				vint4 color1_unpack;
1945 				rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack);
1946 
1947 				retval = FMT_RGB_DELTA;
1948 				best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1949 			}
1950 
1951 			if (try_quantize_rgb_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
1952 			{
1953 				vint4 color0_unpack;
1954 				vint4 color1_unpack;
1955 				rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1956 
1957 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1958 				if (error < best_error)
1959 				{
1960 					retval = FMT_RGB_DELTA;
1961 					best_error = error;
1962 					color0_out = color0_out2;
1963 					color1_out = color1_out2;
1964 				}
1965 			}
1966 		}
1967 
1968 		if (quant_level < QUANT_256)
1969 		{
1970 			if (try_quantize_rgb_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
1971 			{
1972 				vint4 color0_unpack;
1973 				vint4 color1_unpack;
1974 				rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1975 
1976 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1977 				if (error < best_error)
1978 				{
1979 					retval = FMT_RGB;
1980 					best_error = error;
1981 					color0_out = color0_out2;
1982 					color1_out = color1_out2;
1983 				}
1984 			}
1985 		}
1986 
1987 		{
1988 			quantize_rgb(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level);
1989 
1990 			vint4 color0_unpack;
1991 			vint4 color1_unpack;
1992 			rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
1993 
1994 			float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
1995 			if (error < best_error)
1996 			{
1997 				retval =  FMT_RGB;
1998 				color0_out = color0_out2;
1999 				color1_out = color1_out2;
2000 			}
2001 		}
2002 
2003 		// TODO: Can we vectorize this?
2004 		output[0] = static_cast<uint8_t>(color0_out.lane<0>());
2005 		output[1] = static_cast<uint8_t>(color1_out.lane<0>());
2006 		output[2] = static_cast<uint8_t>(color0_out.lane<1>());
2007 		output[3] = static_cast<uint8_t>(color1_out.lane<1>());
2008 		output[4] = static_cast<uint8_t>(color0_out.lane<2>());
2009 		output[5] = static_cast<uint8_t>(color1_out.lane<2>());
2010 		break;
2011 
2012 	case FMT_RGBA:
2013 		if ((privateProfile == HIGH_QUALITY_PROFILE) && (quant_level <= QUANT_160)) // only full quality profile to try
2014 		{
2015 			if (try_quantize_rgba_delta_blue_contract(color0_ldr, color1_ldr, color0_out, color1_out, quant_level))
2016 			{
2017 				vint4 color0_unpack;
2018 				vint4 color1_unpack;
2019 				rgba_delta_unpack(color0_out, color1_out, color0_unpack, color1_unpack);
2020 
2021 				retval = FMT_RGBA_DELTA;
2022 				best_error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2023 			}
2024 
2025 			if (try_quantize_rgba_delta(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
2026 			{
2027 				vint4 color0_unpack;
2028 				vint4 color1_unpack;
2029 				rgba_delta_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2030 
2031 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2032 				if (error < best_error)
2033 				{
2034 					retval = FMT_RGBA_DELTA;
2035 					best_error = error;
2036 					color0_out = color0_out2;
2037 					color1_out = color1_out2;
2038 				}
2039 			}
2040 		}
2041 
2042 		if (quant_level < QUANT_256)
2043 		{
2044 			if (try_quantize_rgba_blue_contract(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level))
2045 			{
2046 				vint4 color0_unpack;
2047 				vint4 color1_unpack;
2048 				rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2049 
2050 				float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2051 				if (error < best_error)
2052 				{
2053 					retval = FMT_RGBA;
2054 					best_error = error;
2055 					color0_out = color0_out2;
2056 					color1_out = color1_out2;
2057 				}
2058 			}
2059 		}
2060 
2061 		{
2062 			quantize_rgba(color0_ldr, color1_ldr, color0_out2, color1_out2, quant_level);
2063 
2064 			vint4 color0_unpack;
2065 			vint4 color1_unpack;
2066 			rgba_unpack(color0_out2, color1_out2, color0_unpack, color1_unpack);
2067 
2068 			float error = get_rgba_encoding_error(color0_ldr, color1_ldr, color0_unpack, color1_unpack);
2069 			if (error < best_error)
2070 			{
2071 				retval =  FMT_RGBA;
2072 				color0_out = color0_out2;
2073 				color1_out = color1_out2;
2074 			}
2075 		}
2076 
2077 		// TODO: Can we vectorize this?
2078 		output[0] = static_cast<uint8_t>(color0_out.lane<0>());
2079 		output[1] = static_cast<uint8_t>(color1_out.lane<0>());
2080 		output[2] = static_cast<uint8_t>(color0_out.lane<1>());
2081 		output[3] = static_cast<uint8_t>(color1_out.lane<1>());
2082 		output[4] = static_cast<uint8_t>(color0_out.lane<2>());
2083 		output[5] = static_cast<uint8_t>(color1_out.lane<2>());
2084 		output[6] = static_cast<uint8_t>(color0_out.lane<3>());
2085 		output[7] = static_cast<uint8_t>(color1_out.lane<3>());
2086 		break;
2087 
2088 	case FMT_RGB_SCALE:
2089 		quantize_rgbs(rgbs_color, output, quant_level);
2090 		retval = FMT_RGB_SCALE;
2091 		break;
2092 
2093 	case FMT_HDR_RGB_SCALE:
2094 		quantize_hdr_rgbo(rgbo_color, output, quant_level);
2095 		retval = FMT_HDR_RGB_SCALE;
2096 		break;
2097 
2098 	case FMT_HDR_RGB:
2099 		quantize_hdr_rgb(color0, color1, output, quant_level);
2100 		retval = FMT_HDR_RGB;
2101 		break;
2102 
2103 	case FMT_RGB_SCALE_ALPHA:
2104 		quantize_rgbs_alpha(color0_ldr, color1_ldr, rgbs_color, output, quant_level);
2105 		retval = FMT_RGB_SCALE_ALPHA;
2106 		break;
2107 
2108 	case FMT_HDR_LUMINANCE_SMALL_RANGE:
2109 	case FMT_HDR_LUMINANCE_LARGE_RANGE:
2110 		if (try_quantize_hdr_luminance_small_range(color0, color1, output, quant_level))
2111 		{
2112 			retval = FMT_HDR_LUMINANCE_SMALL_RANGE;
2113 			break;
2114 		}
2115 		quantize_hdr_luminance_large_range(color0, color1, output, quant_level);
2116 		retval = FMT_HDR_LUMINANCE_LARGE_RANGE;
2117 		break;
2118 
2119 	case FMT_LUMINANCE:
2120 		quantize_luminance(color0_ldr, color1_ldr, output, quant_level);
2121 		retval = FMT_LUMINANCE;
2122 		break;
2123 
2124 	case FMT_LUMINANCE_ALPHA:
2125 		if (quant_level <= 18)
2126 		{
2127 			if (try_quantize_luminance_alpha_delta(color0_ldr, color1_ldr, output, quant_level))
2128 			{
2129 				retval = FMT_LUMINANCE_ALPHA_DELTA;
2130 				break;
2131 			}
2132 		}
2133 		quantize_luminance_alpha(color0_ldr, color1_ldr, output, quant_level);
2134 		retval = FMT_LUMINANCE_ALPHA;
2135 		break;
2136 
2137 	case FMT_HDR_RGB_LDR_ALPHA:
2138 		quantize_hdr_rgb_ldr_alpha(color0, color1, output, quant_level);
2139 		retval = FMT_HDR_RGB_LDR_ALPHA;
2140 		break;
2141 
2142 	case FMT_HDR_RGBA:
2143 		quantize_hdr_rgb_alpha(color0, color1, output, quant_level);
2144 		retval = FMT_HDR_RGBA;
2145 		break;
2146 	}
2147 
2148 	return retval;
2149 }
2150 
2151 #endif
2152