1 /*
2 * Copyright 2015 Philip Taylor <philip@zaynar.co.uk>
3 * Copyright 2018 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file texcompress_astc.c
27 *
28 * Decompression code for GL_KHR_texture_compression_astc_ldr, which is just
29 * ASTC 2D LDR.
30 *
31 * The ASTC 2D LDR decoder (without the sRGB part) was copied from the OASTC
32 * library written by Philip Taylor. I added sRGB support and adjusted it for
33 * Mesa. - Marek
34 */
35
36 #include "texcompress_astc.h"
37 #include "macros.h"
38 #include "util/half_float.h"
39 #include <stdio.h>
40 #include <cstdlib> // for abort() on windows
41
/* Debug switches, both off by default.
 * VERBOSE_DECODE makes the block-decoding routines print the bit fields they
 * parse; VERBOSE_WRITE makes OutputBitVector print every append/skip call.
 */
static bool VERBOSE_DECODE = false;
static bool VERBOSE_WRITE = false;
44
/**
 * Scope for the status codes returned by the block decoding routines.
 * decode_error::ok signals success; every other value names the reason a
 * block was rejected.
 */
struct decode_error
{
   enum type {
      ok = 0,
      /* Void-extent block with the HDR ("D") bit set. */
      unsupported_hdr_void_extent,
      /* Reserved block mode encodings detected while parsing the block mode. */
      reserved_block_mode_1,
      reserved_block_mode_2,
      dual_plane_and_too_many_partitions,
      /* Void-extent coordinates with min >= max. */
      invalid_range_in_void_extent,
      weight_grid_exceeds_block_size,
      invalid_colour_endpoints_size,
      invalid_colour_endpoints_count,
      invalid_weight_bits,
      invalid_num_weights,
   };
};
62
63
/**
 * One row of the colour endpoint range table (see cem_ranges).
 */
struct cem_range {
   uint8_t max; /* largest value representable in this range */
   uint8_t t;   /* trit count used by the range */
   uint8_t q;   /* quint count used by the range */
   uint8_t b;   /* plain bit count used by the range */
};
68
/* Based on the Color Unquantization Parameters table,
 * plus the bit-only representations, sorted by increasing size.
 *
 * Columns are { max, t, q, b } as declared in cem_range. In every row
 * max + 1 == 3^t * 5^q * 2^b.
 */
static cem_range cem_ranges[] = {
   { 5, 1, 0, 1 },
   { 7, 0, 0, 3 },
   { 9, 0, 1, 1 },
   { 11, 1, 0, 2 },
   { 15, 0, 0, 4 },
   { 19, 0, 1, 2 },
   { 23, 1, 0, 3 },
   { 31, 0, 0, 5 },
   { 39, 0, 1, 3 },
   { 47, 1, 0, 4 },
   { 63, 0, 0, 6 },
   { 79, 0, 1, 4 },
   { 95, 1, 0, 5 },
   { 127, 0, 0, 7 },
   { 159, 0, 1, 5 },
   { 191, 1, 0, 6 },
   { 255, 0, 0, 8 },
};
91
/* Concatenate values into one integer, first argument most significant.
 * Every argument except the first is shifted by one bit per position, so
 * the arguments are expected to be single bits (0 or 1); only the first
 * argument may be wider.
 */
#define CAT_BITS_2(a, b) ( ((a) << 1) | (b) )
#define CAT_BITS_3(a, b, c) ( ((a) << 2) | ((b) << 1) | (c) )
#define CAT_BITS_4(a, b, c, d) ( ((a) << 3) | ((b) << 2) | ((c) << 1) | (d) )
#define CAT_BITS_5(a, b, c, d, e) ( ((a) << 4) | ((b) << 3) | ((c) << 2) | ((d) << 1) | (e) )
96
97 /**
98 * Unpack 5n+8 bits from 'in' into 5 output values.
99 * If n <= 4 then T should be uint32_t, else it must be uint64_t.
100 */
101 template <typename T>
unpack_trit_block(int n,T in,uint8_t * out)102 static void unpack_trit_block(int n, T in, uint8_t *out)
103 {
104 assert(n <= 6); /* else output will overflow uint8_t */
105
106 uint8_t T0 = (in >> (n)) & 0x1;
107 uint8_t T1 = (in >> (n+1)) & 0x1;
108 uint8_t T2 = (in >> (2*n+2)) & 0x1;
109 uint8_t T3 = (in >> (2*n+3)) & 0x1;
110 uint8_t T4 = (in >> (3*n+4)) & 0x1;
111 uint8_t T5 = (in >> (4*n+5)) & 0x1;
112 uint8_t T6 = (in >> (4*n+6)) & 0x1;
113 uint8_t T7 = (in >> (5*n+7)) & 0x1;
114 uint8_t mmask = (1 << n) - 1;
115 uint8_t m0 = (in >> (0)) & mmask;
116 uint8_t m1 = (in >> (n+2)) & mmask;
117 uint8_t m2 = (in >> (2*n+4)) & mmask;
118 uint8_t m3 = (in >> (3*n+5)) & mmask;
119 uint8_t m4 = (in >> (4*n+7)) & mmask;
120
121 uint8_t C;
122 uint8_t t4, t3, t2, t1, t0;
123 if (CAT_BITS_3(T4, T3, T2) == 0x7) {
124 C = CAT_BITS_5(T7, T6, T5, T1, T0);
125 t4 = t3 = 2;
126 } else {
127 C = CAT_BITS_5(T4, T3, T2, T1, T0);
128 if (CAT_BITS_2(T6, T5) == 0x3) {
129 t4 = 2;
130 t3 = T7;
131 } else {
132 t4 = T7;
133 t3 = CAT_BITS_2(T6, T5);
134 }
135 }
136
137 if ((C & 0x3) == 0x3) {
138 t2 = 2;
139 t1 = (C >> 4) & 0x1;
140 uint8_t C3 = (C >> 3) & 0x1;
141 uint8_t C2 = (C >> 2) & 0x1;
142 t0 = (C3 << 1) | (C2 & ~C3);
143 } else if (((C >> 2) & 0x3) == 0x3) {
144 t2 = 2;
145 t1 = 2;
146 t0 = C & 0x3;
147 } else {
148 t2 = (C >> 4) & 0x1;
149 t1 = (C >> 2) & 0x3;
150 uint8_t C1 = (C >> 1) & 0x1;
151 uint8_t C0 = (C >> 0) & 0x1;
152 t0 = (C1 << 1) | (C0 & ~C1);
153 }
154
155 out[0] = (t0 << n) | m0;
156 out[1] = (t1 << n) | m1;
157 out[2] = (t2 << n) | m2;
158 out[3] = (t3 << n) | m3;
159 out[4] = (t4 << n) | m4;
160 }
161
162 /**
163 * Unpack 3n+7 bits from 'in' into 3 output values
164 */
unpack_quint_block(int n,uint32_t in,uint8_t * out)165 static void unpack_quint_block(int n, uint32_t in, uint8_t *out)
166 {
167 assert(n <= 5); /* else output will overflow uint8_t */
168
169 uint8_t Q0 = (in >> (n)) & 0x1;
170 uint8_t Q1 = (in >> (n+1)) & 0x1;
171 uint8_t Q2 = (in >> (n+2)) & 0x1;
172 uint8_t Q3 = (in >> (2*n+3)) & 0x1;
173 uint8_t Q4 = (in >> (2*n+4)) & 0x1;
174 uint8_t Q5 = (in >> (3*n+5)) & 0x1;
175 uint8_t Q6 = (in >> (3*n+6)) & 0x1;
176 uint8_t mmask = (1 << n) - 1;
177 uint8_t m0 = (in >> (0)) & mmask;
178 uint8_t m1 = (in >> (n+3)) & mmask;
179 uint8_t m2 = (in >> (2*n+5)) & mmask;
180
181 uint8_t C;
182 uint8_t q2, q1, q0;
183 if (CAT_BITS_4(Q6, Q5, Q2, Q1) == 0x3) {
184 q2 = CAT_BITS_3(Q0, Q4 & ~Q0, Q3 & ~Q0);
185 q1 = 4;
186 q0 = 4;
187 } else {
188 if (CAT_BITS_2(Q2, Q1) == 0x3) {
189 q2 = 4;
190 C = CAT_BITS_5(Q4, Q3, 0x1 & ~Q6, 0x1 & ~Q5, Q0);
191 } else {
192 q2 = CAT_BITS_2(Q6, Q5);
193 C = CAT_BITS_5(Q4, Q3, Q2, Q1, Q0);
194 }
195 if ((C & 0x7) == 0x5) {
196 q1 = 4;
197 q0 = (C >> 3) & 0x3;
198 } else {
199 q1 = (C >> 3) & 0x3;
200 q0 = C & 0x7;
201 }
202 }
203 out[0] = (q0 << n) | m0;
204 out[1] = (q1 << n) | m1;
205 out[2] = (q2 << n) | m2;
206 }
207
208
209 struct uint8x4_t
210 {
211 uint8_t v[4];
212
uint8x4_tuint8x4_t213 uint8x4_t() { }
214
uint8x4_tuint8x4_t215 uint8x4_t(int a, int b, int c, int d)
216 {
217 assert(0 <= a && a <= 255);
218 assert(0 <= b && b <= 255);
219 assert(0 <= c && c <= 255);
220 assert(0 <= d && d <= 255);
221 v[0] = a;
222 v[1] = b;
223 v[2] = c;
224 v[3] = d;
225 }
226
clampeduint8x4_t227 static uint8x4_t clamped(int a, int b, int c, int d)
228 {
229 uint8x4_t r;
230 r.v[0] = MAX2(0, MIN2(255, a));
231 r.v[1] = MAX2(0, MIN2(255, b));
232 r.v[2] = MAX2(0, MIN2(255, c));
233 r.v[3] = MAX2(0, MIN2(255, d));
234 return r;
235 }
236 };
237
blue_contract(int r,int g,int b,int a)238 static uint8x4_t blue_contract(int r, int g, int b, int a)
239 {
240 return uint8x4_t((r+b) >> 1, (g+b) >> 1, b, a);
241 }
242
blue_contract_clamped(int r,int g,int b,int a)243 static uint8x4_t blue_contract_clamped(int r, int g, int b, int a)
244 {
245 return uint8x4_t::clamped((r+b) >> 1, (g+b) >> 1, b, a);
246 }
247
/**
 * Move the top bit (0x80) of 'a' into the top bit of 'b', then turn the
 * remaining bits of 'a' into a signed 6-bit value.
 *
 * On return 'b' holds (b >> 1) with a's former bit 7 OR'ed in at 0x80, and
 * 'a' holds bits 6..1 of its former value, sign-extended from bit 5
 * (range -32..31).
 */
static void bit_transfer_signed(int &a, int &b)
{
   /* Make room in b, then bring over a's top bit. */
   b = b >> 1;
   b = b | (a & 0x80);

   /* Drop a's lowest bit and keep six bits. */
   a = (a >> 1) & 0x3f;

   /* Sign-extend from bit 5. */
   if ((a & 0x20) != 0)
      a = a - 0x40;
}
257
/**
 * 32-bit integer mixing function used by select_partition() to turn a
 * partition seed into pseudo-random bits. All arithmetic wraps modulo 2^32.
 */
static uint32_t hash52(uint32_t p)
{
   p = p ^ (p >> 15);
   p = p - (p << 17);
   p = p + (p << 7);
   p = p + (p << 4);
   p = p ^ (p >> 5);
   p = p + (p << 16);
   p = p ^ (p >> 7);
   p = p ^ (p >> 3);
   p = p ^ (p << 6);
   p = p ^ (p >> 17);
   return p;
}
272
select_partition(int seed,int x,int y,int z,int partitioncount,int small_block)273 static int select_partition(int seed, int x, int y, int z, int partitioncount,
274 int small_block)
275 {
276 if (small_block) {
277 x <<= 1;
278 y <<= 1;
279 z <<= 1;
280 }
281 seed += (partitioncount - 1) * 1024;
282 uint32_t rnum = hash52(seed);
283 uint8_t seed1 = rnum & 0xF;
284 uint8_t seed2 = (rnum >> 4) & 0xF;
285 uint8_t seed3 = (rnum >> 8) & 0xF;
286 uint8_t seed4 = (rnum >> 12) & 0xF;
287 uint8_t seed5 = (rnum >> 16) & 0xF;
288 uint8_t seed6 = (rnum >> 20) & 0xF;
289 uint8_t seed7 = (rnum >> 24) & 0xF;
290 uint8_t seed8 = (rnum >> 28) & 0xF;
291 uint8_t seed9 = (rnum >> 18) & 0xF;
292 uint8_t seed10 = (rnum >> 22) & 0xF;
293 uint8_t seed11 = (rnum >> 26) & 0xF;
294 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
295
296 seed1 *= seed1;
297 seed2 *= seed2;
298 seed3 *= seed3;
299 seed4 *= seed4;
300 seed5 *= seed5;
301 seed6 *= seed6;
302 seed7 *= seed7;
303 seed8 *= seed8;
304 seed9 *= seed9;
305 seed10 *= seed10;
306 seed11 *= seed11;
307 seed12 *= seed12;
308
309 int sh1, sh2, sh3;
310 if (seed & 1) {
311 sh1 = (seed & 2 ? 4 : 5);
312 sh2 = (partitioncount == 3 ? 6 : 5);
313 } else {
314 sh1 = (partitioncount == 3 ? 6 : 5);
315 sh2 = (seed & 2 ? 4 : 5);
316 }
317 sh3 = (seed & 0x10) ? sh1 : sh2;
318
319 seed1 >>= sh1;
320 seed2 >>= sh2;
321 seed3 >>= sh1;
322 seed4 >>= sh2;
323 seed5 >>= sh1;
324 seed6 >>= sh2;
325 seed7 >>= sh1;
326 seed8 >>= sh2;
327 seed9 >>= sh3;
328 seed10 >>= sh3;
329 seed11 >>= sh3;
330 seed12 >>= sh3;
331
332 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
333 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
334 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
335 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
336
337 a &= 0x3F;
338 b &= 0x3F;
339 c &= 0x3F;
340 d &= 0x3F;
341
342 if (partitioncount < 4)
343 d = 0;
344 if (partitioncount < 3)
345 c = 0;
346
347 if (a >= b && a >= c && a >= d)
348 return 0;
349 else if (b >= c && b >= d)
350 return 1;
351 else if (c >= d)
352 return 2;
353 else
354 return 3;
355 }
356
357
/**
 * 128 bits of input stored as four 32-bit words. Bit i of the vector is
 * bit (i & 31) of data[i >> 5].
 */
struct InputBitVector
{
   uint32_t data[4];

   /**
    * Debug helper: print the vector as 128 characters, bit 127 first.
    * Bits offset .. offset+count-1 are shown as '0'/'1', all others as '.'.
    * The printf-style message built from fmt and the variadic arguments is
    * printed after the bit string, followed by a newline.
    * The caller must keep offset + count within 128.
    */
   void printf_bits(int offset, int count, const char *fmt = "", ...)
   {
      char out[129];
      memset(out, '.', 128);
      out[128] = '\0';
      int idx = offset;
      for (int i = 0; i < count; ++i) {
         out[127 - idx] = ((data[idx >> 5] >> (idx & 31)) & 1) ? '1' : '0';
         ++idx;
      }
      printf("%s ", out);
      va_list ap;
      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
      printf("\n");
   }

   /**
    * Return 'count' bits (0..31) starting at bit 'offset', with the bit at
    * 'offset' in the least significant position of the result.
    */
   uint32_t get_bits(int offset, int count)
   {
      assert(count >= 0 && count < 32);

      /* Each word contributes either shifted down (it contains 'offset') or
       * shifted up (it follows the word that contains 'offset'). */
      uint32_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (0 < offset && offset <= 32)
         out |= data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (32 < offset && offset <= 64)
         out |= data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (64 < offset && offset <= 96)
         out |= data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      /* Unsigned mask: with a signed 1, count == 31 would overflow int. */
      out &= ((uint32_t)1 << count) - 1;
      return out;
   }

   /**
    * 64-bit variant of get_bits(): return 'count' bits (0..63) starting at
    * bit 'offset'.
    */
   uint64_t get_bits64(int offset, int count)
   {
      assert(count >= 0 && count < 64);

      uint64_t out = 0;
      if (offset < 32)
         out |= data[0] >> offset;

      if (offset <= 32)
         out |= (uint64_t)data[1] << (32 - offset);
      if (32 < offset && offset < 64)
         out |= data[1] >> (offset - 32);

      if (0 < offset && offset <= 64)
         out |= (uint64_t)data[2] << (64 - offset);
      if (64 < offset && offset < 96)
         out |= data[2] >> (offset - 64);

      if (32 < offset && offset <= 96)
         out |= (uint64_t)data[3] << (96 - offset);
      if (96 < offset && offset < 128)
         out |= data[3] >> (offset - 96);

      out &= ((uint64_t)1 << count) - 1;
      return out;
   }

   /**
    * Return the 'count' bits that end just below bit 'offset' (bits
    * offset-count .. offset-1) in reversed order, so that bit offset-1 ends
    * up in the least significant position of the result.
    */
   uint32_t get_bits_rev(int offset, int count)
   {
      assert(offset >= count);
      uint32_t tmp = get_bits(offset - count, count);
      uint32_t out = 0;
      for (int i = 0; i < count; ++i)
         out |= ((tmp >> i) & 1) << (count - 1 - i);
      return out;
   }
};
444
445 struct OutputBitVector
446 {
447 uint32_t data[4];
448 int offset;
449
OutputBitVectorOutputBitVector450 OutputBitVector()
451 : offset(0)
452 {
453 memset(data, 0, sizeof(data));
454 }
455
appendOutputBitVector456 void append(uint32_t value, int size)
457 {
458 if (VERBOSE_WRITE)
459 printf("append offset=%d size=%d values=0x%x\n", offset, size, value);
460
461 assert(offset + size <= 128);
462
463 assert(size <= 32);
464 if (size < 32)
465 assert((value >> size) == 0);
466
467 while (size) {
468 int c = MIN2(size, 32 - (offset & 31));
469 data[offset >> 5] |= (value << (offset & 31));
470 offset += c;
471 size -= c;
472 value >>= c;
473 }
474 }
475
append64OutputBitVector476 void append64(uint64_t value, int size)
477 {
478 if (VERBOSE_WRITE)
479 printf("append offset=%d size=%d values=0x%llx\n", offset, size, (unsigned long long)value);
480
481 assert(offset + size <= 128);
482
483 assert(size <= 64);
484 if (size < 64)
485 assert((value >> size) == 0);
486
487 while (size) {
488 int c = MIN2(size, 32 - (offset & 31));
489 data[offset >> 5] |= (value << (offset & 31));
490 offset += c;
491 size -= c;
492 value >>= c;
493 }
494 }
495
appendOutputBitVector496 void append(OutputBitVector &v, int size)
497 {
498 if (VERBOSE_WRITE)
499 printf("append vector offset=%d size=%d\n", offset, size);
500
501 assert(offset + size <= 128);
502 int i = 0;
503 while (size >= 32) {
504 append(v.data[i++], 32);
505 size -= 32;
506 }
507 if (size > 0)
508 append(v.data[i] & ((1 << size) - 1), size);
509 }
510
append_endOutputBitVector511 void append_end(OutputBitVector &v, int size)
512 {
513 for (int i = 0; i < size; ++i)
514 data[(127 - i) >> 5] |= ((v.data[i >> 5] >> (i & 31)) & 1) << ((127 - i) & 31);
515 }
516
517 /* Insert the given number of '1' bits. (We could use 0s instead, but 1s are
518 * more likely to flush out bugs where we accidentally read undefined bits.)
519 */
skipOutputBitVector520 void skip(int size)
521 {
522 if (VERBOSE_WRITE)
523 printf("skip offset=%d size=%d\n", offset, size);
524
525 assert(offset + size <= 128);
526 while (size >= 32) {
527 append(0xffffffff, 32);
528 size -= 32;
529 }
530 if (size > 0)
531 append(0xffffffff >> (32 - size), size);
532 }
533 };
534
535
/**
 * Block decoder configured for one block footprint and one output format.
 */
class Decoder
{
public:
   /* Stores the block dimensions and the two output-format flags as given. */
   Decoder(int block_w, int block_h, int block_d, bool srgb, bool output_unorm8)
      : block_w(block_w), block_h(block_h), block_d(block_d), srgb(srgb),
        output_unorm8(output_unorm8) {}

   /* Decode the 16-byte block at 'in' into 'output' (four uint16_t per
    * texel, block_w * block_h * block_d texels) and return the status.
    * On failure every texel is filled with the error colour. */
   decode_error::type decode(const uint8_t *in, uint16_t *output) const;

   /* Block footprint in texels. */
   int block_w, block_h, block_d;
   /* output_unorm8: write 8-bit values instead of FP16 constants;
    * srgb is only valid together with output_unorm8 (asserted in decode()). */
   bool srgb, output_unorm8;
};
548
/**
 * Working state for decoding a single block. The fields are filled in
 * stage by stage by the member functions declared at the end.
 */
struct Block
{
   bool is_error;
   bool bogus_colour_endpoints;
   bool bogus_weights;

   /* Block mode. decode_block_mode() writes dual_plane, high_prec, wt_range,
    * wt_w and wt_h; decode_cem() writes partition_index (-1 when there is a
    * single partition). */
   int high_prec;
   int dual_plane;
   int colour_component_selector;
   int wt_range;
   int wt_w, wt_h, wt_d;
   int num_parts;
   int partition_index;

   /* Written by decode_void_extent(): */
   bool is_void_extent;
   int void_extent_d;
   int void_extent_min_s;
   int void_extent_max_s;
   int void_extent_min_t;
   int void_extent_max_t;
   uint16_t void_extent_colour_r;
   uint16_t void_extent_colour_g;
   uint16_t void_extent_colour_b;
   uint16_t void_extent_colour_a;

   /* Written by decode_cem(). cems[] holds one colour endpoint mode per
    * partition, -1 for unused partitions. colour_endpoint_data_offset is the
    * bit offset where the endpoint data starts (17 for one partition, 29
    * otherwise). */
   bool is_multi_cem;
   int num_extra_cem_bits;
   int colour_endpoint_data_offset;
   int extra_cem_bits;
   int cem_base_class;
   int cems[4];

   int num_cem_values;

   /* Calculated by unpack_weights(): */
   uint8_t weights_quant[64 + 4]; /* max 64 values, plus padding for overflows in trit parsing */

   /* Calculated by unquantise_weights(): */
   uint8_t weights[64 + 18]; /* max 64 values, plus padding for the infill interpolation */

   /* Calculated by unpack_colour_endpoints(): */
   uint8_t colour_endpoints_quant[18 + 4]; /* max 18 values, plus padding for overflows in trit parsing */

   /* Calculated by unquantise_colour_endpoints(): */
   uint8_t colour_endpoints[18];

   /* Calculated by calculate_from_weights(): */
   int wt_trits;
   int wt_quints;
   int wt_bits;
   int wt_max;
   int num_weights;
   int weight_bits;

   /* Calculated by calculate_remaining_bits(): */
   int remaining_bits;

   /* Calculated by calculate_colour_endpoints_size(): */
   int colour_endpoint_bits;
   int ce_max;
   int ce_trits;
   int ce_quints;
   int ce_bits;

   /* Calculated by compute_infill_weights(); */
   uint8_t infill_weights[2][216]; /* large enough for 6x6x6 */

   /* Calculated by decode_colour_endpoints(); indexed [endpoint][partition] */
   uint8x4_t endpoints_decoded[2][4];

   void calculate_from_weights();
   void calculate_remaining_bits();
   decode_error::type calculate_colour_endpoints_size();

   void unquantise_weights();
   void unquantise_colour_endpoints();

   decode_error::type decode(const Decoder &decoder, InputBitVector in);

   decode_error::type decode_block_mode(InputBitVector in);
   decode_error::type decode_void_extent(InputBitVector in);
   void decode_cem(InputBitVector in);
   void unpack_colour_endpoints(InputBitVector in);
   void decode_colour_endpoints();
   void unpack_weights(InputBitVector in);
   void compute_infill_weights(int block_w, int block_h, int block_d);

   void write_decoded(const Decoder &decoder, uint16_t *output);
};
638
639
decode(const uint8_t * in,uint16_t * output) const640 decode_error::type Decoder::decode(const uint8_t *in, uint16_t *output) const
641 {
642 Block blk;
643 InputBitVector in_vec;
644 memcpy(&in_vec.data, in, 16);
645 decode_error::type err = blk.decode(*this, in_vec);
646 if (err == decode_error::ok) {
647 blk.write_decoded(*this, output);
648 } else {
649 /* Fill output with the error colour */
650 for (int i = 0; i < block_w * block_h * block_d; ++i) {
651 if (output_unorm8) {
652 output[i*4+0] = 0xff;
653 output[i*4+1] = 0;
654 output[i*4+2] = 0xff;
655 output[i*4+3] = 0xff;
656 } else {
657 assert(!srgb); /* srgb must use unorm8 */
658
659 output[i*4+0] = FP16_ONE;
660 output[i*4+1] = FP16_ZERO;
661 output[i*4+2] = FP16_ONE;
662 output[i*4+3] = FP16_ONE;
663 }
664 }
665 }
666 return err;
667 }
668
669
decode_void_extent(InputBitVector block)670 decode_error::type Block::decode_void_extent(InputBitVector block)
671 {
672 /* TODO: 3D */
673
674 is_void_extent = true;
675 void_extent_d = block.get_bits(9, 1);
676 void_extent_min_s = block.get_bits(12, 13);
677 void_extent_max_s = block.get_bits(25, 13);
678 void_extent_min_t = block.get_bits(38, 13);
679 void_extent_max_t = block.get_bits(51, 13);
680 void_extent_colour_r = block.get_bits(64, 16);
681 void_extent_colour_g = block.get_bits(80, 16);
682 void_extent_colour_b = block.get_bits(96, 16);
683 void_extent_colour_a = block.get_bits(112, 16);
684
685 /* TODO: maybe we should do something useful with the extent coordinates? */
686
687 if (void_extent_d) {
688 return decode_error::unsupported_hdr_void_extent;
689 }
690
691 if (void_extent_min_s == 0x1fff && void_extent_max_s == 0x1fff
692 && void_extent_min_t == 0x1fff && void_extent_max_t == 0x1fff) {
693
694 /* No extents */
695
696 } else {
697
698 /* Check for illegal encoding */
699 if (void_extent_min_s >= void_extent_max_s || void_extent_min_t >= void_extent_max_t) {
700 return decode_error::invalid_range_in_void_extent;
701 }
702 }
703
704 return decode_error::ok;
705 }
706
decode_block_mode(InputBitVector in)707 decode_error::type Block::decode_block_mode(InputBitVector in)
708 {
709 dual_plane = in.get_bits(10, 1);
710 high_prec = in.get_bits(9, 1);
711
712 if (in.get_bits(0, 2) != 0x0) {
713 wt_range = (in.get_bits(0, 2) << 1) | in.get_bits(4, 1);
714 int a = in.get_bits(5, 2);
715 int b = in.get_bits(7, 2);
716 switch (in.get_bits(2, 2)) {
717 case 0x0:
718 if (VERBOSE_DECODE)
719 in.printf_bits(0, 11, "DHBBAAR00RR");
720 wt_w = b + 4;
721 wt_h = a + 2;
722 break;
723 case 0x1:
724 if (VERBOSE_DECODE)
725 in.printf_bits(0, 11, "DHBBAAR01RR");
726 wt_w = b + 8;
727 wt_h = a + 2;
728 break;
729 case 0x2:
730 if (VERBOSE_DECODE)
731 in.printf_bits(0, 11, "DHBBAAR10RR");
732 wt_w = a + 2;
733 wt_h = b + 8;
734 break;
735 case 0x3:
736 if ((b & 0x2) == 0) {
737 if (VERBOSE_DECODE)
738 in.printf_bits(0, 11, "DH0BAAR11RR");
739 wt_w = a + 2;
740 wt_h = b + 6;
741 } else {
742 if (VERBOSE_DECODE)
743 in.printf_bits(0, 11, "DH1BAAR11RR");
744 wt_w = (b & 0x1) + 2;
745 wt_h = a + 2;
746 }
747 break;
748 }
749 } else {
750 if (in.get_bits(6, 3) == 0x7) {
751 if (in.get_bits(0, 9) == 0x1fc) {
752 if (VERBOSE_DECODE)
753 in.printf_bits(0, 11, "xx111111100 (void extent)");
754 return decode_void_extent(in);
755 } else {
756 if (VERBOSE_DECODE)
757 in.printf_bits(0, 11, "xx111xxxx00");
758 return decode_error::reserved_block_mode_1;
759 }
760 }
761 if (in.get_bits(0, 4) == 0x0) {
762 if (VERBOSE_DECODE)
763 in.printf_bits(0, 11, "xxxxxxx0000");
764 return decode_error::reserved_block_mode_2;
765 }
766
767 wt_range = in.get_bits(1, 3) | in.get_bits(4, 1);
768 int a = in.get_bits(5, 2);
769 int b;
770
771 switch (in.get_bits(7, 2)) {
772 case 0x0:
773 if (VERBOSE_DECODE)
774 in.printf_bits(0, 11, "DH00AARRR00");
775 wt_w = 12;
776 wt_h = a + 2;
777 break;
778 case 0x1:
779 if (VERBOSE_DECODE)
780 in.printf_bits(0, 11, "DH01AARRR00");
781 wt_w = a + 2;
782 wt_h = 12;
783 break;
784 case 0x3:
785 if (in.get_bits(5, 1) == 0) {
786 if (VERBOSE_DECODE)
787 in.printf_bits(0, 11, "DH1100RRR00");
788 wt_w = 6;
789 wt_h = 10;
790 } else {
791 if (VERBOSE_DECODE)
792 in.printf_bits(0, 11, "DH1101RRR00");
793 wt_w = 10;
794 wt_h = 6;
795 }
796 break;
797 case 0x2:
798 if (VERBOSE_DECODE)
799 in.printf_bits(0, 11, "BB10AARRR00");
800 b = in.get_bits(9, 2);
801 wt_w = a + 6;
802 wt_h = b + 6;
803 dual_plane = 0;
804 high_prec = 0;
805 break;
806 }
807 }
808 return decode_error::ok;
809 }
810
decode_cem(InputBitVector in)811 void Block::decode_cem(InputBitVector in)
812 {
813 cems[0] = cems[1] = cems[2] = cems[3] = -1;
814
815 num_extra_cem_bits = 0;
816 extra_cem_bits = 0;
817
818 if (num_parts > 1) {
819
820 partition_index = in.get_bits(13, 10);
821 if (VERBOSE_DECODE)
822 in.printf_bits(13, 10, "partition ID (%d)", partition_index);
823
824 uint32_t cem = in.get_bits(23, 6);
825
826 if ((cem & 0x3) == 0x0) {
827 cem >>= 2;
828 cem_base_class = cem >> 2;
829 is_multi_cem = false;
830
831 for (int i = 0; i < num_parts; ++i)
832 cems[i] = cem;
833
834 if (VERBOSE_DECODE)
835 in.printf_bits(23, 6, "CEM (single, %d)", cem);
836 } else {
837
838 cem_base_class = (cem & 0x3) - 1;
839 is_multi_cem = true;
840
841 if (VERBOSE_DECODE)
842 in.printf_bits(23, 6, "CEM (multi, base class %d)", cem_base_class);
843
844 int offset = 128 - weight_bits;
845
846 if (num_parts == 2) {
847 if (VERBOSE_DECODE) {
848 in.printf_bits(25, 4, "M0M0 C1 C0");
849 in.printf_bits(offset - 2, 2, "M1M1");
850 }
851
852 uint32_t c0 = in.get_bits(25, 1);
853 uint32_t c1 = in.get_bits(26, 1);
854
855 extra_cem_bits = c0 + c1;
856
857 num_extra_cem_bits = 2;
858
859 uint32_t m0 = in.get_bits(27, 2);
860 uint32_t m1 = in.get_bits(offset - 2, 2);
861
862 cems[0] = ((cem_base_class + c0) << 2) | m0;
863 cems[1] = ((cem_base_class + c1) << 2) | m1;
864
865 } else if (num_parts == 3) {
866 if (VERBOSE_DECODE) {
867 in.printf_bits(25, 4, "M0 C2 C1 C0");
868 in.printf_bits(offset - 5, 5, "M2M2 M1M1 M0");
869 }
870
871 uint32_t c0 = in.get_bits(25, 1);
872 uint32_t c1 = in.get_bits(26, 1);
873 uint32_t c2 = in.get_bits(27, 1);
874
875 extra_cem_bits = c0 + c1 + c2;
876
877 num_extra_cem_bits = 5;
878
879 uint32_t m0 = in.get_bits(28, 1) | (in.get_bits(128 - weight_bits - 5, 1) << 1);
880 uint32_t m1 = in.get_bits(offset - 4, 2);
881 uint32_t m2 = in.get_bits(offset - 2, 2);
882
883 cems[0] = ((cem_base_class + c0) << 2) | m0;
884 cems[1] = ((cem_base_class + c1) << 2) | m1;
885 cems[2] = ((cem_base_class + c2) << 2) | m2;
886
887 } else if (num_parts == 4) {
888 if (VERBOSE_DECODE) {
889 in.printf_bits(25, 4, "C3 C2 C1 C0");
890 in.printf_bits(offset - 8, 8, "M3M3 M2M2 M1M1 M0M0");
891 }
892
893 uint32_t c0 = in.get_bits(25, 1);
894 uint32_t c1 = in.get_bits(26, 1);
895 uint32_t c2 = in.get_bits(27, 1);
896 uint32_t c3 = in.get_bits(28, 1);
897
898 extra_cem_bits = c0 + c1 + c2 + c3;
899
900 num_extra_cem_bits = 8;
901
902 uint32_t m0 = in.get_bits(offset - 8, 2);
903 uint32_t m1 = in.get_bits(offset - 6, 2);
904 uint32_t m2 = in.get_bits(offset - 4, 2);
905 uint32_t m3 = in.get_bits(offset - 2, 2);
906
907 cems[0] = ((cem_base_class + c0) << 2) | m0;
908 cems[1] = ((cem_base_class + c1) << 2) | m1;
909 cems[2] = ((cem_base_class + c2) << 2) | m2;
910 cems[3] = ((cem_base_class + c3) << 2) | m3;
911 } else {
912 unreachable("");
913 }
914 }
915
916 colour_endpoint_data_offset = 29;
917
918 } else {
919 uint32_t cem = in.get_bits(13, 4);
920
921 cem_base_class = cem >> 2;
922 is_multi_cem = false;
923
924 cems[0] = cem;
925
926 partition_index = -1;
927
928 if (VERBOSE_DECODE)
929 in.printf_bits(13, 4, "CEM = %d (class %d)", cem, cem_base_class);
930
931 colour_endpoint_data_offset = 17;
932 }
933 }
934
unpack_colour_endpoints(InputBitVector in)935 void Block::unpack_colour_endpoints(InputBitVector in)
936 {
937 if (ce_trits) {
938 int offset = colour_endpoint_data_offset;
939 int bits_left = colour_endpoint_bits;
940 for (int i = 0; i < num_cem_values; i += 5) {
941 int bits_to_read = MIN2(bits_left, 8 + ce_bits * 5);
942 /* If ce_trits then ce_bits <= 6, so bits_to_read <= 38 and we have to use uint64_t */
943 uint64_t raw = in.get_bits64(offset, bits_to_read);
944 unpack_trit_block(ce_bits, raw, &colour_endpoints_quant[i]);
945
946 if (VERBOSE_DECODE)
947 in.printf_bits(offset, bits_to_read,
948 "trits [%d,%d,%d,%d,%d]",
949 colour_endpoints_quant[i+0], colour_endpoints_quant[i+1],
950 colour_endpoints_quant[i+2], colour_endpoints_quant[i+3],
951 colour_endpoints_quant[i+4]);
952
953 offset += 8 + ce_bits * 5;
954 bits_left -= 8 + ce_bits * 5;
955 }
956 } else if (ce_quints) {
957 int offset = colour_endpoint_data_offset;
958 int bits_left = colour_endpoint_bits;
959 for (int i = 0; i < num_cem_values; i += 3) {
960 int bits_to_read = MIN2(bits_left, 7 + ce_bits * 3);
961 /* If ce_quints then ce_bits <= 5, so bits_to_read <= 22 and we can use uint32_t */
962 uint32_t raw = in.get_bits(offset, bits_to_read);
963 unpack_quint_block(ce_bits, raw, &colour_endpoints_quant[i]);
964
965 if (VERBOSE_DECODE)
966 in.printf_bits(offset, bits_to_read,
967 "quints [%d,%d,%d]",
968 colour_endpoints_quant[i], colour_endpoints_quant[i+1], colour_endpoints_quant[i+2]);
969
970 offset += 7 + ce_bits * 3;
971 bits_left -= 7 + ce_bits * 3;
972 }
973 } else {
974 assert((colour_endpoint_bits % ce_bits) == 0);
975 int offset = colour_endpoint_data_offset;
976 for (int i = 0; i < num_cem_values; i++) {
977 colour_endpoints_quant[i] = in.get_bits(offset, ce_bits);
978
979 if (VERBOSE_DECODE)
980 in.printf_bits(offset, ce_bits, "bits [%d]", colour_endpoints_quant[i]);
981
982 offset += ce_bits;
983 }
984 }
985 }
986
decode_colour_endpoints()987 void Block::decode_colour_endpoints()
988 {
989 int cem_values_idx = 0;
990 for (int part = 0; part < num_parts; ++part) {
991 uint8_t *v = &colour_endpoints[cem_values_idx];
992 int v0 = v[0];
993 int v1 = v[1];
994 int v2 = v[2];
995 int v3 = v[3];
996 int v4 = v[4];
997 int v5 = v[5];
998 int v6 = v[6];
999 int v7 = v[7];
1000 cem_values_idx += ((cems[part] >> 2) + 1) * 2;
1001
1002 uint8x4_t e0, e1;
1003 int s0, s1, L0, L1;
1004
1005 switch (cems[part])
1006 {
1007 case 0:
1008 e0 = uint8x4_t(v0, v0, v0, 0xff);
1009 e1 = uint8x4_t(v1, v1, v1, 0xff);
1010 break;
1011 case 1:
1012 L0 = (v0 >> 2) | (v1 & 0xc0);
1013 L1 = L0 + (v1 & 0x3f);
1014 if (L1 > 0xff)
1015 L1 = 0xff;
1016 e0 = uint8x4_t(L0, L0, L0, 0xff);
1017 e1 = uint8x4_t(L1, L1, L1, 0xff);
1018 break;
1019 case 4:
1020 e0 = uint8x4_t(v0, v0, v0, v2);
1021 e1 = uint8x4_t(v1, v1, v1, v3);
1022 break;
1023 case 5:
1024 bit_transfer_signed(v1, v0);
1025 bit_transfer_signed(v3, v2);
1026 e0 = uint8x4_t(v0, v0, v0, v2);
1027 e1 = uint8x4_t::clamped(v0+v1, v0+v1, v0+v1, v2+v3);
1028 break;
1029 case 6:
1030 e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, 0xff);
1031 e1 = uint8x4_t(v0, v1, v2, 0xff);
1032 break;
1033 case 8:
1034 s0 = v0 + v2 + v4;
1035 s1 = v1 + v3 + v5;
1036 if (s1 >= s0) {
1037 e0 = uint8x4_t(v0, v2, v4, 0xff);
1038 e1 = uint8x4_t(v1, v3, v5, 0xff);
1039 } else {
1040 e0 = blue_contract(v1, v3, v5, 0xff);
1041 e1 = blue_contract(v0, v2, v4, 0xff);
1042 }
1043 break;
1044 case 9:
1045 bit_transfer_signed(v1, v0);
1046 bit_transfer_signed(v3, v2);
1047 bit_transfer_signed(v5, v4);
1048 if (v1 + v3 + v5 >= 0) {
1049 e0 = uint8x4_t(v0, v2, v4, 0xff);
1050 e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, 0xff);
1051 } else {
1052 e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, 0xff);
1053 e1 = blue_contract(v0, v2, v4, 0xff);
1054 }
1055 break;
1056 case 10:
1057 e0 = uint8x4_t(v0*v3 >> 8, v1*v3 >> 8, v2*v3 >> 8, v4);
1058 e1 = uint8x4_t(v0, v1, v2, v5);
1059 break;
1060 case 12:
1061 s0 = v0 + v2 + v4;
1062 s1 = v1 + v3 + v5;
1063 if (s1 >= s0) {
1064 e0 = uint8x4_t(v0, v2, v4, v6);
1065 e1 = uint8x4_t(v1, v3, v5, v7);
1066 } else {
1067 e0 = blue_contract(v1, v3, v5, v7);
1068 e1 = blue_contract(v0, v2, v4, v6);
1069 }
1070 break;
1071 case 13:
1072 bit_transfer_signed(v1, v0);
1073 bit_transfer_signed(v3, v2);
1074 bit_transfer_signed(v5, v4);
1075 bit_transfer_signed(v7, v6);
1076 if (v1 + v3 + v5 >= 0) {
1077 e0 = uint8x4_t(v0, v2, v4, v6);
1078 e1 = uint8x4_t::clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1079 } else {
1080 e0 = blue_contract_clamped(v0+v1, v2+v3, v4+v5, v6+v7);
1081 e1 = blue_contract(v0, v2, v4, v6);
1082 }
1083 break;
1084 default:
1085 /* HDR endpoints not supported; return error colour */
1086 e0 = uint8x4_t(255, 0, 255, 255);
1087 e1 = uint8x4_t(255, 0, 255, 255);
1088 break;
1089 }
1090
1091 endpoints_decoded[0][part] = e0;
1092 endpoints_decoded[1][part] = e1;
1093
1094 if (VERBOSE_DECODE) {
1095 printf("cems[%d]=%d v=[", part, cems[part]);
1096 for (int i = 0; i < (cems[part] >> 2) + 1; ++i) {
1097 if (i)
1098 printf(", ");
1099 printf("%3d", v[i]);
1100 }
1101 printf("] e0=[%3d,%4d,%4d,%4d] e1=[%3d,%4d,%4d,%4d]\n",
1102 e0.v[0], e0.v[1], e0.v[2], e0.v[3],
1103 e1.v[0], e1.v[1], e1.v[2], e1.v[3]);
1104 }
1105 }
1106 }
1107
unpack_weights(InputBitVector in)1108 void Block::unpack_weights(InputBitVector in)
1109 {
1110 if (wt_trits) {
1111 int offset = 128;
1112 int bits_left = weight_bits;
1113 for (int i = 0; i < num_weights; i += 5) {
1114 int bits_to_read = MIN2(bits_left, 8 + 5*wt_bits);
1115 /* If wt_trits then wt_bits <= 3, so bits_to_read <= 23 and we can use uint32_t */
1116 uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1117 unpack_trit_block(wt_bits, raw, &weights_quant[i]);
1118
1119 if (VERBOSE_DECODE)
1120 in.printf_bits(offset - bits_to_read, bits_to_read, "weight trits [%d,%d,%d,%d,%d]",
1121 weights_quant[i+0], weights_quant[i+1],
1122 weights_quant[i+2], weights_quant[i+3],
1123 weights_quant[i+4]);
1124
1125 offset -= 8 + wt_bits * 5;
1126 bits_left -= 8 + wt_bits * 5;
1127 }
1128
1129 } else if (wt_quints) {
1130
1131 int offset = 128;
1132 int bits_left = weight_bits;
1133 for (int i = 0; i < num_weights; i += 3) {
1134 int bits_to_read = MIN2(bits_left, 7 + 3*wt_bits);
1135 /* If wt_quints then wt_bits <= 2, so bits_to_read <= 13 and we can use uint32_t */
1136 uint32_t raw = in.get_bits_rev(offset, bits_to_read);
1137 unpack_quint_block(wt_bits, raw, &weights_quant[i]);
1138
1139 if (VERBOSE_DECODE)
1140 in.printf_bits(offset - bits_to_read, bits_to_read, "weight quints [%d,%d,%d]",
1141 weights_quant[i], weights_quant[i+1], weights_quant[i+2]);
1142
1143 offset -= 7 + wt_bits * 3;
1144 bits_left -= 7 + wt_bits * 3;
1145 }
1146
1147 } else {
1148 int offset = 128;
1149 assert((weight_bits % wt_bits) == 0);
1150 for (int i = 0; i < num_weights; ++i) {
1151 weights_quant[i] = in.get_bits_rev(offset, wt_bits);
1152
1153 if (VERBOSE_DECODE)
1154 in.printf_bits(offset - wt_bits, wt_bits, "weight bits [%d]", weights_quant[i]);
1155
1156 offset -= wt_bits;
1157 }
1158 }
1159 }
1160
unquantise_weights()1161 void Block::unquantise_weights()
1162 {
1163 assert(num_weights <= (int)ARRAY_SIZE(weights_quant));
1164 assert(num_weights <= (int)ARRAY_SIZE(weights));
1165
1166 memset(weights, 0, sizeof(weights));
1167
1168 for (int i = 0; i < num_weights; ++i) {
1169
1170 uint8_t v = weights_quant[i];
1171 uint8_t w;
1172
1173 if (wt_trits) {
1174
1175 if (wt_bits == 0) {
1176 w = v * 32;
1177 } else {
1178 uint8_t A, B, C, D;
1179 A = (v & 0x1) ? 0x7F : 0x00;
1180 switch (wt_bits) {
1181 case 1:
1182 B = 0;
1183 C = 50;
1184 D = v >> 1;
1185 break;
1186 case 2:
1187 B = (v & 0x2) ? 0x45 : 0x00;
1188 C = 23;
1189 D = v >> 2;
1190 break;
1191 case 3:
1192 B = ((v & 0x6) >> 1) | ((v & 0x6) << 4);
1193 C = 11;
1194 D = v >> 3;
1195 break;
1196 default:
1197 unreachable("");
1198 }
1199 uint16_t T = D * C + B;
1200 T = T ^ A;
1201 T = (A & 0x20) | (T >> 2);
1202 assert(T < 64);
1203 if (T > 32)
1204 T++;
1205 w = T;
1206 }
1207
1208 } else if (wt_quints) {
1209
1210 if (wt_bits == 0) {
1211 w = v * 16;
1212 } else {
1213 uint8_t A, B, C, D;
1214 A = (v & 0x1) ? 0x7F : 0x00;
1215 switch (wt_bits) {
1216 case 1:
1217 B = 0;
1218 C = 28;
1219 D = v >> 1;
1220 break;
1221 case 2:
1222 B = (v & 0x2) ? 0x42 : 0x00;
1223 C = 13;
1224 D = v >> 2;
1225 break;
1226 default:
1227 unreachable("");
1228 }
1229 uint16_t T = D * C + B;
1230 T = T ^ A;
1231 T = (A & 0x20) | (T >> 2);
1232 assert(T < 64);
1233 if (T > 32)
1234 T++;
1235 w = T;
1236 }
1237 weights[i] = w;
1238
1239 } else {
1240
1241 switch (wt_bits) {
1242 case 1: w = v ? 0x3F : 0x00; break;
1243 case 2: w = v | (v << 2) | (v << 4); break;
1244 case 3: w = v | (v << 3); break;
1245 case 4: w = (v >> 2) | (v << 2); break;
1246 case 5: w = (v >> 4) | (v << 1); break;
1247 default: unreachable("");
1248 }
1249 assert(w < 64);
1250 if (w > 32)
1251 w++;
1252 }
1253 weights[i] = w;
1254 }
1255 }
1256
compute_infill_weights(int block_w,int block_h,int block_d)1257 void Block::compute_infill_weights(int block_w, int block_h, int block_d)
1258 {
1259 int Ds = block_w <= 1 ? 0 : (1024 + block_w / 2) / (block_w - 1);
1260 int Dt = block_h <= 1 ? 0 : (1024 + block_h / 2) / (block_h - 1);
1261 int Dr = block_d <= 1 ? 0 : (1024 + block_d / 2) / (block_d - 1);
1262 for (int r = 0; r < block_d; ++r) {
1263 for (int t = 0; t < block_h; ++t) {
1264 for (int s = 0; s < block_w; ++s) {
1265 int cs = Ds * s;
1266 int ct = Dt * t;
1267 int cr = Dr * r;
1268 int gs = (cs * (wt_w - 1) + 32) >> 6;
1269 int gt = (ct * (wt_h - 1) + 32) >> 6;
1270 int gr = (cr * (wt_d - 1) + 32) >> 6;
1271 assert(gs >= 0 && gs <= 176);
1272 assert(gt >= 0 && gt <= 176);
1273 assert(gr >= 0 && gr <= 176);
1274 int js = gs >> 4;
1275 int fs = gs & 0xf;
1276 int jt = gt >> 4;
1277 int ft = gt & 0xf;
1278 int jr = gr >> 4;
1279 int fr = gr & 0xf;
1280
1281 /* TODO: 3D */
1282 (void)jr;
1283 (void)fr;
1284
1285 int w11 = (fs * ft + 8) >> 4;
1286 int w10 = ft - w11;
1287 int w01 = fs - w11;
1288 int w00 = 16 - fs - ft + w11;
1289
1290 if (dual_plane) {
1291 int p00, p01, p10, p11, i0, i1;
1292 int v0 = js + jt * wt_w;
1293 p00 = weights[(v0) * 2];
1294 p01 = weights[(v0 + 1) * 2];
1295 p10 = weights[(v0 + wt_w) * 2];
1296 p11 = weights[(v0 + wt_w + 1) * 2];
1297 i0 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1298 p00 = weights[(v0) * 2 + 1];
1299 p01 = weights[(v0 + 1) * 2 + 1];
1300 p10 = weights[(v0 + wt_w) * 2 + 1];
1301 p11 = weights[(v0 + wt_w + 1) * 2 + 1];
1302 assert((v0 + wt_w + 1) * 2 + 1 < (int)ARRAY_SIZE(weights));
1303 i1 = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1304 assert(0 <= i0 && i0 <= 64);
1305 infill_weights[0][s + t*block_w + r*block_w*block_h] = i0;
1306 infill_weights[1][s + t*block_w + r*block_w*block_h] = i1;
1307 } else {
1308 int p00, p01, p10, p11, i;
1309 int v0 = js + jt * wt_w;
1310 p00 = weights[v0];
1311 p01 = weights[v0 + 1];
1312 p10 = weights[v0 + wt_w];
1313 p11 = weights[v0 + wt_w + 1];
1314 assert(v0 + wt_w + 1 < (int)ARRAY_SIZE(weights));
1315 i = (p00*w00 + p01*w01 + p10*w10 + p11*w11 + 8) >> 4;
1316 assert(0 <= i && i <= 64);
1317 infill_weights[0][s + t*block_w + r*block_w*block_h] = i;
1318 }
1319 }
1320 }
1321 }
1322 }
1323
unquantise_colour_endpoints()1324 void Block::unquantise_colour_endpoints()
1325 {
1326 assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints_quant));
1327 assert(num_cem_values <= (int)ARRAY_SIZE(colour_endpoints));
1328
1329 for (int i = 0; i < num_cem_values; ++i) {
1330 uint8_t v = colour_endpoints_quant[i];
1331
1332 if (ce_trits) {
1333 uint16_t A, B, C, D;
1334 uint16_t t;
1335 A = (v & 0x1) ? 0x1FF : 0x000;
1336 switch (ce_bits) {
1337 case 1:
1338 B = 0;
1339 C = 204;
1340 D = v >> 1;
1341 break;
1342 case 2:
1343 B = (v & 0x2) ? 0x116 : 0x000;
1344 C = 93;
1345 D = v >> 2;
1346 break;
1347 case 3:
1348 t = ((v >> 1) & 0x3);
1349 B = t | (t << 2) | (t << 7);
1350 C = 44;
1351 D = v >> 3;
1352 break;
1353 case 4:
1354 t = ((v >> 1) & 0x7);
1355 B = t | (t << 6);
1356 C = 22;
1357 D = v >> 4;
1358 break;
1359 case 5:
1360 t = ((v >> 1) & 0xF);
1361 B = (t >> 2) | (t << 5);
1362 C = 11;
1363 D = v >> 5;
1364 break;
1365 case 6:
1366 B = ((v & 0x3E) << 3) | ((v >> 5) & 0x1);
1367 C = 5;
1368 D = v >> 6;
1369 break;
1370 default:
1371 unreachable("");
1372 }
1373 uint16_t T = D * C + B;
1374 T = T ^ A;
1375 T = (A & 0x80) | (T >> 2);
1376 assert(T < 256);
1377 colour_endpoints[i] = T;
1378 } else if (ce_quints) {
1379 uint16_t A, B, C, D;
1380 uint16_t t;
1381 A = (v & 0x1) ? 0x1FF : 0x000;
1382 switch (ce_bits) {
1383 case 1:
1384 B = 0;
1385 C = 113;
1386 D = v >> 1;
1387 break;
1388 case 2:
1389 B = (v & 0x2) ? 0x10C : 0x000;
1390 C = 54;
1391 D = v >> 2;
1392 break;
1393 case 3:
1394 t = ((v >> 1) & 0x3);
1395 B = (t >> 1) | (t << 1) | (t << 7);
1396 C = 26;
1397 D = v >> 3;
1398 break;
1399 case 4:
1400 t = ((v >> 1) & 0x7);
1401 B = (t >> 1) | (t << 6);
1402 C = 13;
1403 D = v >> 4;
1404 break;
1405 case 5:
1406 t = ((v >> 1) & 0xF);
1407 B = (t >> 4) | (t << 5);
1408 C = 6;
1409 D = v >> 5;
1410 break;
1411 default:
1412 unreachable("");
1413 }
1414 uint16_t T = D * C + B;
1415 T = T ^ A;
1416 T = (A & 0x80) | (T >> 2);
1417 assert(T < 256);
1418 colour_endpoints[i] = T;
1419 } else {
1420 switch (ce_bits) {
1421 case 1: v = v ? 0xFF : 0x00; break;
1422 case 2: v = (v << 6) | (v << 4) | (v << 2) | v; break;
1423 case 3: v = (v << 5) | (v << 2) | (v >> 1); break;
1424 case 4: v = (v << 4) | v; break;
1425 case 5: v = (v << 3) | (v >> 2); break;
1426 case 6: v = (v << 2) | (v >> 4); break;
1427 case 7: v = (v << 1) | (v >> 6); break;
1428 case 8: break;
1429 default: unreachable("");
1430 }
1431 colour_endpoints[i] = v;
1432 }
1433 }
1434 }
1435
decode(const Decoder & decoder,InputBitVector in)1436 decode_error::type Block::decode(const Decoder &decoder, InputBitVector in)
1437 {
1438 decode_error::type err;
1439
1440 is_error = false;
1441 bogus_colour_endpoints = false;
1442 bogus_weights = false;
1443 is_void_extent = false;
1444
1445 wt_d = 1;
1446 /* TODO: 3D */
1447
1448 /* TODO: test for all the illegal encodings */
1449
1450 if (VERBOSE_DECODE)
1451 in.printf_bits(0, 128);
1452
1453 err = decode_block_mode(in);
1454 if (err != decode_error::ok)
1455 return err;
1456
1457 if (is_void_extent)
1458 return decode_error::ok;
1459
1460 /* TODO: 3D */
1461
1462 calculate_from_weights();
1463
1464 if (VERBOSE_DECODE)
1465 printf("weights_grid=%dx%dx%d dual_plane=%d num_weights=%d high_prec=%d r=%d range=0..%d (%dt %dq %db) weight_bits=%d\n",
1466 wt_w, wt_h, wt_d, dual_plane, num_weights, high_prec, wt_range, wt_max, wt_trits, wt_quints, wt_bits, weight_bits);
1467
1468 if (wt_w > decoder.block_w || wt_h > decoder.block_h || wt_d > decoder.block_d)
1469 return decode_error::weight_grid_exceeds_block_size;
1470
1471 num_parts = in.get_bits(11, 2) + 1;
1472
1473 if (VERBOSE_DECODE)
1474 in.printf_bits(11, 2, "partitions = %d", num_parts);
1475
1476 if (dual_plane && num_parts > 3)
1477 return decode_error::dual_plane_and_too_many_partitions;
1478
1479 decode_cem(in);
1480
1481 if (VERBOSE_DECODE)
1482 printf("cem=[%d,%d,%d,%d] base_cem_class=%d\n", cems[0], cems[1], cems[2], cems[3], cem_base_class);
1483
1484 int num_cem_pairs = (cem_base_class + 1) * num_parts + extra_cem_bits;
1485 num_cem_values = num_cem_pairs * 2;
1486
1487 calculate_remaining_bits();
1488 err = calculate_colour_endpoints_size();
1489 if (err != decode_error::ok)
1490 return err;
1491
1492 if (VERBOSE_DECODE)
1493 in.printf_bits(colour_endpoint_data_offset, colour_endpoint_bits,
1494 "endpoint data (%d bits, %d vals, %dt %dq %db)",
1495 colour_endpoint_bits, num_cem_values, ce_trits, ce_quints, ce_bits);
1496
1497 unpack_colour_endpoints(in);
1498
1499 if (VERBOSE_DECODE) {
1500 printf("cem values raw =[");
1501 for (int i = 0; i < num_cem_values; i++) {
1502 if (i)
1503 printf(", ");
1504 printf("%3d", colour_endpoints_quant[i]);
1505 }
1506 printf("]\n");
1507 }
1508
1509 if (num_cem_values > 18)
1510 return decode_error::invalid_colour_endpoints_count;
1511
1512 unquantise_colour_endpoints();
1513
1514 if (VERBOSE_DECODE) {
1515 printf("cem values norm=[");
1516 for (int i = 0; i < num_cem_values; i++) {
1517 if (i)
1518 printf(", ");
1519 printf("%3d", colour_endpoints[i]);
1520 }
1521 printf("]\n");
1522 }
1523
1524 decode_colour_endpoints();
1525
1526 if (dual_plane) {
1527 int ccs_offset = 128 - weight_bits - num_extra_cem_bits - 2;
1528 colour_component_selector = in.get_bits(ccs_offset, 2);
1529
1530 if (VERBOSE_DECODE)
1531 in.printf_bits(ccs_offset, 2, "colour component selector = %d", colour_component_selector);
1532 } else {
1533 colour_component_selector = 0;
1534 }
1535
1536
1537 if (VERBOSE_DECODE)
1538 in.printf_bits(128 - weight_bits, weight_bits, "weights (%d bits)", weight_bits);
1539
1540 if (num_weights > 64)
1541 return decode_error::invalid_num_weights;
1542
1543 if (weight_bits < 24 || weight_bits > 96)
1544 return decode_error::invalid_weight_bits;
1545
1546 unpack_weights(in);
1547
1548 unquantise_weights();
1549
1550 if (VERBOSE_DECODE) {
1551 printf("weights=[");
1552 for (int i = 0; i < num_weights; ++i) {
1553 if (i)
1554 printf(", ");
1555 printf("%d", weights[i]);
1556 }
1557 printf("]\n");
1558
1559 for (int plane = 0; plane <= dual_plane; ++plane) {
1560 printf("weights (plane %d):\n", plane);
1561 int i = 0;
1562 (void)i;
1563
1564 for (int r = 0; r < wt_d; ++r) {
1565 for (int t = 0; t < wt_h; ++t) {
1566 for (int s = 0; s < wt_w; ++s) {
1567 printf("%3d", weights[i++ * (1 + dual_plane) + plane]);
1568 }
1569 printf("\n");
1570 }
1571 if (r < wt_d - 1)
1572 printf("\n");
1573 }
1574 }
1575 }
1576
1577 compute_infill_weights(decoder.block_w, decoder.block_h, decoder.block_d);
1578
1579 if (VERBOSE_DECODE) {
1580 for (int plane = 0; plane <= dual_plane; ++plane) {
1581 printf("infilled weights (plane %d):\n", plane);
1582 int i = 0;
1583 (void)i;
1584
1585 for (int r = 0; r < decoder.block_d; ++r) {
1586 for (int t = 0; t < decoder.block_h; ++t) {
1587 for (int s = 0; s < decoder.block_w; ++s) {
1588 printf("%3d", infill_weights[plane][i++]);
1589 }
1590 printf("\n");
1591 }
1592 if (r < decoder.block_d - 1)
1593 printf("\n");
1594 }
1595 }
1596 }
1597 if (VERBOSE_DECODE)
1598 printf("\n");
1599
1600 return decode_error::ok;
1601 }
1602
write_decoded(const Decoder & decoder,uint16_t * output)1603 void Block::write_decoded(const Decoder &decoder, uint16_t *output)
1604 {
1605 /* sRGB can only be stored as unorm8. */
1606 assert(!decoder.srgb || decoder.output_unorm8);
1607
1608 if (is_void_extent) {
1609 for (int idx = 0; idx < decoder.block_w*decoder.block_h*decoder.block_d; ++idx) {
1610 if (decoder.output_unorm8) {
1611 output[idx*4+0] = void_extent_colour_r >> 8;
1612 output[idx*4+1] = void_extent_colour_g >> 8;
1613 output[idx*4+2] = void_extent_colour_b >> 8;
1614 output[idx*4+3] = void_extent_colour_a >> 8;
1615 } else {
1616 /* Store the color as FP16. */
1617 output[idx*4+0] = _mesa_uint16_div_64k_to_half(void_extent_colour_r);
1618 output[idx*4+1] = _mesa_uint16_div_64k_to_half(void_extent_colour_g);
1619 output[idx*4+2] = _mesa_uint16_div_64k_to_half(void_extent_colour_b);
1620 output[idx*4+3] = _mesa_uint16_div_64k_to_half(void_extent_colour_a);
1621 }
1622 }
1623 return;
1624 }
1625
1626 int small_block = (decoder.block_w * decoder.block_h * decoder.block_d) < 31;
1627
1628 int idx = 0;
1629 for (int z = 0; z < decoder.block_d; ++z) {
1630 for (int y = 0; y < decoder.block_h; ++y) {
1631 for (int x = 0; x < decoder.block_w; ++x) {
1632
1633 int partition;
1634 if (num_parts > 1) {
1635 partition = select_partition(partition_index, x, y, z, num_parts, small_block);
1636 assert(partition < num_parts);
1637 } else {
1638 partition = 0;
1639 }
1640
1641 /* TODO: HDR */
1642
1643 uint8x4_t e0 = endpoints_decoded[0][partition];
1644 uint8x4_t e1 = endpoints_decoded[1][partition];
1645 uint16_t c0[4], c1[4];
1646
1647 /* Expand to 16 bits. */
1648 if (decoder.srgb) {
1649 c0[0] = (uint16_t)((e0.v[0] << 8) | 0x80);
1650 c0[1] = (uint16_t)((e0.v[1] << 8) | 0x80);
1651 c0[2] = (uint16_t)((e0.v[2] << 8) | 0x80);
1652 c0[3] = (uint16_t)((e0.v[3] << 8) | 0x80);
1653
1654 c1[0] = (uint16_t)((e1.v[0] << 8) | 0x80);
1655 c1[1] = (uint16_t)((e1.v[1] << 8) | 0x80);
1656 c1[2] = (uint16_t)((e1.v[2] << 8) | 0x80);
1657 c1[3] = (uint16_t)((e1.v[3] << 8) | 0x80);
1658 } else {
1659 c0[0] = (uint16_t)((e0.v[0] << 8) | e0.v[0]);
1660 c0[1] = (uint16_t)((e0.v[1] << 8) | e0.v[1]);
1661 c0[2] = (uint16_t)((e0.v[2] << 8) | e0.v[2]);
1662 c0[3] = (uint16_t)((e0.v[3] << 8) | e0.v[3]);
1663
1664 c1[0] = (uint16_t)((e1.v[0] << 8) | e1.v[0]);
1665 c1[1] = (uint16_t)((e1.v[1] << 8) | e1.v[1]);
1666 c1[2] = (uint16_t)((e1.v[2] << 8) | e1.v[2]);
1667 c1[3] = (uint16_t)((e1.v[3] << 8) | e1.v[3]);
1668 }
1669
1670 int w[4];
1671 if (dual_plane) {
1672 int w0 = infill_weights[0][idx];
1673 int w1 = infill_weights[1][idx];
1674 w[0] = w[1] = w[2] = w[3] = w0;
1675 w[colour_component_selector] = w1;
1676 } else {
1677 int w0 = infill_weights[0][idx];
1678 w[0] = w[1] = w[2] = w[3] = w0;
1679 }
1680
1681 /* Interpolate to produce UNORM16, applying weights. */
1682 uint16_t c[4] = {
1683 (uint16_t)((c0[0] * (64 - w[0]) + c1[0] * w[0] + 32) >> 6),
1684 (uint16_t)((c0[1] * (64 - w[1]) + c1[1] * w[1] + 32) >> 6),
1685 (uint16_t)((c0[2] * (64 - w[2]) + c1[2] * w[2] + 32) >> 6),
1686 (uint16_t)((c0[3] * (64 - w[3]) + c1[3] * w[3] + 32) >> 6),
1687 };
1688
1689 if (decoder.output_unorm8) {
1690 output[idx*4+0] = c[0] >> 8;
1691 output[idx*4+1] = c[1] >> 8;
1692 output[idx*4+2] = c[2] >> 8;
1693 output[idx*4+3] = c[3] >> 8;
1694 } else {
1695 /* Store the color as FP16. */
1696 output[idx*4+0] = c[0] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[0]);
1697 output[idx*4+1] = c[1] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[1]);
1698 output[idx*4+2] = c[2] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[2]);
1699 output[idx*4+3] = c[3] == 65535 ? FP16_ONE : _mesa_uint16_div_64k_to_half(c[3]);
1700 }
1701
1702 idx++;
1703 }
1704 }
1705 }
1706 }
1707
calculate_from_weights()1708 void Block::calculate_from_weights()
1709 {
1710 wt_trits = 0;
1711 wt_quints = 0;
1712 wt_bits = 0;
1713 switch (high_prec) {
1714 case 0:
1715 switch (wt_range) {
1716 case 0x2: wt_max = 1; wt_bits = 1; break;
1717 case 0x3: wt_max = 2; wt_trits = 1; break;
1718 case 0x4: wt_max = 3; wt_bits = 2; break;
1719 case 0x5: wt_max = 4; wt_quints = 1; break;
1720 case 0x6: wt_max = 5; wt_trits = 1; wt_bits = 1; break;
1721 case 0x7: wt_max = 7; wt_bits = 3; break;
1722 default: abort();
1723 }
1724 break;
1725 case 1:
1726 switch (wt_range) {
1727 case 0x2: wt_max = 9; wt_quints = 1; wt_bits = 1; break;
1728 case 0x3: wt_max = 11; wt_trits = 1; wt_bits = 2; break;
1729 case 0x4: wt_max = 15; wt_bits = 4; break;
1730 case 0x5: wt_max = 19; wt_quints = 1; wt_bits = 2; break;
1731 case 0x6: wt_max = 23; wt_trits = 1; wt_bits = 3; break;
1732 case 0x7: wt_max = 31; wt_bits = 5; break;
1733 default: abort();
1734 }
1735 break;
1736 }
1737
1738 assert(wt_trits || wt_quints || wt_bits);
1739
1740 num_weights = wt_w * wt_h * wt_d;
1741
1742 if (dual_plane)
1743 num_weights *= 2;
1744
1745 weight_bits =
1746 (num_weights * 8 * wt_trits + 4) / 5
1747 + (num_weights * 7 * wt_quints + 2) / 3
1748 + num_weights * wt_bits;
1749 }
1750
calculate_remaining_bits()1751 void Block::calculate_remaining_bits()
1752 {
1753 int config_bits;
1754 if (num_parts > 1) {
1755 if (!is_multi_cem)
1756 config_bits = 29;
1757 else
1758 config_bits = 25 + 3 * num_parts;
1759 } else {
1760 config_bits = 17;
1761 }
1762
1763 if (dual_plane)
1764 config_bits += 2;
1765
1766 remaining_bits = 128 - config_bits - weight_bits;
1767 }
1768
calculate_colour_endpoints_size()1769 decode_error::type Block::calculate_colour_endpoints_size()
1770 {
1771 /* Specified as illegal */
1772 if (remaining_bits < (13 * num_cem_values + 4) / 5) {
1773 colour_endpoint_bits = ce_max = ce_trits = ce_quints = ce_bits = 0;
1774 return decode_error::invalid_colour_endpoints_size;
1775 }
1776
1777 /* Find the largest cem_ranges that fits within remaining_bits */
1778 for (int i = ARRAY_SIZE(cem_ranges)-1; i >= 0; --i) {
1779 int cem_bits;
1780 cem_bits = (num_cem_values * 8 * cem_ranges[i].t + 4) / 5
1781 + (num_cem_values * 7 * cem_ranges[i].q + 2) / 3
1782 + num_cem_values * cem_ranges[i].b;
1783
1784 if (cem_bits <= remaining_bits)
1785 {
1786 colour_endpoint_bits = cem_bits;
1787 ce_max = cem_ranges[i].max;
1788 ce_trits = cem_ranges[i].t;
1789 ce_quints = cem_ranges[i].q;
1790 ce_bits = cem_ranges[i].b;
1791 return decode_error::ok;
1792 }
1793 }
1794
1795 assert(0);
1796 return decode_error::invalid_colour_endpoints_size;
1797 }
1798
1799 /**
1800 * Decode ASTC 2D LDR texture data.
1801 *
1802 * \param src_width in pixels
1803 * \param src_height in pixels
1804 * \param dst_stride in bytes
1805 */
1806 extern "C" void
_mesa_unpack_astc_2d_ldr(uint8_t * dst_row,unsigned dst_stride,const uint8_t * src_row,unsigned src_stride,unsigned src_width,unsigned src_height,mesa_format format)1807 _mesa_unpack_astc_2d_ldr(uint8_t *dst_row,
1808 unsigned dst_stride,
1809 const uint8_t *src_row,
1810 unsigned src_stride,
1811 unsigned src_width,
1812 unsigned src_height,
1813 mesa_format format)
1814 {
1815 assert(_mesa_is_format_astc_2d(format));
1816 bool srgb = _mesa_is_format_srgb(format);
1817
1818 unsigned blk_w, blk_h;
1819 _mesa_get_format_block_size(format, &blk_w, &blk_h);
1820
1821 const unsigned block_size = 16;
1822 unsigned x_blocks = (src_width + blk_w - 1) / blk_w;
1823 unsigned y_blocks = (src_height + blk_h - 1) / blk_h;
1824
1825 Decoder dec(blk_w, blk_h, 1, srgb, true);
1826
1827 for (unsigned y = 0; y < y_blocks; ++y) {
1828 for (unsigned x = 0; x < x_blocks; ++x) {
1829 /* Same size as the largest block. */
1830 uint16_t block_out[12 * 12 * 4];
1831
1832 dec.decode(src_row + x * block_size, block_out);
1833
1834 /* This can be smaller with NPOT dimensions. */
1835 unsigned dst_blk_w = MIN2(blk_w, src_width - x*blk_w);
1836 unsigned dst_blk_h = MIN2(blk_h, src_height - y*blk_h);
1837
1838 for (unsigned sub_y = 0; sub_y < dst_blk_h; ++sub_y) {
1839 for (unsigned sub_x = 0; sub_x < dst_blk_w; ++sub_x) {
1840 uint8_t *dst = dst_row + sub_y * dst_stride +
1841 (x * blk_w + sub_x) * 4;
1842 const uint16_t *src = &block_out[(sub_y * blk_w + sub_x) * 4];
1843
1844 dst[0] = src[0];
1845 dst[1] = src[1];
1846 dst[2] = src[2];
1847 dst[3] = src[3];
1848 }
1849 }
1850 }
1851 src_row += src_stride;
1852 dst_row += dst_stride * blk_h;
1853 }
1854 }
1855