• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 The Android Open Source Project
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// For implementation details, please refer to:
16// https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_texture_compression_astc_hdr.txt
17
18// Please refer to this document for operator precedence (slightly different from C):
19// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#operators
20
21#version 450
22
23precision highp int;
24
25layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
26
// Per-dispatch parameters delivered to the shader as push constants and
// accessed through the instance name u_pushConstant.
27layout(push_constant) uniform ImageFormatBlock {
28    uvec2 blockSize;  // Block footprint: x/y bound the fill loops in single_color_block() and the weight-grid width/height check.
29    uint compFormat;  // Not referenced in the part of the shader visible here.
30    uint baseLayer;   // Not referenced in the part of the shader visible here.
31    uint sRGB;        // Not referenced in the part of the shader visible here.
32    uint smallBlock;  // Non-zero makes select_partition() double x and y before hashing.
33}
34u_pushConstant;
35
// Image bound at binding 0 with format rgba32ui; declared readonly.
// NOTE(review): ${type} looks like a template placeholder for the image
// dimensionality that is substituted before compilation - confirm.
36layout(binding = 0, rgba32ui) readonly uniform uimage${type} u_image0;
// Image bound at binding 1 with format rgba8ui; declared writeonly.
37layout(binding = 1, rgba8ui) writeonly uniform uimage${type} u_image1;
38
// NOTE(review): placeholder expanded before compilation. The kUnquant*Map and
// kUnquant*MapBitIdx tables used by astc_decode_block() are not defined in the
// visible source, so they presumably come from this expansion - confirm.
39${UnquantTables}
40
// HDR color endpoint modes (CEM): 2, 3, 7, 11, 14, 15.
// Indexed by CEM number; an entry is true when that mode is an HDR encoding.
const bool kHDRCEM[16] = {
        false,  //  0
        false,  //  1
        true,   //  2
        true,   //  3
        false,  //  4
        false,  //  5
        false,  //  6
        true,   //  7
        false,  //  8
        false,  //  9
        false,  // 10
        true,   // 11
        false,  // 12
        false,  // 13
        true,   // 14
        true,   // 15
};
47
48// Trit encoding table for C.2.12
// Row index is the 8-bit packed code that decode_trit() reassembles from the
// interleaved bit groups (2 + 2 + 1 + 2 + 1 bits); the five columns are the
// trit values (each 0..2) for the five values of that block, in order.
49
50const uint kTritEncodings[256][5] = {
51        {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
52        {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
53        {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
54        {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
55        {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
56        {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
57        {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
58        {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
59        {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
60        {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
61        {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
62        {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
63        {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
64        {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
65        {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
66        {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
67        {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
68        {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
69        {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
70        {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
71        {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
72        {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
73        {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
74        {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
75        {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
76        {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
77        {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
78        {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
79        {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
80        {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
81        {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
82        {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
83        {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
84        {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
85        {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
86        {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
87        {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
88        {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
89        {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
90        {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
91        {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
92        {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
93        {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
94        {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
95        {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
96        {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
97        {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
98        {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
99        {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
100        {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
101        {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
102        {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
103        {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
104        {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
105        {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
106        {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
107        {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
108        {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
109        {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
110        {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
111        {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
112        {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
113        {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
114        {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
115};
116
// Quint decoding table.
// Row index is the 7-bit packed code that decode_quint() reassembles from the
// interleaved bit groups (3 + 2 + 2 bits); the three columns are the quint
// values (each 0..4) for the three values of that block, in order.
117const uint kQuintEncodings[128][3] = {
118        {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0},
119        {4, 4, 0}, {4, 4, 4}, {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0},
120        {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4}, {0, 2, 0}, {1, 2, 0},
121        {2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
122        {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0},
123        {4, 4, 3}, {4, 4, 4}, {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1},
124        {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4}, {0, 1, 1}, {1, 1, 1},
125        {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
126        {0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1},
127        {4, 2, 4}, {2, 4, 4}, {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
128        {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4}, {0, 0, 2}, {1, 0, 2},
129        {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
130        {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2},
131        {2, 1, 4}, {3, 1, 4}, {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2},
132        {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4}, {0, 3, 2}, {1, 3, 2},
133        {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
134        {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3},
135        {0, 0, 4}, {1, 0, 4}, {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3},
136        {4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4}, {0, 2, 3}, {1, 2, 3},
137        {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
138        {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3},
139        {0, 3, 4}, {1, 3, 4}};
140
// Number of rows in kRQuantParamTable.
141const int kRQuantParamTableLength = 19;
142// T, Q, B values in Table c.2.16, including binaries, in reversed order
// Each row is {trits, quints, bits}. The trailing comment on a row is the
// largest value that encoding can represent (8 plain bits -> 255, one trit
// plus 6 bits -> 3 * 64 - 1 = 191, one quint plus 5 bits -> 5 * 32 - 1 = 159).
// Rows run from the widest range to the narrowest; astc_decode_block() scans
// from index 0 and keeps the first row whose get_bit_count() fits the bits
// available for color endpoints.
// NOTE(review): the two commented-out rows (ranges 0..4 and 0..2) are disabled
// on purpose as far as can be told, but the reason is not stated - confirm
// before re-enabling or deleting them.
143const uint kRQuantParamTable[kRQuantParamTableLength][3] = {
144        {0, 0, 8},  // 255
145        {1, 0, 6},  // 191
146        {0, 1, 5},  // 159
147        {0, 0, 7},  // 127
148        {1, 0, 5},  // 95
149        {0, 1, 4},  // 79
150        {0, 0, 6},  // 63
151        {1, 0, 4},  // 47
152        {0, 1, 3},  // 39
153        {0, 0, 5},  // 31
154        {1, 0, 3},  // 23
155        {0, 1, 2},  // 19
156        {0, 0, 4},  // 15
157        {1, 0, 2},  // 11
158        {0, 1, 1},  // 9
159        {0, 0, 3},  // 7
160        {1, 0, 1},  // 5
161        //{0, 1, 0}, // 4
162        {0, 0, 2},  // 3
163        //{1, 0, 0}, // 2
164        {0, 0, 1},  // 1
165};
166
// Returns bit number `bit` of `u` (bit 0 is the least-significant bit) as 0 or 1.
uint bit(uint u, int bit) {
    const uint shifted = u >> bit;
    return shifted & 1u;
}
170
// Extracts `bitCount` bits starting at bit position `bitStart` from the
// 128-bit value `u` and returns them right-aligned.
//
// Bit numbering: bit 0 is the least-significant bit of u[3], bit 127 the
// most-significant bit of u[0] (word index = 3 - bitStart / 32).
//
// bitCount is expected to be in [0, 32]; the result is a single uint.
//   * bitCount == 0 returns 0. decode_trit()/decode_quint() request zero bits
//     when the encoding has no plain bits, and without this guard a
//     word-aligned bitStart fell into the straddling path, which shifted by
//     32 (undefined in GLSL) and could index u[-1].
//   * Bits at positions >= 128 read as zero instead of indexing out of range.
uint bits128(uvec4 u, uint bitStart, uint bitCount) {
    if (bitCount == 0u || bitStart >= 128u) {
        return 0u;
    }

    const uint firstIdx = bitStart / 32u;
    const uint firstOffset = bitStart % 32u;
    const uint lastIdx = (bitStart + bitCount - 1u) / 32u;
    // (1 << 32) is undefined, so a full-word request gets an explicit mask.
    const uint bitMask =
            (bitCount >= 32u) ? 0xFFFFFFFFu : ((1u << bitCount) - 1u);
    const uint lowWord = u[3u - firstIdx];

    // Whole field lives in one 32-bit word.
    if (firstIdx == lastIdx) {
        return (lowWord >> firstOffset) & bitMask;
    }

    // Field straddles two words. A zero offset can only happen for
    // bitCount > 32; the low word then already is the full 32-bit result.
    if (firstOffset == 0u) {
        return lowWord & bitMask;
    }

    // firstCount is in [1, 31], so both shifts below are well defined.
    const uint firstCount = 32u - firstOffset;
    // The next-higher word does not exist past bit 127; treat it as zero.
    const uint highWord = (firstIdx < 3u) ? u[2u - firstIdx] : 0u;
    return ((highWord << firstCount) | (lowWord >> firstOffset)) & bitMask;
}
184
// Reads up to `bitCount` bits of `u` starting at `bitStart`, but never beyond
// `bitEnd`: the request is shortened to the bits that remain before bitEnd,
// so positions at or past bitEnd contribute zeros. Returns 0 when the start
// is already at or past the end.
uint bits128fillZeros(uvec4 u, uint bitStart, uint bitEnd, uint bitCount) {
    if (bitStart < bitEnd) {
        const uint available = bitEnd - bitStart;
        return bits128(u, bitStart, min(available, bitCount));
    }
    return 0u;
}
191
// Number of bits occupied by `num_vals` values encoded with the given
// trit / quint / plain-bit configuration (formula from section C.2.22):
//   ceil(8 * n / 5) per trit, ceil(7 * n / 3) per quint, n * bits plain bits.
// `trits` and `quints` act as 0/1 multipliers.
uint get_bit_count(uint num_vals, uint trits, uint quints, uint bits) {
    const uint from_trits = (8u * num_vals * trits + 4u) / 5u;
    const uint from_quints = (7u * num_vals * quints + 2u) / 3u;
    const uint from_plain_bits = bits * num_vals;
    return from_plain_bits + from_quints + from_trits;
}
199
// Reports how values are grouped in the bit stream for an encoding:
//   pack       - number of values decoded together (5 for trits, 3 for
//                quints, 1 for plain bits),
//   packedSize - number of bits one such group occupies.
// A trit encoding takes precedence when both `trits` and `quints` are 1.
void get_pack_size(uint trits,
                   uint quints,
                   uint bits,
                   out uint pack,
                   out uint packedSize) {
    // Plain-bit default; overridden below for trit / quint encodings.
    pack = 1u;
    packedSize = bits;

    if (trits == 1u) {
        pack = 5u;
        packedSize = 5u * bits + 8u;
    } else if (quints == 1u) {
        pack = 3u;
        packedSize = 3u * bits + 7u;
    }
}
216
// Decodes one trit block (five values) from `data`.
//
// Starting at bit `start`, each value contributes `n` plain low bits followed
// by a slice of the shared 8-bit trit code (2, 2, 1, 2, 1 bits). Bits at or
// beyond `end` read as zero. The reassembled code selects a row of
// kTritEncodings; each result is (trit << n) | low bits.
uint[5] decode_trit(uvec4 data, uint start, uint end, uint n) {
    const int kTritsPerBlock = 5;
    const uint kCodeSliceWidths[5] = uint[5](2u, 2u, 1u, 2u, 1u);

    uint lowBits[kTritsPerBlock];
    uint packedCode = 0u;
    uint packedWidth = 0u;
    uint cursor = start;
    for (int idx = 0; idx < kTritsPerBlock; ++idx) {
        // Plain bits of this value.
        lowBits[idx] = bits128fillZeros(data, cursor, end, n);
        cursor += n;

        // Next slice of the trit code; earlier slices occupy lower bits.
        const uint sliceWidth = kCodeSliceWidths[idx];
        const uint slice = bits128fillZeros(data, cursor, end, sliceWidth);
        cursor += sliceWidth;
        packedCode |= slice << packedWidth;
        packedWidth += sliceWidth;
    }

    uint decoded[kTritsPerBlock];
    for (int idx = 0; idx < kTritsPerBlock; ++idx) {
        decoded[idx] = (kTritEncodings[packedCode][idx] << n) | lowBits[idx];
    }
    return decoded;
}
243
// Decodes one quint block (three values) from `data`.
//
// Starting at bit `start`, each value contributes `n` plain low bits followed
// by a slice of the shared 7-bit quint code (3, 2, 2 bits). Bits at or beyond
// `end` read as zero. The reassembled code selects a row of kQuintEncodings;
// each result is (quint << n) | low bits.
uint[3] decode_quint(uvec4 data, uint start, uint end, uint n) {
    // A quint block always carries three values.
    const int kNumVals = 3;
    // Width of the quint-code slice that follows each value's plain bits.
    const int kInterleavedBits[3] = {3, 2, 2};

    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits =
                bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        // Earlier slices occupy the lower bits of the code.
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        result[i] = kQuintEncodings[encoded][i] << n | m[i];
    }
    return result;
}
271
// Number of endpoint values consumed by color endpoint mode `cem`:
// modes 0-3 use 2 values, 4-7 use 4, 8-11 use 6 and 12-15 use 8.
uint get_v_count(uint cem) {
    const uint cem_class = cem >> 2;
    return 2u * (cem_class + 1u);
}
275
// Color endpoint mode (CEM) numbers, 0..15. The kLDR* values are the case
// labels handled by decode_ldr_for_mode(); the kHDR* values (2, 3, 7, 11, 14,
// 15 - the same set flagged in kHDRCEM) are not handled there and fall into
// its default branch.
276const uint kLDRLumaDirect = 0;
277const uint kLDRLumaBaseOffset = 1;
278const uint kHDRLumaLargeRange = 2;
279const uint kHDRLumaSmallRange = 3;
280const uint kLDRLumaAlphaDirect = 4;
281const uint kLDRLumaAlphaBaseOffset = 5;
282const uint kLDRRGBBaseScale = 6;
283const uint kHDRRGBBaseScale = 7;
284const uint kLDRRGBDirect = 8;
285const uint kLDRRGBBaseOffset = 9;
286const uint kLDRRGBBaseScaleTwoA = 10;
287const uint kHDRRGBDirect = 11;
288const uint kLDRRGBADirect = 12;
289const uint kLDRRGBABaseOffset = 13;
290const uint kHDRRGBDirectLDRAlpha = 14;
291const uint kHDRRGBDirectHDRAlpha = 15;
292
// Exchanges the contents of the two vectors.
void swap(inout ivec4 v1, inout ivec4 v2) {
    const ivec4 held = v2;
    v2 = v1;
    v1 = held;
}
298
// Moves the top bit (0x80) of `a` into the top bit of `b`, then turns the
// remainder of `a` into a signed 6-bit offset:
//   b <- (b >> 1) | (a & 0x80)
//   a <- (a >> 1) & 0x3F, sign-extended from bit 5 (range -32..31)
void bit_transfer_signed(inout int a, inout int b) {
    b = (b >> 1) | (a & 0x80);
    a = (a >> 1) & 0x3F;
    // Bit 5 is the sign bit of the 6-bit offset.
    if ((a & 0x20) != 0) {
        a -= 0x40;
    }
}
307
// Replaces red and green by their average with blue; blue and alpha are
// left untouched. Uses integer division by 2.
void blue_contract(inout ivec4 val) {
    val.rg = (val.rg + val.bb) / 2;
}
312
// Converts the unquantized endpoint values of one partition into a pair of
// RGBA endpoint colors for an LDR color endpoint mode.
//
//   vals       - buffer of endpoint values
//   start_idx  - index of this partition's first value; the eight entries
//                vals[start_idx .. start_idx + 7] are always read, although a
//                mode may use fewer of them
//   mode       - color endpoint mode (one of the kLDR* constants)
//   c1, c2     - receive the low and high endpoint colors
//
// Any mode without a case below (which includes every HDR mode) produces
// all-zero endpoints.
void decode_ldr_for_mode(const uint[40] vals,
                         uint start_idx,
                         uint mode,
                         out uvec4 c1,
                         out uvec4 c2) {
    int v[8];
    for (int k = 0; k < 8; ++k) {
        v[k] = int(vals[start_idx + uint(k)]);
    }

    ivec4 lo;
    ivec4 hi;
    switch (mode) {
        case kLDRLumaDirect:
            lo = ivec4(ivec3(v[0]), 255);
            hi = ivec4(ivec3(v[1]), 255);
            break;

        case kLDRLumaBaseOffset: {
            const int base = (v[0] >> 2) | (v[1] & 0xC0);
            const int top = min(base + (v[1] & 0x3F), 0xFF);
            lo = ivec4(ivec3(base), 255);
            hi = ivec4(ivec3(top), 255);
            break;
        }

        case kLDRLumaAlphaDirect:
            lo = ivec4(ivec3(v[0]), v[2]);
            hi = ivec4(ivec3(v[1]), v[3]);
            break;

        case kLDRLumaAlphaBaseOffset: {
            bit_transfer_signed(v[1], v[0]);
            bit_transfer_signed(v[3], v[2]);

            const int top = v[0] + v[1];
            lo = clamp(ivec4(ivec3(v[0]), v[2]), 0, 255);
            hi = clamp(ivec4(ivec3(top), v[2] + v[3]), 0, 255);
            break;
        }

        case kLDRRGBBaseScale:
            // High endpoint is the base color; low endpoint is the base
            // scaled by v[3] / 256.
            hi = ivec4(v[0], v[1], v[2], 255);
            lo = ivec4((hi.rgb * v[3]) >> 8, 255);
            break;

        case kLDRRGBDirect: {
            const int sum_even = v[0] + v[2] + v[4];
            const int sum_odd = v[1] + v[3] + v[5];

            lo = ivec4(v[0], v[2], v[4], 255);
            hi = ivec4(v[1], v[3], v[5], 255);

            // A smaller "high" sum signals swapped, blue-contracted endpoints.
            if (sum_odd < sum_even) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            break;
        }

        case kLDRRGBBaseOffset: {
            bit_transfer_signed(v[1], v[0]);
            bit_transfer_signed(v[3], v[2]);
            bit_transfer_signed(v[5], v[4]);

            lo = ivec4(v[0], v[2], v[4], 255);
            hi = ivec4(v[0] + v[1], v[2] + v[3], v[4] + v[5], 255);

            // A negative total offset signals swapped, blue-contracted
            // endpoints.
            if (v[1] + v[3] + v[5] < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }

            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
            break;
        }

        case kLDRRGBBaseScaleTwoA:
            // Base color with explicit alpha per endpoint; low RGB is the
            // base scaled by v[3] / 256.
            hi = ivec4(v[0], v[1], v[2], v[5]);
            lo = ivec4((hi.rgb * v[3]) >> 8, v[4]);
            break;

        case kLDRRGBADirect: {
            // The sums are compared as unsigned values here.
            const uint sum_even = uint(v[0] + v[2] + v[4]);
            const uint sum_odd = uint(v[1] + v[3] + v[5]);

            lo = ivec4(v[0], v[2], v[4], v[6]);
            hi = ivec4(v[1], v[3], v[5], v[7]);

            if (sum_odd < sum_even) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }
            break;
        }

        case kLDRRGBABaseOffset: {
            bit_transfer_signed(v[1], v[0]);
            bit_transfer_signed(v[3], v[2]);
            bit_transfer_signed(v[5], v[4]);
            bit_transfer_signed(v[7], v[6]);

            lo = ivec4(v[0], v[2], v[4], v[6]);
            hi = ivec4(v[0] + v[1], v[2] + v[3], v[4] + v[5], v[6] + v[7]);

            if (v[1] + v[3] + v[5] < 0) {
                swap(lo, hi);
                blue_contract(lo);
                blue_contract(hi);
            }

            lo = clamp(lo, 0, 255);
            hi = clamp(hi, 0, 255);
            break;
        }

        default:
            // Unimplemented color encoding.
            // TODO(google): Is this the correct error handling?
            lo = ivec4(0, 0, 0, 0);
            hi = lo;
            break;
    }

    c1 = uvec4(lo);
    c2 = uvec4(hi);
}
451
// 32-bit integer mixing function used to derive the partition seeds in
// select_partition(). All arithmetic wraps modulo 2^32.
uint hash52(uint p) {
    p = p ^ (p >> 15);
    p = p - (p << 17);
    p = p + (p << 7);
    p = p + (p << 4);
    p = p ^ (p >> 5);
    p = p + (p << 16);
    p = p ^ (p >> 7);
    p = p ^ (p >> 3);
    p = p ^ (p << 6);
    p = p ^ (p >> 17);
    return p;
}
465
// Returns the partition index (0..3) of the texel at (x, y) for the given
// partition seed and partition count.
//
// With a single partition the answer is always 0. Otherwise twelve 4-bit
// seeds are taken from hash52(seed + (partitioncount - 1) * 1024), squared
// and shifted, and combined with the texel position into four 6-bit scores;
// the largest score wins, ties going to the lower index. Scores of
// partitions that do not exist for this partition count are zeroed.
// x and y are doubled first when u_pushConstant.smallBlock is non-zero.
// The z coordinate is fixed at 0 here.
uint select_partition(uint seed, uint x, uint y, uint partitioncount) {
    if (partitioncount == 1u) {
        return 0u;
    }

    const uint z = 0u;
    if (u_pushConstant.smallBlock != 0u) {
        x <<= 1;
        y <<= 1;
    }

    seed += (partitioncount - 1u) * 1024u;
    const uint rnum = hash52(seed);
    const uvec4 r = uvec4(rnum);

    // Per-score coefficients (components: a, b, c, d).
    // x: seed1, seed3, seed5, seed7   y: seed2, seed4, seed6, seed8
    // z: seed11, seed12, seed9, seed10
    uvec4 coefX = (r >> uvec4(0u, 8u, 16u, 24u)) & 0xFu;
    uvec4 coefY = (r >> uvec4(4u, 12u, 20u, 28u)) & 0xFu;
    uvec4 coefZ = uvec4(rnum >> 26,
                        (rnum >> 30) | (rnum << 2),
                        rnum >> 18,
                        rnum >> 22) &
                  0xFu;

    coefX *= coefX;
    coefY *= coefY;
    coefZ *= coefZ;

    const uint seedShift = ((seed & 2u) != 0u) ? 4u : 5u;
    const uint countShift = (partitioncount == 3u) ? 6u : 5u;
    const bool oddSeed = (seed & 1u) != 0u;
    const uint sh1 = oddSeed ? seedShift : countShift;
    const uint sh2 = oddSeed ? countShift : seedShift;
    const uint sh3 = ((seed & 0x10u) != 0u) ? sh1 : sh2;

    coefX >>= sh1;
    coefY >>= sh2;
    coefZ >>= sh3;

    uvec4 score = coefX * x + coefY * y + coefZ * z +
                  (r >> uvec4(14u, 10u, 6u, 2u));
    score &= 0x3Fu;

    if (partitioncount < 4u) {
        score.w = 0u;
    }
    if (partitioncount < 3u) {
        score.z = 0u;
    }

    if (score.x >= score.y && score.x >= score.z && score.x >= score.w) {
        return 0u;
    }
    if (score.y >= score.z && score.y >= score.w) {
        return 1u;
    }
    return (score.z >= score.w) ? 2u : 3u;
}
550
// Builds a decoded block in which every texel of the
// blockSize.x * blockSize.y footprint is `color`. Texels are stored row by
// row; entries beyond the footprint are left unwritten.
uvec4[144] single_color_block(uvec4 color) {
    uvec4 texels[144];
    const uint texelCount =
            u_pushConstant.blockSize.x * u_pushConstant.blockSize.y;
    for (uint idx = 0u; idx < texelCount; ++idx) {
        texels[idx] = color;
    }
    return texels;
}
560
// Block returned for invalid or unsupported encodings: every texel is
// opaque magenta (255, 0, 255, 255).
uvec4[144] error_color_block() {
    const uvec4 kErrorColor = uvec4(0xff, 0, 0xff, 0xff);
    return single_color_block(kErrorColor);
}
564
565uvec4[144] astc_decode_block(const uvec4 u) {
566    uint d;
567    uint hdr;
568    uint b;
569    uint a;
570    uint r;
571    uint width;
572    uint height;
573    uvec4 cem;
574    uint weightGrid[120];
575    const uint u3 = u[3];
576    const uint b87 = u3 >> 7 & 3;
577    const uint b65 = u3 >> 5 & 3;
578    const uint b32 = u3 >> 2 & 3;
579    a = b65;
580    b = b87;
581    d = bit(u3, 10);
582    hdr = bit(u3, 9);
583    if ((u3 & 3) == 0) {
584        r = b32 << 1 | bit(u3, 4);
585        if (b87 == 0) {
586            width = 12;
587            height = a + 2;
588        } else if (b87 == 1) {
589            width = a + 2;
590            height = 12;
591        } else if (b87 == 3) {
592            if (b65 == 0) {
593                width = 6;
594                height = 10;
595            } else if (b65 == 1) {
596                width = 10;
597                height = 6;
598            } else if ((u3 & 0xDFF) == 0xDFC) {
599                // Void-extent
600                // In void extend, the last 12 bits should be
601                // 1 1 D 1 1 1 1 1 1 1 0 0
602                // Where D is the HDR bit
603
604                uvec4 color = uvec4(u[1] >> 8 & 0xff, u[1] >> 24 & 0xff,
605                                    u[0] >> 8 & 0xff, u[0] >> 24 & 0xff);
606                return single_color_block(color);
607            } else {  // reserved
608                return error_color_block();
609            }
610        } else {  // b87 == 2
611            b = u3 >> 9 & 3;
612            width = a + 6;
613            height = b + 6;
614            d = 0;
615            hdr = 0;
616        }
617    } else {
618        r = (u3 & 3) << 1 | bit(u3, 4);
619        if (b32 == 0) {
620            width = b + 4;
621            height = a + 2;
622        } else if (b32 == 1) {
623            width = b + 8;
624            height = a + 2;
625        } else if (b32 == 2) {
626            width = a + 2;
627            height = b + 8;
628        } else if (bit(u3, 8) == 0) {
629            width = a + 2;
630            height = (b & 1) + 6;
631        } else {
632            width = (b & 1) + 2;
633            height = a + 2;
634        }
635    }
636
637    if (width > u_pushConstant.blockSize.x ||
638        height > u_pushConstant.blockSize.y) {
639        return error_color_block();
640    }
641    // Decode weight
642    uint trits = 0;
643    uint quints = 0;
644    uint bits = 0;
645    const uint weightCounts = height * width * (d + 1);
646    const int kMaxNumWeights = 64;
647    if (kMaxNumWeights < weightCounts) {
648        return error_color_block();
649    }
650    {
651        if (hdr == 0) {
652            switch (r) {
653                case 2:
654                    bits = 1;
655                    break;
656                case 3:
657                    trits = 1;
658                    break;
659                case 4:
660                    bits = 2;
661                    break;
662                case 5:
663                    quints = 1;
664                    break;
665                case 6:
666                    trits = 1;
667                    bits = 1;
668                    break;
669                case 7:
670                    bits = 3;
671                    break;
672                default:
673                    return error_color_block();
674            }
675        } else {
676            switch (r) {
677                case 2:
678                    bits = 1;
679                    quints = 1;
680                    break;
681                case 3:
682                    trits = 1;
683                    bits = 2;
684                    break;
685                case 4:
686                    bits = 4;
687                    break;
688                case 5:
689                    quints = 1;
690                    bits = 2;
691                    break;
692                case 6:
693                    trits = 1;
694                    bits = 3;
695                    break;
696                case 7:
697                    bits = 5;
698                    break;
699                default:
700                    return error_color_block();
701            }
702        }
703        uint packedSize = 0;
704        uint pack = 0;
705        get_pack_size(trits, quints, bits, pack, packedSize);
706        uint srcIdx = 0;
707        uint dstIdx = 0;
708        uvec4 uReversed = bitfieldReverse(u);
709        const uint weightBitCount =
710                get_bit_count(weightCounts, trits, quints, bits);
711        const int kWeightGridMinBitLength = 24;
712        const int kWeightGridMaxBitLength = 96;
713        if (weightBitCount < kWeightGridMinBitLength ||
714            weightBitCount > kWeightGridMaxBitLength) {
715            return error_color_block();
716        }
717        uReversed =
718                uvec4(uReversed[3], uReversed[2], uReversed[1], uReversed[0]);
719        const uint kUnquantBinMulTable[] = {0x3f, 0x15, 0x9, 0x4, 0x2, 0x1};
720        const uint kUnquantBinMovTable[] = {0x8, 0x8, 0x8, 0x2, 0x4, 0x8};
721        while (dstIdx < weightCounts) {
722            if (trits == 1) {
723                uint decoded[5] =
724                        decode_trit(uReversed, srcIdx, weightBitCount, bits);
725                // uint decoded[5] = {0, 0, 0, 0, 0};
726                for (int i = 0; i < 5; i++) {
727                    weightGrid[dstIdx] = kUnquantTritWeightMap
728                            [kUnquantTritWeightMapBitIdx[bits] + decoded[i]];
729                    if (weightGrid[dstIdx] > 32) {
730                        weightGrid[dstIdx] += 1;
731                    }
732                    dstIdx++;
733                    if (dstIdx >= weightCounts) {
734                        break;
735                    }
736                }
737            } else if (quints == 1) {
738                uint decoded[3] =
739                        decode_quint(uReversed, srcIdx, weightBitCount, bits);
740                for (int i = 0; i < 3; i++) {
741                    // TODO: handle overflow in the last
742                    weightGrid[dstIdx] = kUnquantQuintWeightMap
743                            [kUnquantQuintWeightMapBitIdx[bits] + decoded[i]];
744                    if (weightGrid[dstIdx] > 32) {
745                        weightGrid[dstIdx] += 1;
746                    }
747                    dstIdx++;
748                    if (dstIdx >= weightCounts) {
749                        break;
750                    }
751                }
752            } else {
753                uint decodedRaw = bits128(uReversed, srcIdx, packedSize);
754                uint decoded = decodedRaw * kUnquantBinMulTable[bits - 1] |
755                               decodedRaw >> kUnquantBinMovTable[bits - 1];
756                weightGrid[dstIdx] = decoded;
757                if (weightGrid[dstIdx] > 32) {
758                    weightGrid[dstIdx] += 1;
759                }
760                dstIdx++;
761            }
762            srcIdx += packedSize;
763        }
764    }
765    uint partitionCount = (u3 >> 11 & 3) + 1;
766    if (d == 1 && partitionCount == 4) {
767        return error_color_block();
768    }
769    const uint weightStart =
770            128 - get_bit_count(weightCounts, trits, quints, bits);
771    uint dualPlaneStart = 0;
772    // Decode cem mode
773    if (partitionCount == 1) {
774        // Single-partition mode
775        cem[0] = u3 >> 13 & 0xf;
776        dualPlaneStart = weightStart - d * 2;
777    } else {
778        // Multi-partition mode
779        // Calculate CEM for all 4 partitions, even when partitionCount < 4
780        uint partMode = u3 >> 23 & 3;
781        const uint kExtraMBitsTable[4] = {0, 2, 5, 8};
782        const uint extraMBitCount =
783                (partMode == 0) ? 0 : kExtraMBitsTable[partitionCount - 1];
784        const uint extraMStart = weightStart - extraMBitCount;
785        dualPlaneStart = extraMStart - d * 2;
786
787        if (partMode == 0) {
788            uint cem_all = u3 >> 25 & 0xf;
789            cem = uvec4(cem_all, cem_all, cem_all, cem_all);
790        } else {
791            uint cemBase = partMode - 1;
792            uvec4 cemHigh = cemBase + uvec4(bit(u3, 25), bit(u3, 26),
793                                            bit(u3, 27), bit(u3, 28));
794            const uint extraM = bits128(u, extraMStart, extraMBitCount);
795            const uint kMainMBitsTable[4] = {0, 2, 1, 0};
796            const uint mainMBitCount = kMainMBitsTable[partitionCount - 1];
797            const uint m = extraM << mainMBitCount |
798                           ((u3 >> 27 & 3) >> (2 - mainMBitCount));
799            cem = cemHigh << 2 |
800                  uvec4(m & 3, m >> 2 & 3, m >> 4 & 3, m >> 6 & 3);
801        }
802    }
803    // Decode end points
804    uvec4 endPoints[4][2];
805    {
806        uint totalV = 0;
807        for (uint part = 0; part < partitionCount; part++) {
808            totalV += get_v_count(cem[part]);
809        }
810        const uint epStart = (partitionCount == 1) ? 17 : 29;
811        const uint totalAvailBits = dualPlaneStart - epStart;
812        if (totalAvailBits >= 128) {
813            // overflowed
814            return error_color_block();
815        }
816        uint epQuints = 0;
817        uint epTrits = 0;
818        uint epBits = 0;
819        uint i;
820        for (i = 0; i < kRQuantParamTableLength; i++) {
821            epTrits = kRQuantParamTable[i][0];
822            epQuints = kRQuantParamTable[i][1];
823            epBits = kRQuantParamTable[i][2];
824            if (get_bit_count(totalV, epTrits, epQuints, epBits) <=
825                totalAvailBits) {
826                break;
827            }
828        }
829        if (i >= kRQuantParamTableLength) {
830            return error_color_block();
831        }
832
833        const uint epBitCount =
834                get_bit_count(totalV, epTrits, epQuints, epBits);
835        const uint epEnd = epStart + epBitCount;
836        uint packedSize = 0;
837        uint pack = 0;
838        get_pack_size(epTrits, epQuints, epBits, pack, packedSize);
839
840        // Decode end point parameters into buffer
841        uint vBuffer[40];
842        uint srcIdx = epStart;
843        uint dstIdx = 0;
844        const uint kUnquantBinMulTable[8] = {0xff, 0x55, 0x24, 0x11,
845                                             0x8,  0x4,  0x2,  0x1};
846        const uint kUnquantBinMovTable[8] = {8, 8, 1, 8, 2, 4, 6, 8};
847        while (dstIdx < totalV) {
848            if (epTrits == 1) {
849                uint decoded[5] = decode_trit(u, srcIdx, epEnd, epBits);
850                for (int i = 0; i < 5; i++) {
851                    vBuffer[dstIdx] = kUnquantTritColorMap
852                            [kUnquantTritColorMapBitIdx[epBits] + decoded[i]];
853                    dstIdx++;
854                    if (dstIdx >= totalV) {
855                        break;
856                    }
857                }
858            } else if (epQuints == 1) {
859                uint decoded[3] = decode_quint(u, srcIdx, epEnd, epBits);
860                for (int i = 0; i < 3; i++) {
861                    vBuffer[dstIdx] = kUnquantQuintColorMap
862                            [kUnquantQuintColorMapBitIdx[epBits] + decoded[i]];
863                    dstIdx++;
864                    if (dstIdx >= totalV) {
865                        break;
866                    }
867                }
868            } else {
869                uint src = bits128(u, srcIdx, packedSize);
870                uint decoded = src * kUnquantBinMulTable[epBits - 1] |
871                               src >> kUnquantBinMovTable[epBits - 1];
872                vBuffer[dstIdx] = decoded;
873                dstIdx++;
874            }
875            srcIdx += packedSize;
876        }
877        uint bufferIdx = 0;
878        for (uint part = 0; part < partitionCount; part++) {
879            // TODO: HDR support
880            decode_ldr_for_mode(vBuffer, bufferIdx, cem[part],
881                                endPoints[part][0], endPoints[part][1]);
882            bufferIdx += get_v_count(cem[part]);
883        }
884    }
885    uvec4 ret[144];
886    {
887        uvec2 dst = (1024 + u_pushConstant.blockSize / 2) /
888                    (u_pushConstant.blockSize - 1);
889        uint dd = d + 1;
890        for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
891            for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
892                uint part =
893                        select_partition(u3 >> 13 & 1023, w, h, partitionCount);
894                if (kHDRCEM[cem[part]]) {
895                    // HDR not supported
896                    ret[h * u_pushConstant.blockSize.x + w] = uvec4(0xff, 0,
897                                                                    0xff, 0xff);
898                    continue;
899                }
900                // Calculate weight
901                uvec2 st = uvec2(w, h);
902                uvec2 cst = dst * st;
903                uvec2 gst = (cst * (uvec2(width, height) - 1) + 32) >> 6;
904                uvec2 jst = gst >> 4;
905                uvec2 fst = gst & 0xf;
906                uint v0 = jst.x + jst.y * width;
907                uvec2 p00 = uvec2(weightGrid[v0 * dd], weightGrid[v0 * dd + 1]);
908                uvec2 p01 = uvec2(weightGrid[(v0 + 1) * dd],
909                                  weightGrid[(v0 + 1) * dd + 1]);
910                uvec2 p10 = uvec2(weightGrid[(v0 + width) * dd],
911                                  weightGrid[(v0 + width) * dd + 1]);
912                uvec2 p11 = uvec2(weightGrid[(v0 + width + 1) * dd],
913                                  weightGrid[(v0 + width + 1) * dd + 1]);
914                uint w11 = (fst.x * fst.y + 8) >> 4;
915                uint w10 = fst.y - w11;
916                uint w01 = fst.x - w11;
917                uint w00 = 16 - fst.x - fst.y + w11;
918                uvec2 i = (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >>
919                          4;
920
921                uvec4 c0 = endPoints[part][0];
922                uvec4 c1 = endPoints[part][1];
923                uvec4 c = (c0 * (64 - i[0]) + c1 * i[0] + 32) / 64;
924                if (d == 1) {
925                    uint ccs = bits128(u, dualPlaneStart, 2);
926                    c[ccs] = (c0[ccs] * (64 - i[1]) + c1[ccs] * i[1] + 32) / 64;
927                }
928                ret[h * u_pushConstant.blockSize.x + w] = c;
929            }
930        }
931    }
932    return ret;
933}
934
// Image coordinate helper for the 1DArray variant of this shader template.
// Keeps the x and z components of `pos` and discards y, yielding the
// two-component coordinate passed to imageLoad/imageStore in main().
// NOTE(review): z presumably carries the array layer (main() adds baseLayer
// to pos.z) -- confirm against the image binding.
ivec2 getPos1DArray(ivec3 pos) {
    return pos.xz;
}
938
// Image coordinate helper for the 2DArray variant of this shader template.
// The three-component position is used unchanged.
ivec3 getPos2DArray(ivec3 pos) {
    return pos.xyz;
}
942
// Image coordinate helper for the 3D variant of this shader template.
// The three-component position is used unchanged.
ivec3 getPos3D(ivec3 pos) {
    return pos.xyz;
}
946
// Number of texel rows main() writes per block for the 1DArray variant:
// always a single row.
uint block_y_size_1DArray() {
    return 1u;
}
950
// Number of texel rows main() writes per block for the 2DArray variant:
// the block height supplied through the push constants.
uint block_y_size_2DArray() {
    const uint rows = u_pushConstant.blockSize.y;
    return rows;
}
954
// Number of texel rows main() writes per block for the 3D variant:
// the block height supplied through the push constants.
uint block_y_size_3D() {
    const uint rows = u_pushConstant.blockSize.y;
    return rows;
}
958
// Reverses the byte order of each 32-bit component of `a` independently
// (byte 0 <-> byte 3, byte 1 <-> byte 2). Component order is not changed.
uvec4 flip32(uvec4 a) {
    // Unsigned shifts discard bits that leave the word, so the outermost
    // bytes need no mask; the two middle bytes are masked after shifting.
    const uvec4 byte0ToTop = a << 24;
    const uvec4 byte1Up = (a << 8) & 0x00ff0000u;
    const uvec4 byte2Down = (a >> 8) & 0x0000ff00u;
    const uvec4 byte3ToBottom = a >> 24;
    return byte0ToTop | byte1Up | byte2Down | byte3ToBottom;
}
963
// Entry point: each invocation reads one compressed block from u_image0,
// decodes it with astc_decode_block(), and writes the resulting texels to
// u_image1.
//
// The invocation ID (with baseLayer added to z) addresses the source block.
// Destination texels are written at blockCoord.xy * blockSize + (x, y).
// The `${type}` placeholders select the getPos*/block_y_size_* helper that
// matches the image dimensionality.
// NOTE(review): `${type}` is presumably substituted before compilation --
// confirm against the build.
void main(void) {
    ivec3 blockCoord = ivec3(gl_GlobalInvocationID.xyz);
    blockCoord.z += int(u_pushConstant.baseLayer);

    // Load the four 32-bit words of the block and reverse their order
    // before handing them to the decoder.
    const uvec4 loaded = uvec4(imageLoad(u_image0, getPos${type}(blockCoord)));
    const uvec4 blockWords = loaded.wzyx;
    uvec4[144] texels = astc_decode_block(blockWords);

    const uint blockWidth = u_pushConstant.blockSize.x;
    const uint rowCount = block_y_size_${type}();
    // Top-left texel of this block in the destination image.
    const uvec2 origin = uvec2(blockCoord.xy) * u_pushConstant.blockSize;

    for (uint row = 0; row < rowCount; row++) {
        // Decoded texels are stored row-major with a stride of blockWidth.
        const uint rowBase = row * blockWidth;
        for (uint col = 0; col < blockWidth; col++) {
            const ivec3 texelCoord =
                    ivec3(origin + uvec2(col, row), blockCoord.z);
            imageStore(u_image1, getPos${type}(texelCoord),
                       texels[rowBase + col]);
        }
    }
}
981