// Copyright 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// For implementation details, please refer to:
// https://www.khronos.org/registry/OpenGL/extensions/KHR/KHR_texture_compression_astc_hdr.txt

// Please refer to this document for operator precedence (slightly different
// from C):
// https://www.khronos.org/registry/OpenGL/specs/gl/GLSLangSpec.4.60.html#operators

#version 450

precision highp int;

// One invocation decodes one compressed block.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

layout(push_constant) uniform ImageFormatBlock {
    uvec2 blockSize;
    uint compFormat;
    uint baseLayer;
    uint sRGB;
    uint smallBlock;
}
u_pushConstant;

// u_image0: source, one 128-bit compressed block per texel.
// u_image1: destination, one decoded RGBA8 texel per texel.
layout(binding = 0, rgba32ui) readonly uniform uimage${type} u_image0;
layout(binding = 1, rgba8ui) writeonly uniform uimage${type} u_image1;

${UnquantTables}

// HDR CEM: 2, 3, 7, 11, 14, 15

const bool kHDRCEM[16] = {
    false, false, true,  true, false, false, false, true,
    false, false, false, true, false, false, true,  true,
};

// Encoding table for C.2.12
// Indexed by the 8-bit packed value; yields the five trits it encodes.

const uint kTritEncodings[256][5] = {
    {0, 0, 0, 0, 0}, {1, 0, 0, 0, 0}, {2, 0, 0, 0, 0}, {0, 0, 2, 0, 0},
    {0, 1, 0, 0, 0}, {1, 1, 0, 0, 0}, {2, 1, 0, 0, 0}, {1, 0, 2, 0, 0},
    {0, 2, 0, 0, 0}, {1, 2, 0, 0, 0}, {2, 2, 0, 0, 0}, {2, 0, 2, 0, 0},
    {0, 2, 2, 0, 0}, {1, 2, 2, 0, 0}, {2, 2, 2, 0, 0}, {2, 0, 2, 0, 0},
    {0, 0, 1, 0, 0}, {1, 0, 1, 0, 0}, {2, 0, 1, 0, 0}, {0, 1, 2, 0, 0},
    {0, 1, 1, 0, 0}, {1, 1, 1, 0, 0}, {2, 1, 1, 0, 0}, {1, 1, 2, 0, 0},
    {0, 2, 1, 0, 0}, {1, 2, 1, 0, 0}, {2, 2, 1, 0, 0}, {2, 1, 2, 0, 0},
    {0, 0, 0, 2, 2}, {1, 0, 0, 2, 2}, {2, 0, 0, 2, 2}, {0, 0, 2, 2, 2},
    {0, 0, 0, 1, 0}, {1, 0, 0, 1, 0}, {2, 0, 0, 1, 0}, {0, 0, 2, 1, 0},
    {0, 1, 0, 1, 0}, {1, 1, 0, 1, 0}, {2, 1, 0, 1, 0}, {1, 0, 2, 1, 0},
    {0, 2, 0, 1, 0}, {1, 2, 0, 1, 0}, {2, 2, 0, 1, 0}, {2, 0, 2, 1, 0},
    {0, 2, 2, 1, 0}, {1, 2, 2, 1, 0}, {2, 2, 2, 1, 0}, {2, 0, 2, 1, 0},
    {0, 0, 1, 1, 0}, {1, 0, 1, 1, 0}, {2, 0, 1, 1, 0}, {0, 1, 2, 1, 0},
    {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {1, 1, 2, 1, 0},
    {0, 2, 1, 1, 0}, {1, 2, 1, 1, 0}, {2, 2, 1, 1, 0}, {2, 1, 2, 1, 0},
    {0, 1, 0, 2, 2}, {1, 1, 0, 2, 2}, {2, 1, 0, 2, 2}, {1, 0, 2, 2, 2},
    {0, 0, 0, 2, 0}, {1, 0, 0, 2, 0}, {2, 0, 0, 2, 0}, {0, 0, 2, 2, 0},
    {0, 1, 0, 2, 0}, {1, 1, 0, 2, 0}, {2, 1, 0, 2, 0}, {1, 0, 2, 2, 0},
    {0, 2, 0, 2, 0}, {1, 2, 0, 2, 0}, {2, 2, 0, 2, 0}, {2, 0, 2, 2, 0},
    {0, 2, 2, 2, 0}, {1, 2, 2, 2, 0}, {2, 2, 2, 2, 0}, {2, 0, 2, 2, 0},
    {0, 0, 1, 2, 0}, {1, 0, 1, 2, 0}, {2, 0, 1, 2, 0}, {0, 1, 2, 2, 0},
    {0, 1, 1, 2, 0}, {1, 1, 1, 2, 0}, {2, 1, 1, 2, 0}, {1, 1, 2, 2, 0},
    {0, 2, 1, 2, 0}, {1, 2, 1, 2, 0}, {2, 2, 1, 2, 0}, {2, 1, 2, 2, 0},
    {0, 2, 0, 2, 2}, {1, 2, 0, 2, 2}, {2, 2, 0, 2, 2}, {2, 0, 2, 2, 2},
    {0, 0, 0, 0, 2}, {1, 0, 0, 0, 2}, {2, 0, 0, 0, 2}, {0, 0, 2, 0, 2},
    {0, 1, 0, 0, 2}, {1, 1, 0, 0, 2}, {2, 1, 0, 0, 2}, {1, 0, 2, 0, 2},
    {0, 2, 0, 0, 2}, {1, 2, 0, 0, 2}, {2, 2, 0, 0, 2}, {2, 0, 2, 0, 2},
    {0, 2, 2, 0, 2}, {1, 2, 2, 0, 2}, {2, 2, 2, 0, 2}, {2, 0, 2, 0, 2},
    {0, 0, 1, 0, 2}, {1, 0, 1, 0, 2}, {2, 0, 1, 0, 2}, {0, 1, 2, 0, 2},
    {0, 1, 1, 0, 2}, {1, 1, 1, 0, 2}, {2, 1, 1, 0, 2}, {1, 1, 2, 0, 2},
    {0, 2, 1, 0, 2}, {1, 2, 1, 0, 2}, {2, 2, 1, 0, 2}, {2, 1, 2, 0, 2},
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 0, 2, 2, 2},
    {0, 0, 0, 0, 1}, {1, 0, 0, 0, 1}, {2, 0, 0, 0, 1}, {0, 0, 2, 0, 1},
    {0, 1, 0, 0, 1}, {1, 1, 0, 0, 1}, {2, 1, 0, 0, 1}, {1, 0, 2, 0, 1},
    {0, 2, 0, 0, 1}, {1, 2, 0, 0, 1}, {2, 2, 0, 0, 1}, {2, 0, 2, 0, 1},
    {0, 2, 2, 0, 1}, {1, 2, 2, 0, 1}, {2, 2, 2, 0, 1}, {2, 0, 2, 0, 1},
    {0, 0, 1, 0, 1}, {1, 0, 1, 0, 1}, {2, 0, 1, 0, 1}, {0, 1, 2, 0, 1},
    {0, 1, 1, 0, 1}, {1, 1, 1, 0, 1}, {2, 1, 1, 0, 1}, {1, 1, 2, 0, 1},
    {0, 2, 1, 0, 1}, {1, 2, 1, 0, 1}, {2, 2, 1, 0, 1}, {2, 1, 2, 0, 1},
    {0, 0, 1, 2, 2}, {1, 0, 1, 2, 2}, {2, 0, 1, 2, 2}, {0, 1, 2, 2, 2},
    {0, 0, 0, 1, 1}, {1, 0, 0, 1, 1}, {2, 0, 0, 1, 1}, {0, 0, 2, 1, 1},
    {0, 1, 0, 1, 1}, {1, 1, 0, 1, 1}, {2, 1, 0, 1, 1}, {1, 0, 2, 1, 1},
    {0, 2, 0, 1, 1}, {1, 2, 0, 1, 1}, {2, 2, 0, 1, 1}, {2, 0, 2, 1, 1},
    {0, 2, 2, 1, 1}, {1, 2, 2, 1, 1}, {2, 2, 2, 1, 1}, {2, 0, 2, 1, 1},
    {0, 0, 1, 1, 1}, {1, 0, 1, 1, 1}, {2, 0, 1, 1, 1}, {0, 1, 2, 1, 1},
    {0, 1, 1, 1, 1}, {1, 1, 1, 1, 1}, {2, 1, 1, 1, 1}, {1, 1, 2, 1, 1},
    {0, 2, 1, 1, 1}, {1, 2, 1, 1, 1}, {2, 2, 1, 1, 1}, {2, 1, 2, 1, 1},
    {0, 1, 1, 2, 2}, {1, 1, 1, 2, 2}, {2, 1, 1, 2, 2}, {1, 1, 2, 2, 2},
    {0, 0, 0, 2, 1}, {1, 0, 0, 2, 1}, {2, 0, 0, 2, 1}, {0, 0, 2, 2, 1},
    {0, 1, 0, 2, 1}, {1, 1, 0, 2, 1}, {2, 1, 0, 2, 1}, {1, 0, 2, 2, 1},
    {0, 2, 0, 2, 1}, {1, 2, 0, 2, 1}, {2, 2, 0, 2, 1}, {2, 0, 2, 2, 1},
    {0, 2, 2, 2, 1}, {1, 2, 2, 2, 1}, {2, 2, 2, 2, 1}, {2, 0, 2, 2, 1},
    {0, 0, 1, 2, 1}, {1, 0, 1, 2, 1}, {2, 0, 1, 2, 1}, {0, 1, 2, 2, 1},
    {0, 1, 1, 2, 1}, {1, 1, 1, 2, 1}, {2, 1, 1, 2, 1}, {1, 1, 2, 2, 1},
    {0, 2, 1, 2, 1}, {1, 2, 1, 2, 1}, {2, 2, 1, 2, 1}, {2, 1, 2, 2, 1},
    {0, 2, 1, 2, 2}, {1, 2, 1, 2, 2}, {2, 2, 1, 2, 2}, {2, 1, 2, 2, 2},
    {0, 0, 0, 1, 2}, {1, 0, 0, 1, 2}, {2, 0, 0, 1, 2}, {0, 0, 2, 1, 2},
    {0, 1, 0, 1, 2}, {1, 1, 0, 1, 2}, {2, 1, 0, 1, 2}, {1, 0, 2, 1, 2},
    {0, 2, 0, 1, 2}, {1, 2, 0, 1, 2}, {2, 2, 0, 1, 2}, {2, 0, 2, 1, 2},
    {0, 2, 2, 1, 2}, {1, 2, 2, 1, 2}, {2, 2, 2, 1, 2}, {2, 0, 2, 1, 2},
    {0, 0, 1, 1, 2}, {1, 0, 1, 1, 2}, {2, 0, 1, 1, 2}, {0, 1, 2, 1, 2},
    {0, 1, 1, 1, 2}, {1, 1, 1, 1, 2}, {2, 1, 1, 1, 2}, {1, 1, 2, 1, 2},
    {0, 2, 1, 1, 2}, {1, 2, 1, 1, 2}, {2, 2, 1, 1, 2}, {2, 1, 2, 1, 2},
    {0, 2, 2, 2, 2}, {1, 2, 2, 2, 2}, {2, 2, 2, 2, 2}, {2, 1, 2, 2, 2},
};

// Indexed by the 7-bit packed value; yields the three quints it encodes.
const uint kQuintEncodings[128][3] = {
    {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0}, {4, 0, 0}, {0, 4, 0},
    {4, 4, 0}, {4, 4, 4}, {0, 1, 0}, {1, 1, 0}, {2, 1, 0}, {3, 1, 0},
    {4, 1, 0}, {1, 4, 0}, {4, 4, 1}, {4, 4, 4}, {0, 2, 0}, {1, 2, 0},
    {2, 2, 0}, {3, 2, 0}, {4, 2, 0}, {2, 4, 0}, {4, 4, 2}, {4, 4, 4},
    {0, 3, 0}, {1, 3, 0}, {2, 3, 0}, {3, 3, 0}, {4, 3, 0}, {3, 4, 0},
    {4, 4, 3}, {4, 4, 4}, {0, 0, 1}, {1, 0, 1}, {2, 0, 1}, {3, 0, 1},
    {4, 0, 1}, {0, 4, 1}, {4, 0, 4}, {0, 4, 4}, {0, 1, 1}, {1, 1, 1},
    {2, 1, 1}, {3, 1, 1}, {4, 1, 1}, {1, 4, 1}, {4, 1, 4}, {1, 4, 4},
    {0, 2, 1}, {1, 2, 1}, {2, 2, 1}, {3, 2, 1}, {4, 2, 1}, {2, 4, 1},
    {4, 2, 4}, {2, 4, 4}, {0, 3, 1}, {1, 3, 1}, {2, 3, 1}, {3, 3, 1},
    {4, 3, 1}, {3, 4, 1}, {4, 3, 4}, {3, 4, 4}, {0, 0, 2}, {1, 0, 2},
    {2, 0, 2}, {3, 0, 2}, {4, 0, 2}, {0, 4, 2}, {2, 0, 4}, {3, 0, 4},
    {0, 1, 2}, {1, 1, 2}, {2, 1, 2}, {3, 1, 2}, {4, 1, 2}, {1, 4, 2},
    {2, 1, 4}, {3, 1, 4}, {0, 2, 2}, {1, 2, 2}, {2, 2, 2}, {3, 2, 2},
    {4, 2, 2}, {2, 4, 2}, {2, 2, 4}, {3, 2, 4}, {0, 3, 2}, {1, 3, 2},
    {2, 3, 2}, {3, 3, 2}, {4, 3, 2}, {3, 4, 2}, {2, 3, 4}, {3, 3, 4},
    {0, 0, 3}, {1, 0, 3}, {2, 0, 3}, {3, 0, 3}, {4, 0, 3}, {0, 4, 3},
    {0, 0, 4}, {1, 0, 4}, {0, 1, 3}, {1, 1, 3}, {2, 1, 3}, {3, 1, 3},
    {4, 1, 3}, {1, 4, 3}, {0, 1, 4}, {1, 1, 4}, {0, 2, 3}, {1, 2, 3},
    {2, 2, 3}, {3, 2, 3}, {4, 2, 3}, {2, 4, 3}, {0, 2, 4}, {1, 2, 4},
    {0, 3, 3}, {1, 3, 3}, {2, 3, 3}, {3, 3, 3}, {4, 3, 3}, {3, 4, 3},
    {0, 3, 4}, {1, 3, 4}};

const int kRQuantParamTableLength = 19;
// T, Q, B values in Table c.2.16, including binaries, in reversed order
// (largest range first, so the first entry that fits is the widest one).
const uint kRQuantParamTable[kRQuantParamTableLength][3] = {
    {0, 0, 8},  // 255
    {1, 0, 6},  // 191
    {0, 1, 5},  // 159
    {0, 0, 7},  // 127
    {1, 0, 5},  // 95
    {0, 1, 4},  // 79
    {0, 0, 6},  // 63
    {1, 0, 4},  // 47
    {0, 1, 3},  // 39
    {0, 0, 5},  // 31
    {1, 0, 3},  // 23
    {0, 1, 2},  // 19
    {0, 0, 4},  // 15
    {1, 0, 2},  // 11
    {0, 1, 1},  // 9
    {0, 0, 3},  // 7
    {1, 0, 1},  // 5
    //{0, 1, 0}, // 4
    {0, 0, 2},  // 3
    //{1, 0, 0}, // 2
    {0, 0, 1},  // 1
};

// Returns bit number `bit` of `u` (0 or 1).
uint bit(uint u, int bit) {
    return (u >> bit) & 1;
}

// Extracts `bitCount` bits starting at bit `bitStart` from the 128-bit value
// held in `u`, where u[3] carries bits 0..31 and u[0] carries bits 96..127.
// The field may straddle one 32-bit word boundary. A zero-width request
// returns 0.
uint bits128(uvec4 u, uint bitStart, uint bitCount) {
    // A zero-width field would otherwise fall into the two-word path below
    // (bitStart + bitCount - 1 wraps or lands in the previous word), shift by
    // 32 and, for bitStart == 96, index u[-1].
    if (bitCount == 0u) {
        return 0u;
    }
    uint firstIdx = bitStart / 32;
    uint firstOffset = bitStart % 32;
    // Shifting a 32-bit value by 32 is undefined, so build the full mask
    // explicitly.
    uint bitMask = (bitCount >= 32u) ? 0xFFFFFFFFu : ((1u << bitCount) - 1u);
    if (firstIdx == ((bitStart + bitCount - 1) / 32)) {
        // Field lies entirely inside one word.
        return (u[3 - firstIdx] >> firstOffset) & bitMask;
    } else {
        // Low part comes from the current word, high part from the next one.
        uint firstCount = 32 - firstOffset;
        uint ret = u[3 - firstIdx - 1] << firstCount;
        ret |= ((u[3 - firstIdx] >> firstOffset) & ((1 << firstCount) - 1));
        return ret & bitMask;
    }
}

// Like bits128(), but the readable range stops at `bitEnd`: a field starting
// at or past `bitEnd` reads as 0, and a field crossing `bitEnd` is truncated
// to the bits before it.
uint bits128fillZeros(uvec4 u, uint bitStart, uint bitEnd, uint bitCount) {
    if (bitEnd <= bitStart) {
        return 0;
    }
    return bits128(u, bitStart, min(bitEnd - bitStart, bitCount));
}

// Number of bits occupied by `num_vals` values encoded with the given
// trit/quint flags (0 or 1) and `bits` plain bits per value.
uint get_bit_count(uint num_vals, uint trits, uint quints, uint bits) {
    // See section C.2.22 for the formula used here.
    uint trit_bit_count = ((num_vals * 8 * trits) + 4) / 5;
    uint quint_bit_count = ((num_vals * 7 * quints) + 2) / 3;
    uint base_bit_count = num_vals * bits;
    return trit_bit_count + quint_bit_count + base_bit_count;
}

// Reports how many values one packed group holds (`pack`) and how many bits
// that group occupies (`packedSize`): 5 values for trits, 3 for quints,
// 1 for plain bits.
void get_pack_size(uint trits,
                   uint quints,
                   uint bits,
                   out uint pack,
                   out uint packedSize) {
    if (trits == 1) {
        pack = 5;
        packedSize = 8 + 5 * bits;
    } else if (quints == 1) {
        pack = 3;
        packedSize = 7 + 3 * bits;
    } else {
        pack = 1;
        packedSize = bits;
    }
}

// Decodes one group of five trit-encoded values starting at bit `start`.
// Each value contributes `n` plain bits followed by a slice of the shared
// 8-bit trit code; bits at or past `end` read as zero.
uint[5] decode_trit(uvec4 data, uint start, uint end, uint n) {
    // We either have three quints or five trits
    const int kNumVals = 5;
    const int kInterleavedBits[5] = {2, 2, 1, 2, 1};

    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits =
                bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        // Trit goes above the n plain bits.
        result[i] = kTritEncodings[encoded][i] << n | m[i];
    }
    return result;
}

// Decodes one group of three quint-encoded values starting at bit `start`.
// Each value contributes `n` plain bits followed by a slice of the shared
// 7-bit quint code; bits at or past `end` read as zero.
uint[3] decode_quint(uvec4 data, uint start, uint end, uint n) {
    // We either have three quints or five trits
    const int kNumVals = 3;
    const int kInterleavedBits[3] = {3, 2, 2};

    // Decode the block
    uint m[kNumVals];
    uint encoded = 0;
    uint encoded_bits_read = 0;
    for (int i = 0; i < kNumVals; ++i) {
        m[i] = bits128fillZeros(data, start, end, n);
        start += n;

        uint encoded_bits =
                bits128fillZeros(data, start, end, kInterleavedBits[i]);
        start += kInterleavedBits[i];
        encoded |= encoded_bits << encoded_bits_read;
        encoded_bits_read += kInterleavedBits[i];
    }

    uint[kNumVals] result;
    for (int i = 0; i < kNumVals; ++i) {
        // Quint goes above the n plain bits.
        result[i] = kQuintEncodings[encoded][i] << n | m[i];
    }
    return result;
}

// Number of endpoint values consumed by color endpoint mode `cem`:
// 2 for modes 0-3, 4 for 4-7, 6 for 8-11, 8 for 12-15.
uint get_v_count(uint cem) {
    return (cem / 4 + 1) * 2;
}

// Color endpoint modes.
const uint kLDRLumaDirect = 0;
const uint kLDRLumaBaseOffset = 1;
const uint kHDRLumaLargeRange = 2;
const uint kHDRLumaSmallRange = 3;
const uint kLDRLumaAlphaDirect = 4;
const uint kLDRLumaAlphaBaseOffset = 5;
const uint kLDRRGBBaseScale = 6;
const uint kHDRRGBBaseScale = 7;
const uint kLDRRGBDirect = 8;
const uint kLDRRGBBaseOffset = 9;
const uint kLDRRGBBaseScaleTwoA = 10;
const uint kHDRRGBDirect = 11;
const uint kLDRRGBADirect = 12;
const uint kLDRRGBABaseOffset = 13;
const uint kHDRRGBDirectLDRAlpha = 14;
const uint kHDRRGBDirectHDRAlpha = 15;

// Exchanges the two vectors.
void swap(inout ivec4 v1, inout ivec4 v2) {
    ivec4 tmp = v1;
    v1 = v2;
    v2 = tmp;
}

// Moves the top bit of `a` into the top bit of `b` (after halving `b`) and
// turns the remaining bits of `a` into a signed 6-bit value in [-32, 31].
void bit_transfer_signed(inout int a, inout int b) {
    b >>= 1;
    b |= (a & 0x80);
    a >>= 1;
    a &= 0x3F;
    if ((a & 0x20) != 0)
        a -= 0x40;
}

// Replaces red and green by their average with blue.
void blue_contract(inout ivec4 val) {
    val.r = (val.r + val.b) / 2;
    val.g = (val.g + val.b) / 2;
}

// Converts the endpoint values vals[start_idx .. start_idx + 7] into a pair
// of RGBA endpoints (c1 = low, c2 = high) for the LDR endpoint mode `mode`.
// All eight values are read regardless of how many the mode uses. Modes not
// handled here (the HDR ones) produce two all-zero endpoints.
void decode_ldr_for_mode(const uint[40] vals,
                         uint start_idx,
                         uint mode,
                         out uvec4 c1,
                         out uvec4 c2) {
    int v0 = int(vals[start_idx + 0]);
    int v1 = int(vals[start_idx + 1]);
    int v2 = int(vals[start_idx + 2]);
    int v3 = int(vals[start_idx + 3]);
    int v4 = int(vals[start_idx + 4]);
    int v5 = int(vals[start_idx + 5]);
    int v6 = int(vals[start_idx + 6]);
    int v7 = int(vals[start_idx + 7]);
    ivec4 endpoint_low_rgba;
    ivec4 endpoint_high_rgba;
    switch (mode) {
        case kLDRLumaDirect: {
            endpoint_low_rgba = ivec4(v0, v0, v0, 255);
            endpoint_high_rgba = ivec4(v1, v1, v1, 255);
        } break;

        case kLDRLumaBaseOffset: {
            const int l0 = (v0 >> 2) | (v1 & 0xC0);
            const int l1 = min(l0 + (v1 & 0x3F), 0xFF);

            endpoint_low_rgba = ivec4(l0, l0, l0, 255);
            endpoint_high_rgba = ivec4(l1, l1, l1, 255);
        } break;

        case kLDRLumaAlphaDirect: {
            endpoint_low_rgba = ivec4(v0, v0, v0, v2);
            endpoint_high_rgba = ivec4(v1, v1, v1, v3);
        } break;

        case kLDRLumaAlphaBaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);

            endpoint_low_rgba = clamp(ivec4(v0, v0, v0, v2), 0, 255);
            const int high_luma = v0 + v1;
            endpoint_high_rgba = clamp(
                    ivec4(high_luma, high_luma, high_luma, v2 + v3), 0, 255);
        } break;

        case kLDRRGBBaseScale: {
            endpoint_high_rgba = ivec4(v0, v1, v2, 255);
            for (int i = 0; i < 3; ++i) {
                const int x = endpoint_high_rgba[i];
                endpoint_low_rgba[i] = (x * v3) >> 8;
            }
            endpoint_low_rgba[3] = 255;
        } break;

        case kLDRRGBDirect: {
            const int s0 = v0 + v2 + v4;
            const int s1 = v1 + v3 + v5;

            endpoint_low_rgba = ivec4(v0, v2, v4, 255);
            endpoint_high_rgba = ivec4(v1, v3, v5, 255);

            // A darker "high" endpoint signals the blue-contracted form.
            if (s1 < s0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }
        } break;

        case kLDRRGBBaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);

            endpoint_low_rgba = ivec4(v0, v2, v4, 255);
            endpoint_high_rgba = ivec4(v0 + v1, v2 + v3, v4 + v5, 255);

            // A negative total offset signals the blue-contracted form.
            if (v1 + v3 + v5 < 0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }

            endpoint_low_rgba = clamp(endpoint_low_rgba, 0, 255);
            endpoint_high_rgba = clamp(endpoint_high_rgba, 0, 255);
        } break;

        case kLDRRGBBaseScaleTwoA: {
            // Base
            endpoint_low_rgba = endpoint_high_rgba = ivec4(v0, v1, v2, 255);

            // Scale
            endpoint_low_rgba = (endpoint_low_rgba * v3) >> 8;

            // Two A
            endpoint_low_rgba[3] = v4;
            endpoint_high_rgba[3] = v5;
        } break;

        case kLDRRGBADirect: {
            // Signed sums, matching kLDRRGBDirect; the inputs are
            // non-negative so the comparison result is the same as with
            // unsigned sums.
            const int s0 = v0 + v2 + v4;
            const int s1 = v1 + v3 + v5;

            endpoint_low_rgba = ivec4(v0, v2, v4, v6);
            endpoint_high_rgba = ivec4(v1, v3, v5, v7);

            if (s1 < s0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }
        } break;

        case kLDRRGBABaseOffset: {
            bit_transfer_signed(v1, v0);
            bit_transfer_signed(v3, v2);
            bit_transfer_signed(v5, v4);
            bit_transfer_signed(v7, v6);

            endpoint_low_rgba = ivec4(v0, v2, v4, v6);
            endpoint_high_rgba = ivec4(v0 + v1, v2 + v3, v4 + v5, v6 + v7);

            if (v1 + v3 + v5 < 0) {
                swap(endpoint_low_rgba, endpoint_high_rgba);
                blue_contract(endpoint_low_rgba);
                blue_contract(endpoint_high_rgba);
            }

            endpoint_low_rgba = clamp(endpoint_low_rgba, 0, 255);
            endpoint_high_rgba = clamp(endpoint_high_rgba, 0, 255);
        } break;

        default:
            // Unimplemented color encoding.
            // TODO(google): Is this the correct error handling?
            endpoint_high_rgba = endpoint_low_rgba = ivec4(0, 0, 0, 0);
            break;
    }
    c1 = uvec4(endpoint_low_rgba);
    c2 = uvec4(endpoint_high_rgba);
}

// Bit-mixing hash used by select_partition().
uint hash52(uint p) {
    p ^= p >> 15;
    p -= p << 17;
    p += p << 7;
    p += p << 4;
    p ^= p >> 5;
    p += p << 16;
    p ^= p >> 7;
    p ^= p >> 3;
    p ^= p << 6;
    p ^= p >> 17;
    return p;
}

// Returns the partition index (0..partitioncount-1) of texel (x, y) for the
// partition pattern `seed`. Always 0 when there is a single partition.
// Coordinates are doubled when the smallBlock push constant is non-zero.
uint select_partition(uint seed, uint x, uint y, uint partitioncount) {
    if (partitioncount == 1) {
        return 0;
    }
    // Only 2D footprints are handled here, so the z terms below vanish.
    uint z = 0;
    if (u_pushConstant.smallBlock != 0) {
        x <<= 1;
        y <<= 1;
    }
    seed += (partitioncount - 1) * 1024;
    uint rnum = hash52(seed);
    uint seed1 = rnum & 0xF;
    uint seed2 = (rnum >> 4) & 0xF;
    uint seed3 = (rnum >> 8) & 0xF;
    uint seed4 = (rnum >> 12) & 0xF;
    uint seed5 = (rnum >> 16) & 0xF;
    uint seed6 = (rnum >> 20) & 0xF;
    uint seed7 = (rnum >> 24) & 0xF;
    uint seed8 = (rnum >> 28) & 0xF;
    uint seed9 = (rnum >> 18) & 0xF;
    uint seed10 = (rnum >> 22) & 0xF;
    uint seed11 = (rnum >> 26) & 0xF;
    uint seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;

    seed1 *= seed1;
    seed2 *= seed2;
    seed3 *= seed3;
    seed4 *= seed4;
    seed5 *= seed5;
    seed6 *= seed6;
    seed7 *= seed7;
    seed8 *= seed8;
    seed9 *= seed9;
    seed10 *= seed10;
    seed11 *= seed11;
    seed12 *= seed12;

    uint sh1, sh2, sh3;
    if ((seed & 1) != 0) {
        sh1 = ((seed & 2) != 0 ? 4 : 5);
        sh2 = (partitioncount == 3 ? 6 : 5);
    } else {
        sh1 = (partitioncount == 3 ? 6 : 5);
        sh2 = ((seed & 2) != 0 ? 4 : 5);
    }
    sh3 = ((seed & 0x10) != 0) ? sh1 : sh2;

    seed1 >>= sh1;
    seed2 >>= sh2;
    seed3 >>= sh1;
    seed4 >>= sh2;
    seed5 >>= sh1;
    seed6 >>= sh2;
    seed7 >>= sh1;
    seed8 >>= sh2;
    seed9 >>= sh3;
    seed10 >>= sh3;
    seed11 >>= sh3;
    seed12 >>= sh3;

    uint a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
    uint b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
    uint c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
    uint d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);

    a &= 0x3F;
    b &= 0x3F;
    c &= 0x3F;
    d &= 0x3F;

    // Partitions that do not exist must never win the comparison below.
    if (partitioncount < 4)
        d = 0;
    if (partitioncount < 3)
        c = 0;

    if (a >= b && a >= c && a >= d)
        return 0;
    else if (b >= c && b >= d)
        return 1;
    else if (c >= d)
        return 2;
    else
        return 3;
}

// Returns a block whose first blockSize.x * blockSize.y texels (row-major)
// are all `color`; the remaining entries are left unwritten.
uvec4[144] single_color_block(uvec4 color) {
    uvec4 ret[144];
    for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
        for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
            ret[h * u_pushConstant.blockSize.x + w] = color;
        }
    }
    return ret;
}

// Block returned for encodings this decoder rejects: opaque magenta.
uvec4[144] error_color_block() {
    return single_color_block(uvec4(0xff, 0, 0xff, 0xff));
}

// Decodes one 128-bit block into blockSize.x * blockSize.y RGBA8 texels,
// stored row-major in the returned array. `u` holds the block with u[3] as
// bits 0..31 and u[0] as bits 96..127. Rejected encodings yield
// error_color_block(); texels whose partition uses an HDR endpoint mode are
// written as the same magenta error color.
uvec4[144] astc_decode_block(const uvec4 u) {
    uint d;       // dual-plane flag (bit 10)
    uint hiPrec;  // bit 9: selects which of the two weight-range tables
                  // below is used
    uint b;
    uint a;
    uint r;       // 3-bit weight range selector
    uint width;   // weight grid width
    uint height;  // weight grid height
    // Lanes beyond the partition count are never selected, but keep them
    // defined.
    uvec4 cem = uvec4(0);
    const int kWeightGridSize = 120;
    uint weightGrid[kWeightGridSize];
    // The bilinear fetch below also reads taps past the last decoded weight
    // (their interpolation factor is zero there); make those reads defined.
    for (int k = 0; k < kWeightGridSize; ++k) {
        weightGrid[k] = 0u;
    }
    const uint u3 = u[3];
    const uint b87 = u3 >> 7 & 3;
    const uint b65 = u3 >> 5 & 3;
    const uint b32 = u3 >> 2 & 3;
    a = b65;
    b = b87;
    d = bit(u3, 10);
    hiPrec = bit(u3, 9);
    if ((u3 & 3) == 0) {
        r = b32 << 1 | bit(u3, 4);
        if (b87 == 0) {
            width = 12;
            height = a + 2;
        } else if (b87 == 1) {
            width = a + 2;
            height = 12;
        } else if (b87 == 3) {
            if (b65 == 0) {
                width = 6;
                height = 10;
            } else if (b65 == 1) {
                width = 10;
                height = 6;
            } else if ((u3 & 0xDFF) == 0xDFC) {
                // Void-extent
                // In void-extent blocks, the last 12 bits should be
                // 1 1 D 1 1 1 1 1 1 1 0 0
                // Where D is the HDR bit

                // Keep the high byte of each 16-bit channel.
                uvec4 color = uvec4(u[1] >> 8 & 0xff, u[1] >> 24 & 0xff,
                                    u[0] >> 8 & 0xff, u[0] >> 24 & 0xff);
                return single_color_block(color);
            } else {  // reserved
                return error_color_block();
            }
        } else {  // b87 == 2
            b = u3 >> 9 & 3;
            width = a + 6;
            height = b + 6;
            d = 0;
            hiPrec = 0;
        }
    } else {
        r = (u3 & 3) << 1 | bit(u3, 4);
        if (b32 == 0) {
            width = b + 4;
            height = a + 2;
        } else if (b32 == 1) {
            width = b + 8;
            height = a + 2;
        } else if (b32 == 2) {
            width = a + 2;
            height = b + 8;
        } else if (bit(u3, 8) == 0) {
            width = a + 2;
            height = (b & 1) + 6;
        } else {
            width = (b & 1) + 2;
            height = a + 2;
        }
    }

    if (width > u_pushConstant.blockSize.x ||
        height > u_pushConstant.blockSize.y) {
        return error_color_block();
    }
    // Decode weight
    uint trits = 0;
    uint quints = 0;
    uint bits = 0;
    const uint weightCounts = height * width * (d + 1);
    const int kMaxNumWeights = 64;
    if (kMaxNumWeights < weightCounts) {
        return error_color_block();
    }
    {
        // r values 0 and 1 fall through to the default (error) case.
        if (hiPrec == 0) {
            switch (r) {
                case 2:
                    bits = 1;
                    break;
                case 3:
                    trits = 1;
                    break;
                case 4:
                    bits = 2;
                    break;
                case 5:
                    quints = 1;
                    break;
                case 6:
                    trits = 1;
                    bits = 1;
                    break;
                case 7:
                    bits = 3;
                    break;
                default:
                    return error_color_block();
            }
        } else {
            switch (r) {
                case 2:
                    bits = 1;
                    quints = 1;
                    break;
                case 3:
                    trits = 1;
                    bits = 2;
                    break;
                case 4:
                    bits = 4;
                    break;
                case 5:
                    quints = 1;
                    bits = 2;
                    break;
                case 6:
                    trits = 1;
                    bits = 3;
                    break;
                case 7:
                    bits = 5;
                    break;
                default:
                    return error_color_block();
            }
        }
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(trits, quints, bits, pack, packedSize);
        uint srcIdx = 0;
        uint dstIdx = 0;
        uvec4 uReversed = bitfieldReverse(u);
        const uint weightBitCount =
                get_bit_count(weightCounts, trits, quints, bits);
        const int kWeightGridMinBitLength = 24;
        const int kWeightGridMaxBitLength = 96;
        if (weightBitCount < kWeightGridMinBitLength ||
            weightBitCount > kWeightGridMaxBitLength) {
            return error_color_block();
        }
        // Together with the per-word bit reversal above, swapping the word
        // order mirrors the whole 128-bit value, so the weights (stored from
        // bit 127 downward) can be read upward from bit 0.
        uReversed =
                uvec4(uReversed[3], uReversed[2], uReversed[1], uReversed[0]);
        // Bit replication of a 1..6 bit value to 6 bits, indexed by bits - 1:
        // value * mul | value >> mov.
        const uint kUnquantBinMulTable[6] = {0x3f, 0x15, 0x9, 0x4, 0x2, 0x1};
        const uint kUnquantBinMovTable[6] = {0x8, 0x8, 0x8, 0x2, 0x4, 0x8};
        while (dstIdx < weightCounts) {
            if (trits == 1) {
                uint decoded[5] =
                        decode_trit(uReversed, srcIdx, weightBitCount, bits);
                for (int k = 0; k < 5; k++) {
                    weightGrid[dstIdx] = kUnquantTritWeightMap
                            [kUnquantTritWeightMapBitIdx[bits] + decoded[k]];
                    // Widen 0..63 to 0..64.
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else if (quints == 1) {
                uint decoded[3] =
                        decode_quint(uReversed, srcIdx, weightBitCount, bits);
                for (int k = 0; k < 3; k++) {
                    // TODO: handle overflow in the last
                    weightGrid[dstIdx] = kUnquantQuintWeightMap
                            [kUnquantQuintWeightMapBitIdx[bits] + decoded[k]];
                    if (weightGrid[dstIdx] > 32) {
                        weightGrid[dstIdx] += 1;
                    }
                    dstIdx++;
                    if (dstIdx >= weightCounts) {
                        break;
                    }
                }
            } else {
                uint decodedRaw = bits128(uReversed, srcIdx, packedSize);
                uint decoded = decodedRaw * kUnquantBinMulTable[bits - 1] |
                               decodedRaw >> kUnquantBinMovTable[bits - 1];
                weightGrid[dstIdx] = decoded;
                if (weightGrid[dstIdx] > 32) {
                    weightGrid[dstIdx] += 1;
                }
                dstIdx++;
            }
            srcIdx += packedSize;
        }
    }
    uint partitionCount = (u3 >> 11 & 3) + 1;
    if (d == 1 && partitionCount == 4) {
        return error_color_block();
    }
    const uint weightStart =
            128 - get_bit_count(weightCounts, trits, quints, bits);
    uint dualPlaneStart = 0;
    // Decode cem mode
    if (partitionCount == 1) {
        // Single-partition mode
        cem[0] = u3 >> 13 & 0xf;
        dualPlaneStart = weightStart - d * 2;
    } else {
        // Multi-partition mode
        // Calculate CEM for all 4 partitions, even when partitionCount < 4
        uint partMode = u3 >> 23 & 3;
        const uint kExtraMBitsTable[4] = {0, 2, 5, 8};
        const uint extraMBitCount =
                (partMode == 0) ? 0 : kExtraMBitsTable[partitionCount - 1];
        const uint extraMStart = weightStart - extraMBitCount;
        dualPlaneStart = extraMStart - d * 2;

        if (partMode == 0) {
            // All partitions share one mode.
            uint cem_all = u3 >> 25 & 0xf;
            cem = uvec4(cem_all, cem_all, cem_all, cem_all);
        } else {
            // Per-partition mode: class (base + one selector bit each) in
            // the upper two bits, two more bits per partition from m.
            uint cemBase = partMode - 1;
            uvec4 cemHigh = cemBase + uvec4(bit(u3, 25), bit(u3, 26),
                                            bit(u3, 27), bit(u3, 28));
            const uint extraM = bits128(u, extraMStart, extraMBitCount);
            const uint kMainMBitsTable[4] = {0, 2, 1, 0};
            const uint mainMBitCount = kMainMBitsTable[partitionCount - 1];
            const uint m = extraM << mainMBitCount |
                           ((u3 >> 27 & 3) >> (2 - mainMBitCount));
            cem = cemHigh << 2 |
                  uvec4(m & 3, m >> 2 & 3, m >> 4 & 3, m >> 6 & 3);
        }
    }
    // Decode end points
    uvec4 endPoints[4][2];
    {
        uint totalV = 0;
        for (uint part = 0; part < partitionCount; part++) {
            totalV += get_v_count(cem[part]);
        }
        const uint epStart = (partitionCount == 1) ? 17 : 29;
        const uint totalAvailBits = dualPlaneStart - epStart;
        if (totalAvailBits >= 128) {
            // overflowed
            return error_color_block();
        }
        uint epQuints = 0;
        uint epTrits = 0;
        uint epBits = 0;
        // Pick the first (widest) quantization whose encoding fits.
        uint rangeIdx;
        for (rangeIdx = 0; rangeIdx < kRQuantParamTableLength; rangeIdx++) {
            epTrits = kRQuantParamTable[rangeIdx][0];
            epQuints = kRQuantParamTable[rangeIdx][1];
            epBits = kRQuantParamTable[rangeIdx][2];
            if (get_bit_count(totalV, epTrits, epQuints, epBits) <=
                totalAvailBits) {
                break;
            }
        }
        if (rangeIdx >= kRQuantParamTableLength) {
            return error_color_block();
        }

        const uint epBitCount =
                get_bit_count(totalV, epTrits, epQuints, epBits);
        const uint epEnd = epStart + epBitCount;
        uint packedSize = 0;
        uint pack = 0;
        get_pack_size(epTrits, epQuints, epBits, pack, packedSize);

        // Decode end point parameters into buffer
        const int kVBufferSize = 40;
        uint vBuffer[kVBufferSize];
        // decode_ldr_for_mode() always reads eight values per partition,
        // including ones the mode does not define; keep those reads defined.
        for (int k = 0; k < kVBufferSize; ++k) {
            vBuffer[k] = 0u;
        }
        uint srcIdx = epStart;
        uint dstIdx = 0;
        // Bit replication of a 1..8 bit value to 8 bits, indexed by
        // epBits - 1: value * mul | value >> mov.
        const uint kUnquantBinMulTable[8] = {0xff, 0x55, 0x24, 0x11,
                                             0x8,  0x4,  0x2,  0x1};
        const uint kUnquantBinMovTable[8] = {8, 8, 1, 8, 2, 4, 6, 8};
        while (dstIdx < totalV) {
            if (epTrits == 1) {
                uint decoded[5] = decode_trit(u, srcIdx, epEnd, epBits);
                for (int k = 0; k < 5; k++) {
                    vBuffer[dstIdx] = kUnquantTritColorMap
                            [kUnquantTritColorMapBitIdx[epBits] + decoded[k]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else if (epQuints == 1) {
                uint decoded[3] = decode_quint(u, srcIdx, epEnd, epBits);
                for (int k = 0; k < 3; k++) {
                    vBuffer[dstIdx] = kUnquantQuintColorMap
                            [kUnquantQuintColorMapBitIdx[epBits] + decoded[k]];
                    dstIdx++;
                    if (dstIdx >= totalV) {
                        break;
                    }
                }
            } else {
                uint src = bits128(u, srcIdx, packedSize);
                uint decoded = src * kUnquantBinMulTable[epBits - 1] |
                               src >> kUnquantBinMovTable[epBits - 1];
                vBuffer[dstIdx] = decoded;
                dstIdx++;
            }
            srcIdx += packedSize;
        }
        uint bufferIdx = 0;
        for (uint part = 0; part < partitionCount; part++) {
            // TODO: HDR support
            decode_ldr_for_mode(vBuffer, bufferIdx, cem[part],
                                endPoints[part][0], endPoints[part][1]);
            bufferIdx += get_v_count(cem[part]);
        }
    }
    uvec4 ret[144];
    {
        // Per-texel step that maps texel coordinates onto roughly 0..1024.
        uvec2 dst = (1024 + u_pushConstant.blockSize / 2) /
                    (u_pushConstant.blockSize - 1);
        uint dd = d + 1;
        // Channel driven by the second weight plane; the same for every
        // texel, so read it once.
        uint ccs = 0;
        if (d == 1) {
            ccs = bits128(u, dualPlaneStart, 2);
        }
        for (uint h = 0; h < u_pushConstant.blockSize.y; h++) {
            for (uint w = 0; w < u_pushConstant.blockSize.x; w++) {
                uint part =
                        select_partition(u3 >> 13 & 1023, w, h, partitionCount);
                if (kHDRCEM[cem[part]]) {
                    // HDR not supported
                    ret[h * u_pushConstant.blockSize.x + w] =
                            uvec4(0xff, 0, 0xff, 0xff);
                    continue;
                }
                // Calculate weight
                uvec2 st = uvec2(w, h);
                uvec2 cst = dst * st;
                uvec2 gst = (cst * (uvec2(width, height) - 1) + 32) >> 6;
                uvec2 jst = gst >> 4;   // integer grid coordinate
                uvec2 fst = gst & 0xf;  // fractional part, in 1/16ths
                uint v0 = jst.x + jst.y * width;
                // .x is the first weight plane, .y the second (only
                // meaningful when d == 1).
                uvec2 p00 = uvec2(weightGrid[v0 * dd],
                                  weightGrid[v0 * dd + 1]);
                uvec2 p01 = uvec2(weightGrid[(v0 + 1) * dd],
                                  weightGrid[(v0 + 1) * dd + 1]);
                uvec2 p10 = uvec2(weightGrid[(v0 + width) * dd],
                                  weightGrid[(v0 + width) * dd + 1]);
                uvec2 p11 = uvec2(weightGrid[(v0 + width + 1) * dd],
                                  weightGrid[(v0 + width + 1) * dd + 1]);
                uint w11 = (fst.x * fst.y + 8) >> 4;
                uint w10 = fst.y - w11;
                uint w01 = fst.x - w11;
                uint w00 = 16 - fst.x - fst.y + w11;
                uvec2 wgt =
                        (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >>
                        4;

                uvec4 c0 = endPoints[part][0];
                uvec4 c1 = endPoints[part][1];
                uvec4 c = (c0 * (64 - wgt[0]) + c1 * wgt[0] + 32) / 64;
                if (d == 1) {
                    c[ccs] = (c0[ccs] * (64 - wgt[1]) + c1[ccs] * wgt[1] +
                              32) /
                             64;
                }
                ret[h * u_pushConstant.blockSize.x + w] = c;
            }
        }
    }
    return ret;
}

// Image coordinate helpers, selected by the ${type} template parameter.
ivec2 getPos1DArray(ivec3 pos) {
    return ivec2(pos.x, pos.z);
}

ivec3 getPos2DArray(ivec3 pos) {
    return pos;
}

ivec3 getPos3D(ivec3 pos) {
    return pos;
}

// Number of texel rows written per block, selected by ${type}.
uint block_y_size_1DArray() {
    return 1;
}

uint block_y_size_2DArray() {
    return u_pushConstant.blockSize.y;
}

uint block_y_size_3D() {
    return u_pushConstant.blockSize.y;
}

// Reverses the byte order of each component.
uvec4 flip32(uvec4 a) {
    return ((a & 0xff) << 24) | ((a & 0xff00) << 8) | ((a & 0xff0000) >> 8) |
           ((a & 0xff000000) >> 24);
}

// Loads the compressed block addressed by this invocation, decodes it and
// stores the resulting texels into the destination image.
void main(void) {
    ivec3 pos = ivec3(gl_GlobalInvocationID.xyz);
    pos.z += int(u_pushConstant.baseLayer);
    uvec4 srcBlock = uvec4(imageLoad(u_image0, getPos${type}(pos)));
    // astc_decode_block() expects the first loaded word at index 3.
    srcBlock = uvec4(srcBlock[3], srcBlock[2], srcBlock[1], srcBlock[0]);
    uvec4[144] decompressed = astc_decode_block(srcBlock);

    for (uint y = 0; y < block_y_size_${type}(); y++) {
        for (uint x = 0; x < u_pushConstant.blockSize.x; x++) {
            imageStore(u_image1,
                       getPos${type}(ivec3(
                               pos.xy * u_pushConstant.blockSize + ivec2(x, y),
                               pos.z)),
                       decompressed[y * u_pushConstant.blockSize.x + x]);
        }
    }
}