1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Tester Core
3  * ----------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Compressed Texture Utilities.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "tcuCompressedTexture.hpp"
25 #include "tcuTextureUtil.hpp"
26 #include "tcuAstcUtil.hpp"
27 
28 #include "deStringUtil.hpp"
29 #include "deFloat16.h"
30 
31 #include <algorithm>
32 
33 namespace tcu
34 {
35 
getBlockSize(CompressedTexFormat format)36 int getBlockSize(CompressedTexFormat format)
37 {
38     if (isAstcFormat(format))
39     {
40         return astc::BLOCK_SIZE_BYTES;
41     }
42     else if (isEtcFormat(format))
43     {
44         switch (format)
45         {
46         case COMPRESSEDTEXFORMAT_ETC1_RGB8:
47             return 8;
48         case COMPRESSEDTEXFORMAT_EAC_R11:
49             return 8;
50         case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:
51             return 8;
52         case COMPRESSEDTEXFORMAT_EAC_RG11:
53             return 16;
54         case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:
55             return 16;
56         case COMPRESSEDTEXFORMAT_ETC2_RGB8:
57             return 8;
58         case COMPRESSEDTEXFORMAT_ETC2_SRGB8:
59             return 8;
60         case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
61             return 8;
62         case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
63             return 8;
64         case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:
65             return 16;
66         case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:
67             return 16;
68 
69         default:
70             DE_ASSERT(false);
71             return -1;
72         }
73     }
74     else if (isBcFormat(format))
75     {
76         switch (format)
77         {
78         case COMPRESSEDTEXFORMAT_BC1_RGB_UNORM_BLOCK:
79             return 8;
80         case COMPRESSEDTEXFORMAT_BC1_RGB_SRGB_BLOCK:
81             return 8;
82         case COMPRESSEDTEXFORMAT_BC1_RGBA_UNORM_BLOCK:
83             return 8;
84         case COMPRESSEDTEXFORMAT_BC1_RGBA_SRGB_BLOCK:
85             return 8;
86         case COMPRESSEDTEXFORMAT_BC2_UNORM_BLOCK:
87             return 16;
88         case COMPRESSEDTEXFORMAT_BC2_SRGB_BLOCK:
89             return 16;
90         case COMPRESSEDTEXFORMAT_BC3_UNORM_BLOCK:
91             return 16;
92         case COMPRESSEDTEXFORMAT_BC3_SRGB_BLOCK:
93             return 16;
94         case COMPRESSEDTEXFORMAT_BC4_UNORM_BLOCK:
95             return 8;
96         case COMPRESSEDTEXFORMAT_BC4_SNORM_BLOCK:
97             return 8;
98         case COMPRESSEDTEXFORMAT_BC5_UNORM_BLOCK:
99             return 16;
100         case COMPRESSEDTEXFORMAT_BC5_SNORM_BLOCK:
101             return 16;
102         case COMPRESSEDTEXFORMAT_BC6H_UFLOAT_BLOCK:
103             return 16;
104         case COMPRESSEDTEXFORMAT_BC6H_SFLOAT_BLOCK:
105             return 16;
106         case COMPRESSEDTEXFORMAT_BC7_UNORM_BLOCK:
107             return 16;
108         case COMPRESSEDTEXFORMAT_BC7_SRGB_BLOCK:
109             return 16;
110 
111         default:
112             DE_ASSERT(false);
113             return -1;
114         }
115     }
116     else if (isAhbRawFormat(format))
117     {
118         switch (format)
119         {
120         case COMPRESSEDTEXFORMAT_AHB_RAW10:
121             return 5;
122         case COMPRESSEDTEXFORMAT_AHB_RAW12:
123             return 3;
124 
125         default:
126             DE_ASSERT(false);
127             return -1;
128         }
129     }
130     else
131     {
132         DE_ASSERT(false);
133         return -1;
134     }
135 }
136 
getBlockPixelSize(CompressedTexFormat format)137 IVec3 getBlockPixelSize(CompressedTexFormat format)
138 {
139     if (isEtcFormat(format))
140     {
141         return IVec3(4, 4, 1);
142     }
143     else if (isAstcFormat(format))
144     {
145         switch (format)
146         {
147         case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
148             return IVec3(4, 4, 1);
149         case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
150             return IVec3(5, 4, 1);
151         case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
152             return IVec3(5, 5, 1);
153         case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
154             return IVec3(6, 5, 1);
155         case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
156             return IVec3(6, 6, 1);
157         case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
158             return IVec3(8, 5, 1);
159         case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
160             return IVec3(8, 6, 1);
161         case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
162             return IVec3(8, 8, 1);
163         case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
164             return IVec3(10, 5, 1);
165         case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
166             return IVec3(10, 6, 1);
167         case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
168             return IVec3(10, 8, 1);
169         case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
170             return IVec3(10, 10, 1);
171         case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
172             return IVec3(12, 10, 1);
173         case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
174             return IVec3(12, 12, 1);
175         case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
176             return IVec3(4, 4, 1);
177         case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
178             return IVec3(5, 4, 1);
179         case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
180             return IVec3(5, 5, 1);
181         case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
182             return IVec3(6, 5, 1);
183         case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
184             return IVec3(6, 6, 1);
185         case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
186             return IVec3(8, 5, 1);
187         case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
188             return IVec3(8, 6, 1);
189         case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
190             return IVec3(8, 8, 1);
191         case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
192             return IVec3(10, 5, 1);
193         case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
194             return IVec3(10, 6, 1);
195         case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
196             return IVec3(10, 8, 1);
197         case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
198             return IVec3(10, 10, 1);
199         case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
200             return IVec3(12, 10, 1);
201         case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
202             return IVec3(12, 12, 1);
203 
204         default:
205             DE_ASSERT(false);
206             return IVec3();
207         }
208     }
209     else if (isBcFormat(format))
210     {
211         return IVec3(4, 4, 1);
212     }
213     else if (isAhbRawFormat(format))
214     {
215         switch (format)
216         {
217         case COMPRESSEDTEXFORMAT_AHB_RAW10:
218             return IVec3(4, 1, 1);
219         case COMPRESSEDTEXFORMAT_AHB_RAW12:
220             return IVec3(2, 1, 1);
221 
222         default:
223             DE_ASSERT(false);
224             return IVec3();
225         }
226     }
227     else
228     {
229         DE_ASSERT(false);
230         return IVec3(-1);
231     }
232 }
233 
isEtcFormat(CompressedTexFormat format)234 bool isEtcFormat(CompressedTexFormat format)
235 {
236     switch (format)
237     {
238     case COMPRESSEDTEXFORMAT_ETC1_RGB8:
239     case COMPRESSEDTEXFORMAT_EAC_R11:
240     case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:
241     case COMPRESSEDTEXFORMAT_EAC_RG11:
242     case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:
243     case COMPRESSEDTEXFORMAT_ETC2_RGB8:
244     case COMPRESSEDTEXFORMAT_ETC2_SRGB8:
245     case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
246     case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
247     case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:
248     case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:
249         return true;
250 
251     default:
252         return false;
253     }
254 }
255 
isBcFormat(CompressedTexFormat format)256 bool isBcFormat(CompressedTexFormat format)
257 {
258     switch (format)
259     {
260     case COMPRESSEDTEXFORMAT_BC1_RGB_UNORM_BLOCK:
261     case COMPRESSEDTEXFORMAT_BC1_RGB_SRGB_BLOCK:
262     case COMPRESSEDTEXFORMAT_BC1_RGBA_UNORM_BLOCK:
263     case COMPRESSEDTEXFORMAT_BC1_RGBA_SRGB_BLOCK:
264     case COMPRESSEDTEXFORMAT_BC2_UNORM_BLOCK:
265     case COMPRESSEDTEXFORMAT_BC2_SRGB_BLOCK:
266     case COMPRESSEDTEXFORMAT_BC3_UNORM_BLOCK:
267     case COMPRESSEDTEXFORMAT_BC3_SRGB_BLOCK:
268     case COMPRESSEDTEXFORMAT_BC4_UNORM_BLOCK:
269     case COMPRESSEDTEXFORMAT_BC4_SNORM_BLOCK:
270     case COMPRESSEDTEXFORMAT_BC5_UNORM_BLOCK:
271     case COMPRESSEDTEXFORMAT_BC5_SNORM_BLOCK:
272     case COMPRESSEDTEXFORMAT_BC6H_UFLOAT_BLOCK:
273     case COMPRESSEDTEXFORMAT_BC6H_SFLOAT_BLOCK:
274     case COMPRESSEDTEXFORMAT_BC7_UNORM_BLOCK:
275     case COMPRESSEDTEXFORMAT_BC7_SRGB_BLOCK:
276         return true;
277 
278     default:
279         return false;
280     }
281 }
282 
isBcBitExactFormat(CompressedTexFormat format)283 bool isBcBitExactFormat(CompressedTexFormat format)
284 {
285     switch (format)
286     {
287     case COMPRESSEDTEXFORMAT_BC6H_UFLOAT_BLOCK:
288     case COMPRESSEDTEXFORMAT_BC6H_SFLOAT_BLOCK:
289     case COMPRESSEDTEXFORMAT_BC7_UNORM_BLOCK:
290     case COMPRESSEDTEXFORMAT_BC7_SRGB_BLOCK:
291         return true;
292 
293     default:
294         return false;
295     }
296 }
297 
isBcSRGBFormat(CompressedTexFormat format)298 bool isBcSRGBFormat(CompressedTexFormat format)
299 {
300     switch (format)
301     {
302     case COMPRESSEDTEXFORMAT_BC1_RGB_SRGB_BLOCK:
303     case COMPRESSEDTEXFORMAT_BC1_RGBA_SRGB_BLOCK:
304     case COMPRESSEDTEXFORMAT_BC2_SRGB_BLOCK:
305     case COMPRESSEDTEXFORMAT_BC3_SRGB_BLOCK:
306     case COMPRESSEDTEXFORMAT_BC7_SRGB_BLOCK:
307         return true;
308 
309     default:
310         return false;
311     }
312 }
313 
isAstcFormat(CompressedTexFormat format)314 bool isAstcFormat(CompressedTexFormat format)
315 {
316     switch (format)
317     {
318     case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
319     case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
320     case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
321     case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
322     case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
323     case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
324     case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
325     case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
326     case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
327     case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
328     case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
329     case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
330     case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
331     case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
332     case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
333     case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
334     case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
335     case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
336     case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
337     case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
338     case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
339     case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
340     case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
341     case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
342     case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
343     case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
344     case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
345     case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
346         return true;
347 
348     default:
349         return false;
350     }
351 }
352 
isAstcSRGBFormat(CompressedTexFormat format)353 bool isAstcSRGBFormat(CompressedTexFormat format)
354 {
355     switch (format)
356     {
357     case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
358     case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
359     case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
360     case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
361     case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
362     case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
363     case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
364     case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
365     case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
366     case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
367     case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
368     case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
369     case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
370     case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
371         return true;
372 
373     default:
374         return false;
375     }
376 }
377 
isAhbRawFormat(CompressedTexFormat format)378 bool isAhbRawFormat(CompressedTexFormat format)
379 {
380     switch (format)
381     {
382     case COMPRESSEDTEXFORMAT_AHB_RAW10:
383     case COMPRESSEDTEXFORMAT_AHB_RAW12:
384         return true;
385 
386     default:
387         return false;
388     }
389 }
390 
getUncompressedFormat(CompressedTexFormat format)391 TextureFormat getUncompressedFormat(CompressedTexFormat format)
392 {
393     if (isEtcFormat(format))
394     {
395         switch (format)
396         {
397         case COMPRESSEDTEXFORMAT_ETC1_RGB8:
398             return TextureFormat(TextureFormat::RGB, TextureFormat::UNORM_INT8);
399         case COMPRESSEDTEXFORMAT_EAC_R11:
400             return TextureFormat(TextureFormat::R, TextureFormat::UNORM_INT16);
401         case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:
402             return TextureFormat(TextureFormat::R, TextureFormat::SNORM_INT16);
403         case COMPRESSEDTEXFORMAT_EAC_RG11:
404             return TextureFormat(TextureFormat::RG, TextureFormat::UNORM_INT16);
405         case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:
406             return TextureFormat(TextureFormat::RG, TextureFormat::SNORM_INT16);
407         case COMPRESSEDTEXFORMAT_ETC2_RGB8:
408             return TextureFormat(TextureFormat::RGB, TextureFormat::UNORM_INT8);
409         case COMPRESSEDTEXFORMAT_ETC2_SRGB8:
410             return TextureFormat(TextureFormat::sRGB, TextureFormat::UNORM_INT8);
411         case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
412             return TextureFormat(TextureFormat::RGBA, TextureFormat::UNORM_INT8);
413         case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
414             return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
415         case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:
416             return TextureFormat(TextureFormat::RGBA, TextureFormat::UNORM_INT8);
417         case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:
418             return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
419 
420         default:
421             DE_ASSERT(false);
422             return TextureFormat();
423         }
424     }
425     else if (isAstcFormat(format))
426     {
427         if (isAstcSRGBFormat(format))
428             return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
429         else
430             return TextureFormat(TextureFormat::RGBA, TextureFormat::HALF_FLOAT);
431     }
432     else if (isBcFormat(format))
433     {
434         if (format == COMPRESSEDTEXFORMAT_BC4_UNORM_BLOCK || format == COMPRESSEDTEXFORMAT_BC4_SNORM_BLOCK)
435             return TextureFormat(TextureFormat::R, TextureFormat::FLOAT);
436         else if (format == COMPRESSEDTEXFORMAT_BC5_UNORM_BLOCK || format == COMPRESSEDTEXFORMAT_BC5_SNORM_BLOCK)
437             return TextureFormat(TextureFormat::RG, TextureFormat::FLOAT);
438         else if (format == COMPRESSEDTEXFORMAT_BC6H_UFLOAT_BLOCK || format == COMPRESSEDTEXFORMAT_BC6H_SFLOAT_BLOCK)
439             return TextureFormat(TextureFormat::RGB, TextureFormat::HALF_FLOAT);
440         else if (isBcSRGBFormat(format))
441             return TextureFormat(TextureFormat::sRGBA, TextureFormat::UNORM_INT8);
442         else
443             return TextureFormat(TextureFormat::RGBA, TextureFormat::UNORM_INT8);
444     }
445     else if (isAhbRawFormat(format))
446     {
447         if (format == COMPRESSEDTEXFORMAT_AHB_RAW10)
448             return TextureFormat(TextureFormat::R,
449                                  TextureFormat::UNORM_SHORT_10); // Can be changed to a more fitting value if needed
450         else                                                     // COMPRESSEDTEXFORMAT_AHB_RAW12
451             return TextureFormat(TextureFormat::R,
452                                  TextureFormat::UNORM_SHORT_12); // Can be changed to a more fitting value if needed
453     }
454     else
455     {
456         DE_ASSERT(false);
457         return TextureFormat();
458     }
459 }
460 
getAstcFormatByBlockSize(const IVec3 & size,bool isSRGB)461 CompressedTexFormat getAstcFormatByBlockSize(const IVec3 &size, bool isSRGB)
462 {
463     if (size.z() > 1)
464         throw InternalError("3D ASTC textures not currently supported");
465 
466     for (int fmtI = 0; fmtI < COMPRESSEDTEXFORMAT_LAST; fmtI++)
467     {
468         const CompressedTexFormat fmt = (CompressedTexFormat)fmtI;
469 
470         if (isAstcFormat(fmt) && getBlockPixelSize(fmt) == size && isAstcSRGBFormat(fmt) == isSRGB)
471             return fmt;
472     }
473 
474     throw InternalError("Invalid ASTC block size " + de::toString(size.x()) + "x" + de::toString(size.y()) + "x" +
475                         de::toString(size.z()));
476 }
477 
478 namespace
479 {
480 
extend4To8(uint8_t src)481 inline uint8_t extend4To8(uint8_t src)
482 {
483     DE_ASSERT((src & ~((1 << 4) - 1)) == 0);
484     return (uint8_t)((src << 4) | src);
485 }
486 
extend5To8(uint8_t src)487 inline uint8_t extend5To8(uint8_t src)
488 {
489     DE_ASSERT((src & ~((1 << 5) - 1)) == 0);
490     return (uint8_t)((src << 3) | (src >> 2));
491 }
492 
extend6To8(uint8_t src)493 inline uint8_t extend6To8(uint8_t src)
494 {
495     DE_ASSERT((src & ~((1 << 6) - 1)) == 0);
496     return (uint8_t)((src << 2) | (src >> 4));
497 }
498 
499 // \todo [2013-08-06 nuutti] ETC and ASTC decompression codes are rather unrelated, and are already in their own "private" namespaces - should this be split to multiple files?
500 
501 namespace EtcDecompressInternal
502 {
503 
// Size constants used by the ETC decompression routines.
enum
{
    // Block dimensions in pixels.
    ETC2_BLOCK_WIDTH  = 4,
    ETC2_BLOCK_HEIGHT = 4,

    // Bytes per decoded pixel for each output layout.
    ETC2_UNCOMPRESSED_PIXEL_SIZE_A8    = 1,
    ETC2_UNCOMPRESSED_PIXEL_SIZE_R11   = 2,
    ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11  = 4,
    ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8  = 3,
    ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 = 4,

    // Bytes per decoded block: pixel count (width * height) times the per-pixel size.
    ETC2_UNCOMPRESSED_BLOCK_SIZE_A8    = (ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT) * ETC2_UNCOMPRESSED_PIXEL_SIZE_A8,
    ETC2_UNCOMPRESSED_BLOCK_SIZE_R11   = (ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11,
    ETC2_UNCOMPRESSED_BLOCK_SIZE_RG11  = (ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11,
    ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8  = (ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8,
    ETC2_UNCOMPRESSED_BLOCK_SIZE_RGBA8 = (ETC2_BLOCK_WIDTH * ETC2_BLOCK_HEIGHT) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8
};
519 
// Reads the blockNdx-th group of 8 bytes from src and assembles it into a 64-bit
// value, treating the first byte as the most significant (big-endian storage).
inline uint64_t get64BitBlock(const uint8_t *src, int blockNdx)
{
    const uint8_t *const blockBytes = src + blockNdx * 8;
    uint64_t assembled              = 0;

    // Byte 0 lands in bits 63..56, byte 7 in bits 7..0.
    for (int byteNdx = 0; byteNdx < 8; byteNdx++)
    {
        const int shift = 8 * (7 - byteNdx);
        assembled |= ((uint64_t)blockBytes[byteNdx]) << shift;
    }

    return assembled;
}
530 
531 // Return the first 64 bits of a 128 bit block.
get128BitBlockStart(const uint8_t * src,int blockNdx)532 inline uint64_t get128BitBlockStart(const uint8_t *src, int blockNdx)
533 {
534     return get64BitBlock(src, 2 * blockNdx);
535 }
536 
537 // Return the last 64 bits of a 128 bit block.
get128BitBlockEnd(const uint8_t * src,int blockNdx)538 inline uint64_t get128BitBlockEnd(const uint8_t *src, int blockNdx)
539 {
540     return get64BitBlock(src, 2 * blockNdx + 1);
541 }
542 
// Returns the value (0 or 1) of the bit at position `bit` in src, where bit 0 is
// the least significant bit.
inline uint32_t getBit(uint64_t src, int bit)
{
    const uint64_t shifted = src >> bit;
    return (uint32_t)(shifted & 1ull);
}
547 
getBits(uint64_t src,int low,int high)548 inline uint32_t getBits(uint64_t src, int low, int high)
549 {
550     const int numBits = (high - low) + 1;
551     DE_ASSERT(de::inRange(numBits, 1, 32));
552     if (numBits < 32)
553         return (uint32_t)((src >> low) & ((1u << numBits) - 1));
554     else
555         return (uint32_t)((src >> low) & 0xFFFFFFFFu);
556 }
557 
extend7To8(uint8_t src)558 inline uint8_t extend7To8(uint8_t src)
559 {
560     DE_ASSERT((src & ~((1 << 7) - 1)) == 0);
561     return (uint8_t)((src << 1) | (src >> 6));
562 }
563 
// Sign-extends a 3-bit two's complement value to a signed 8-bit integer.
// Bit 2 of src is the sign bit: when set, all bits above the low three are filled with ones.
inline int8_t extendSigned3To8(uint8_t src)
{
    int signFill = 0;

    if ((src & 0x04) != 0)
        signFill = ~0x07;

    return (int8_t)(signFill | src);
}
569 
extend5Delta3To8(uint8_t base5,uint8_t delta3)570 inline uint8_t extend5Delta3To8(uint8_t base5, uint8_t delta3)
571 {
572     const uint8_t t = (uint8_t)((int8_t)base5 + extendSigned3To8(delta3));
573     return extend5To8(t);
574 }
575 
extend11To16(uint16_t src)576 inline uint16_t extend11To16(uint16_t src)
577 {
578     DE_ASSERT((src & ~((1 << 11) - 1)) == 0);
579     return (uint16_t)((src << 5) | (src >> 6));
580 }
581 
extend11To16WithSign(int16_t src)582 inline int16_t extend11To16WithSign(int16_t src)
583 {
584     if (src < 0)
585         return (int16_t)(-(int16_t)extend11To16((uint16_t)(-src)));
586     else
587         return (int16_t)extend11To16(src);
588 }
589 
decompressETC1Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8],uint64_t src)590 void decompressETC1Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], uint64_t src)
591 {
592     const int diffBit       = (int)getBit(src, 33);
593     const int flipBit       = (int)getBit(src, 32);
594     const uint32_t table[2] = {getBits(src, 37, 39), getBits(src, 34, 36)};
595     uint8_t baseR[2];
596     uint8_t baseG[2];
597     uint8_t baseB[2];
598 
599     if (diffBit == 0)
600     {
601         // Individual mode.
602         baseR[0] = extend4To8((uint8_t)getBits(src, 60, 63));
603         baseR[1] = extend4To8((uint8_t)getBits(src, 56, 59));
604         baseG[0] = extend4To8((uint8_t)getBits(src, 52, 55));
605         baseG[1] = extend4To8((uint8_t)getBits(src, 48, 51));
606         baseB[0] = extend4To8((uint8_t)getBits(src, 44, 47));
607         baseB[1] = extend4To8((uint8_t)getBits(src, 40, 43));
608     }
609     else
610     {
611         // Differential mode (diffBit == 1).
612         uint8_t bR = (uint8_t)getBits(src, 59, 63); // 5b
613         uint8_t dR = (uint8_t)getBits(src, 56, 58); // 3b
614         uint8_t bG = (uint8_t)getBits(src, 51, 55);
615         uint8_t dG = (uint8_t)getBits(src, 48, 50);
616         uint8_t bB = (uint8_t)getBits(src, 43, 47);
617         uint8_t dB = (uint8_t)getBits(src, 40, 42);
618 
619         baseR[0] = extend5To8(bR);
620         baseG[0] = extend5To8(bG);
621         baseB[0] = extend5To8(bB);
622 
623         baseR[1] = extend5Delta3To8(bR, dR);
624         baseG[1] = extend5Delta3To8(bG, dG);
625         baseB[1] = extend5Delta3To8(bB, dB);
626     }
627 
628     static const int modifierTable[8][4] = {//      00   01   10    11
629                                             {2, 8, -2, -8},       {5, 17, -5, -17},    {9, 29, -9, -29},
630                                             {13, 42, -13, -42},   {18, 60, -18, -60},  {24, 80, -24, -80},
631                                             {33, 106, -33, -106}, {47, 183, -47, -183}};
632 
633     // Write final pixels.
634     for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT * ETC2_BLOCK_WIDTH; pixelNdx++)
635     {
636         const int x                = pixelNdx / ETC2_BLOCK_HEIGHT;
637         const int y                = pixelNdx % ETC2_BLOCK_HEIGHT;
638         const int dstOffset        = (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
639         const int subBlock         = ((flipBit ? y : x) >= 2) ? 1 : 0;
640         const uint32_t tableNdx    = table[subBlock];
641         const uint32_t modifierNdx = (getBit(src, 16 + pixelNdx) << 1) | getBit(src, pixelNdx);
642         const int modifier         = modifierTable[tableNdx][modifierNdx];
643 
644         dst[dstOffset + 0] = (uint8_t)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
645         dst[dstOffset + 1] = (uint8_t)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
646         dst[dstOffset + 2] = (uint8_t)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
647     }
648 }
649 
650 // if alphaMode is true, do PUNCHTHROUGH and store alpha to alphaDst; otherwise do ordinary ETC2 RGB8.
decompressETC2Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8],uint64_t src,uint8_t alphaDst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8],bool alphaMode)651 void decompressETC2Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8], uint64_t src,
652                          uint8_t alphaDst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], bool alphaMode)
653 {
654     enum Etc2Mode
655     {
656         MODE_INDIVIDUAL = 0,
657         MODE_DIFFERENTIAL,
658         MODE_T,
659         MODE_H,
660         MODE_PLANAR,
661 
662         MODE_LAST
663     };
664 
665     const int diffOpaqueBit = (int)getBit(src, 33);
666     const int8_t selBR      = (int8_t)getBits(src, 59, 63); // 5 bits.
667     const int8_t selBG      = (int8_t)getBits(src, 51, 55);
668     const int8_t selBB      = (int8_t)getBits(src, 43, 47);
669     const int8_t selDR      = extendSigned3To8((uint8_t)getBits(src, 56, 58)); // 3 bits.
670     const int8_t selDG      = extendSigned3To8((uint8_t)getBits(src, 48, 50));
671     const int8_t selDB      = extendSigned3To8((uint8_t)getBits(src, 40, 42));
672     Etc2Mode mode;
673 
674     if (!alphaMode && diffOpaqueBit == 0)
675         mode = MODE_INDIVIDUAL;
676     else if (!de::inRange(selBR + selDR, 0, 31))
677         mode = MODE_T;
678     else if (!de::inRange(selBG + selDG, 0, 31))
679         mode = MODE_H;
680     else if (!de::inRange(selBB + selDB, 0, 31))
681         mode = MODE_PLANAR;
682     else
683         mode = MODE_DIFFERENTIAL;
684 
685     if (mode == MODE_INDIVIDUAL || mode == MODE_DIFFERENTIAL)
686     {
687         // Individual and differential modes have some steps in common, handle them here.
688         static const int modifierTable[8][4] = {//      00   01   10    11
689                                                 {2, 8, -2, -8},       {5, 17, -5, -17},    {9, 29, -9, -29},
690                                                 {13, 42, -13, -42},   {18, 60, -18, -60},  {24, 80, -24, -80},
691                                                 {33, 106, -33, -106}, {47, 183, -47, -183}};
692 
693         const int flipBit       = (int)getBit(src, 32);
694         const uint32_t table[2] = {getBits(src, 37, 39), getBits(src, 34, 36)};
695         uint8_t baseR[2];
696         uint8_t baseG[2];
697         uint8_t baseB[2];
698 
699         if (mode == MODE_INDIVIDUAL)
700         {
701             // Individual mode, initial values.
702             baseR[0] = extend4To8((uint8_t)getBits(src, 60, 63));
703             baseR[1] = extend4To8((uint8_t)getBits(src, 56, 59));
704             baseG[0] = extend4To8((uint8_t)getBits(src, 52, 55));
705             baseG[1] = extend4To8((uint8_t)getBits(src, 48, 51));
706             baseB[0] = extend4To8((uint8_t)getBits(src, 44, 47));
707             baseB[1] = extend4To8((uint8_t)getBits(src, 40, 43));
708         }
709         else
710         {
711             // Differential mode, initial values.
712             baseR[0] = extend5To8(selBR);
713             baseG[0] = extend5To8(selBG);
714             baseB[0] = extend5To8(selBB);
715 
716             baseR[1] = extend5To8((uint8_t)(selBR + selDR));
717             baseG[1] = extend5To8((uint8_t)(selBG + selDG));
718             baseB[1] = extend5To8((uint8_t)(selBB + selDB));
719         }
720 
721         // Write final pixels for individual or differential mode.
722         for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT * ETC2_BLOCK_WIDTH; pixelNdx++)
723         {
724             const int x                = pixelNdx / ETC2_BLOCK_HEIGHT;
725             const int y                = pixelNdx % ETC2_BLOCK_HEIGHT;
726             const int dstOffset        = (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
727             const int subBlock         = ((flipBit ? y : x) >= 2) ? 1 : 0;
728             const uint32_t tableNdx    = table[subBlock];
729             const uint32_t modifierNdx = (getBit(src, 16 + pixelNdx) << 1) | getBit(src, pixelNdx);
730             const int alphaDstOffset =
731                 (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
732 
733             // If doing PUNCHTHROUGH version (alphaMode), opaque bit may affect colors.
734             if (alphaMode && diffOpaqueBit == 0 && modifierNdx == 2)
735             {
736                 dst[dstOffset + 0]       = 0;
737                 dst[dstOffset + 1]       = 0;
738                 dst[dstOffset + 2]       = 0;
739                 alphaDst[alphaDstOffset] = 0;
740             }
741             else
742             {
743                 int modifier;
744 
745                 // PUNCHTHROUGH version and opaque bit may also affect modifiers.
746                 if (alphaMode && diffOpaqueBit == 0 && (modifierNdx == 0 || modifierNdx == 2))
747                     modifier = 0;
748                 else
749                     modifier = modifierTable[tableNdx][modifierNdx];
750 
751                 dst[dstOffset + 0] = (uint8_t)deClamp32((int)baseR[subBlock] + modifier, 0, 255);
752                 dst[dstOffset + 1] = (uint8_t)deClamp32((int)baseG[subBlock] + modifier, 0, 255);
753                 dst[dstOffset + 2] = (uint8_t)deClamp32((int)baseB[subBlock] + modifier, 0, 255);
754 
755                 if (alphaMode)
756                     alphaDst[alphaDstOffset] = 255;
757             }
758         }
759     }
760     else if (mode == MODE_T || mode == MODE_H)
761     {
762         // T and H modes have some steps in common, handle them here.
763         static const int distTable[8] = {3, 6, 11, 16, 23, 32, 41, 64};
764 
765         uint8_t paintR[4];
766         uint8_t paintG[4];
767         uint8_t paintB[4];
768 
769         if (mode == MODE_T)
770         {
771             // T mode, calculate paint values.
772             const uint8_t R1a      = (uint8_t)getBits(src, 59, 60);
773             const uint8_t R1b      = (uint8_t)getBits(src, 56, 57);
774             const uint8_t G1       = (uint8_t)getBits(src, 52, 55);
775             const uint8_t B1       = (uint8_t)getBits(src, 48, 51);
776             const uint8_t R2       = (uint8_t)getBits(src, 44, 47);
777             const uint8_t G2       = (uint8_t)getBits(src, 40, 43);
778             const uint8_t B2       = (uint8_t)getBits(src, 36, 39);
779             const uint32_t distNdx = (getBits(src, 34, 35) << 1) | getBit(src, 32);
780             const int dist         = distTable[distNdx];
781 
782             paintR[0] = extend4To8((uint8_t)((R1a << 2) | R1b));
783             paintG[0] = extend4To8(G1);
784             paintB[0] = extend4To8(B1);
785             paintR[2] = extend4To8(R2);
786             paintG[2] = extend4To8(G2);
787             paintB[2] = extend4To8(B2);
788             paintR[1] = (uint8_t)deClamp32((int)paintR[2] + dist, 0, 255);
789             paintG[1] = (uint8_t)deClamp32((int)paintG[2] + dist, 0, 255);
790             paintB[1] = (uint8_t)deClamp32((int)paintB[2] + dist, 0, 255);
791             paintR[3] = (uint8_t)deClamp32((int)paintR[2] - dist, 0, 255);
792             paintG[3] = (uint8_t)deClamp32((int)paintG[2] - dist, 0, 255);
793             paintB[3] = (uint8_t)deClamp32((int)paintB[2] - dist, 0, 255);
794         }
795         else
796         {
797             // H mode, calculate paint values.
798             const uint8_t R1  = (uint8_t)getBits(src, 59, 62);
799             const uint8_t G1a = (uint8_t)getBits(src, 56, 58);
800             const uint8_t G1b = (uint8_t)getBit(src, 52);
801             const uint8_t B1a = (uint8_t)getBit(src, 51);
802             const uint8_t B1b = (uint8_t)getBits(src, 47, 49);
803             const uint8_t R2  = (uint8_t)getBits(src, 43, 46);
804             const uint8_t G2  = (uint8_t)getBits(src, 39, 42);
805             const uint8_t B2  = (uint8_t)getBits(src, 35, 38);
806             uint8_t baseR[2];
807             uint8_t baseG[2];
808             uint8_t baseB[2];
809             uint32_t baseValue[2];
810             uint32_t distNdx;
811             int dist;
812 
813             baseR[0]     = extend4To8(R1);
814             baseG[0]     = extend4To8((uint8_t)((G1a << 1) | G1b));
815             baseB[0]     = extend4To8((uint8_t)((B1a << 3) | B1b));
816             baseR[1]     = extend4To8(R2);
817             baseG[1]     = extend4To8(G2);
818             baseB[1]     = extend4To8(B2);
819             baseValue[0] = (((uint32_t)baseR[0]) << 16) | (((uint32_t)baseG[0]) << 8) | baseB[0];
820             baseValue[1] = (((uint32_t)baseR[1]) << 16) | (((uint32_t)baseG[1]) << 8) | baseB[1];
821             distNdx      = (getBit(src, 34) << 2) | (getBit(src, 32) << 1) | (uint32_t)(baseValue[0] >= baseValue[1]);
822             dist         = distTable[distNdx];
823 
824             paintR[0] = (uint8_t)deClamp32((int)baseR[0] + dist, 0, 255);
825             paintG[0] = (uint8_t)deClamp32((int)baseG[0] + dist, 0, 255);
826             paintB[0] = (uint8_t)deClamp32((int)baseB[0] + dist, 0, 255);
827             paintR[1] = (uint8_t)deClamp32((int)baseR[0] - dist, 0, 255);
828             paintG[1] = (uint8_t)deClamp32((int)baseG[0] - dist, 0, 255);
829             paintB[1] = (uint8_t)deClamp32((int)baseB[0] - dist, 0, 255);
830             paintR[2] = (uint8_t)deClamp32((int)baseR[1] + dist, 0, 255);
831             paintG[2] = (uint8_t)deClamp32((int)baseG[1] + dist, 0, 255);
832             paintB[2] = (uint8_t)deClamp32((int)baseB[1] + dist, 0, 255);
833             paintR[3] = (uint8_t)deClamp32((int)baseR[1] - dist, 0, 255);
834             paintG[3] = (uint8_t)deClamp32((int)baseG[1] - dist, 0, 255);
835             paintB[3] = (uint8_t)deClamp32((int)baseB[1] - dist, 0, 255);
836         }
837 
838         // Write final pixels for T or H mode.
839         for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT * ETC2_BLOCK_WIDTH; pixelNdx++)
840         {
841             const int x             = pixelNdx / ETC2_BLOCK_HEIGHT;
842             const int y             = pixelNdx % ETC2_BLOCK_HEIGHT;
843             const int dstOffset     = (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
844             const uint32_t paintNdx = (getBit(src, 16 + pixelNdx) << 1) | getBit(src, pixelNdx);
845             const int alphaDstOffset =
846                 (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
847 
848             if (alphaMode && diffOpaqueBit == 0 && paintNdx == 2)
849             {
850                 dst[dstOffset + 0]       = 0;
851                 dst[dstOffset + 1]       = 0;
852                 dst[dstOffset + 2]       = 0;
853                 alphaDst[alphaDstOffset] = 0;
854             }
855             else
856             {
857                 dst[dstOffset + 0] = (uint8_t)deClamp32((int)paintR[paintNdx], 0, 255);
858                 dst[dstOffset + 1] = (uint8_t)deClamp32((int)paintG[paintNdx], 0, 255);
859                 dst[dstOffset + 2] = (uint8_t)deClamp32((int)paintB[paintNdx], 0, 255);
860 
861                 if (alphaMode)
862                     alphaDst[alphaDstOffset] = 255;
863             }
864         }
865     }
866     else
867     {
868         // Planar mode.
869         const uint8_t GO1 = (uint8_t)getBit(src, 56);
870         const uint8_t GO2 = (uint8_t)getBits(src, 49, 54);
871         const uint8_t BO1 = (uint8_t)getBit(src, 48);
872         const uint8_t BO2 = (uint8_t)getBits(src, 43, 44);
873         const uint8_t BO3 = (uint8_t)getBits(src, 39, 41);
874         const uint8_t RH1 = (uint8_t)getBits(src, 34, 38);
875         const uint8_t RH2 = (uint8_t)getBit(src, 32);
876         const uint8_t RO  = extend6To8((uint8_t)getBits(src, 57, 62));
877         const uint8_t GO  = extend7To8((uint8_t)((GO1 << 6) | GO2));
878         const uint8_t BO  = extend6To8((uint8_t)((BO1 << 5) | (BO2 << 3) | BO3));
879         const uint8_t RH  = extend6To8((uint8_t)((RH1 << 1) | RH2));
880         const uint8_t GH  = extend7To8((uint8_t)getBits(src, 25, 31));
881         const uint8_t BH  = extend6To8((uint8_t)getBits(src, 19, 24));
882         const uint8_t RV  = extend6To8((uint8_t)getBits(src, 13, 18));
883         const uint8_t GV  = extend7To8((uint8_t)getBits(src, 6, 12));
884         const uint8_t BV  = extend6To8((uint8_t)getBits(src, 0, 5));
885 
886         // Write final pixels for planar mode.
887         for (int y = 0; y < 4; y++)
888         {
889             for (int x = 0; x < 4; x++)
890             {
891                 const int dstOffset      = (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8;
892                 const int unclampedR     = (x * ((int)RH - (int)RO) + y * ((int)RV - (int)RO) + 4 * (int)RO + 2) >> 2;
893                 const int unclampedG     = (x * ((int)GH - (int)GO) + y * ((int)GV - (int)GO) + 4 * (int)GO + 2) >> 2;
894                 const int unclampedB     = (x * ((int)BH - (int)BO) + y * ((int)BV - (int)BO) + 4 * (int)BO + 2) >> 2;
895                 const int alphaDstOffset = (y * ETC2_BLOCK_WIDTH + x) *
896                                            ETC2_UNCOMPRESSED_PIXEL_SIZE_A8; // Only needed for PUNCHTHROUGH version.
897 
898                 dst[dstOffset + 0] = (uint8_t)deClamp32(unclampedR, 0, 255);
899                 dst[dstOffset + 1] = (uint8_t)deClamp32(unclampedG, 0, 255);
900                 dst[dstOffset + 2] = (uint8_t)deClamp32(unclampedB, 0, 255);
901 
902                 if (alphaMode)
903                     alphaDst[alphaDstOffset] = 255;
904             }
905         }
906     }
907 }
908 
decompressEAC8Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8],uint64_t src)909 void decompressEAC8Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8], uint64_t src)
910 {
911     static const int modifierTable[16][8] = {
912         {-3, -6, -9, -15, 2, 5, 8, 14}, {-3, -7, -10, -13, 2, 6, 9, 12}, {-2, -5, -8, -13, 1, 4, 7, 12},
913         {-2, -4, -6, -13, 1, 3, 5, 12}, {-3, -6, -8, -12, 2, 5, 7, 11},  {-3, -7, -9, -11, 2, 6, 8, 10},
914         {-4, -7, -8, -11, 3, 6, 7, 10}, {-3, -5, -8, -11, 2, 4, 7, 10},  {-2, -6, -8, -10, 1, 5, 7, 9},
915         {-2, -5, -8, -10, 1, 4, 7, 9},  {-2, -4, -8, -10, 1, 3, 7, 9},   {-2, -5, -7, -10, 1, 4, 6, 9},
916         {-3, -4, -7, -10, 2, 3, 6, 9},  {-1, -2, -3, -10, 0, 1, 2, 9},   {-4, -6, -8, -9, 3, 5, 7, 8},
917         {-3, -5, -7, -9, 2, 4, 6, 8}};
918 
919     const uint8_t baseCodeword = (uint8_t)getBits(src, 56, 63);
920     const uint8_t multiplier   = (uint8_t)getBits(src, 52, 55);
921     const uint32_t tableNdx    = getBits(src, 48, 51);
922 
923     for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT * ETC2_BLOCK_WIDTH; pixelNdx++)
924     {
925         const int x           = pixelNdx / ETC2_BLOCK_HEIGHT;
926         const int y           = pixelNdx % ETC2_BLOCK_HEIGHT;
927         const int dstOffset   = (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_A8;
928         const int pixelBitNdx = 45 - 3 * pixelNdx;
929         const uint32_t modifierNdx =
930             (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
931         const int modifier = modifierTable[tableNdx][modifierNdx];
932 
933         dst[dstOffset] = (uint8_t)deClamp32((int)baseCodeword + (int)multiplier * modifier, 0, 255);
934     }
935 }
936 
decompressEAC11Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11],uint64_t src,bool signedMode)937 void decompressEAC11Block(uint8_t dst[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11], uint64_t src, bool signedMode)
938 {
939     static const int modifierTable[16][8] = {
940         {-3, -6, -9, -15, 2, 5, 8, 14}, {-3, -7, -10, -13, 2, 6, 9, 12}, {-2, -5, -8, -13, 1, 4, 7, 12},
941         {-2, -4, -6, -13, 1, 3, 5, 12}, {-3, -6, -8, -12, 2, 5, 7, 11},  {-3, -7, -9, -11, 2, 6, 8, 10},
942         {-4, -7, -8, -11, 3, 6, 7, 10}, {-3, -5, -8, -11, 2, 4, 7, 10},  {-2, -6, -8, -10, 1, 5, 7, 9},
943         {-2, -5, -8, -10, 1, 4, 7, 9},  {-2, -4, -8, -10, 1, 3, 7, 9},   {-2, -5, -7, -10, 1, 4, 6, 9},
944         {-3, -4, -7, -10, 2, 3, 6, 9},  {-1, -2, -3, -10, 0, 1, 2, 9},   {-4, -6, -8, -9, 3, 5, 7, 8},
945         {-3, -5, -7, -9, 2, 4, 6, 8}};
946 
947     const int32_t multiplier = (int32_t)getBits(src, 52, 55);
948     const int32_t tableNdx   = (int32_t)getBits(src, 48, 51);
949     int32_t baseCodeword     = (int32_t)getBits(src, 56, 63);
950 
951     if (signedMode)
952     {
953         if (baseCodeword > 127)
954             baseCodeword -= 256;
955         if (baseCodeword == -128)
956             baseCodeword = -127;
957     }
958 
959     for (int pixelNdx = 0; pixelNdx < ETC2_BLOCK_HEIGHT * ETC2_BLOCK_WIDTH; pixelNdx++)
960     {
961         const int x           = pixelNdx / ETC2_BLOCK_HEIGHT;
962         const int y           = pixelNdx % ETC2_BLOCK_HEIGHT;
963         const int dstOffset   = (y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
964         const int pixelBitNdx = 45 - 3 * pixelNdx;
965         const uint32_t modifierNdx =
966             (getBit(src, pixelBitNdx + 2) << 2) | (getBit(src, pixelBitNdx + 1) << 1) | getBit(src, pixelBitNdx);
967         const int modifier = modifierTable[tableNdx][modifierNdx];
968 
969         if (signedMode)
970         {
971             int16_t value;
972 
973             if (multiplier != 0)
974                 value = (int16_t)deClamp32(baseCodeword * 8 + multiplier * modifier * 8, -1023, 1023);
975             else
976                 value = (int16_t)deClamp32(baseCodeword * 8 + modifier, -1023, 1023);
977 
978             *((int16_t *)(dst + dstOffset)) = value;
979         }
980         else
981         {
982             uint16_t value;
983 
984             if (multiplier != 0)
985                 value = (uint16_t)deClamp32(baseCodeword * 8 + 4 + multiplier * modifier * 8, 0, 2047);
986             else
987                 value = (uint16_t)deClamp32(baseCodeword * 8 + 4 + modifier, 0, 2047);
988 
989             *((uint16_t *)(dst + dstOffset)) = value;
990         }
991     }
992 }
993 
994 } // namespace EtcDecompressInternal
995 
decompressETC1(const PixelBufferAccess & dst,const uint8_t * src)996 void decompressETC1(const PixelBufferAccess &dst, const uint8_t *src)
997 {
998     using namespace EtcDecompressInternal;
999 
1000     uint8_t *const dstPtr          = (uint8_t *)dst.getDataPtr();
1001     const uint64_t compressedBlock = get64BitBlock(src, 0);
1002 
1003     decompressETC1Block(dstPtr, compressedBlock);
1004 }
1005 
decompressETC2(const PixelBufferAccess & dst,const uint8_t * src)1006 void decompressETC2(const PixelBufferAccess &dst, const uint8_t *src)
1007 {
1008     using namespace EtcDecompressInternal;
1009 
1010     uint8_t *const dstPtr          = (uint8_t *)dst.getDataPtr();
1011     const uint64_t compressedBlock = get64BitBlock(src, 0);
1012 
1013     decompressETC2Block(dstPtr, compressedBlock, NULL, false);
1014 }
1015 
decompressETC2_EAC_RGBA8(const PixelBufferAccess & dst,const uint8_t * src)1016 void decompressETC2_EAC_RGBA8(const PixelBufferAccess &dst, const uint8_t *src)
1017 {
1018     using namespace EtcDecompressInternal;
1019 
1020     uint8_t *const dstPtr  = (uint8_t *)dst.getDataPtr();
1021     const int dstRowPitch  = dst.getRowPitch();
1022     const int dstPixelSize = ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
1023 
1024     const uint64_t compressedBlockAlpha = get128BitBlockStart(src, 0);
1025     const uint64_t compressedBlockRGB   = get128BitBlockEnd(src, 0);
1026     uint8_t uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
1027     uint8_t uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
1028 
1029     // Decompress.
1030     decompressETC2Block(uncompressedBlockRGB, compressedBlockRGB, NULL, false);
1031     decompressEAC8Block(uncompressedBlockAlpha, compressedBlockAlpha);
1032 
1033     // Write to dst.
1034     for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
1035     {
1036         for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
1037         {
1038             const uint8_t *const srcPixelRGB =
1039                 &uncompressedBlockRGB[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
1040             const uint8_t *const srcPixelAlpha =
1041                 &uncompressedBlockAlpha[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
1042             uint8_t *const dstPixel = dstPtr + y * dstRowPitch + x * dstPixelSize;
1043 
1044             DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
1045             dstPixel[0] = srcPixelRGB[0];
1046             dstPixel[1] = srcPixelRGB[1];
1047             dstPixel[2] = srcPixelRGB[2];
1048             dstPixel[3] = srcPixelAlpha[0];
1049         }
1050     }
1051 }
1052 
decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1(const PixelBufferAccess & dst,const uint8_t * src)1053 void decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1(const PixelBufferAccess &dst, const uint8_t *src)
1054 {
1055     using namespace EtcDecompressInternal;
1056 
1057     uint8_t *const dstPtr  = (uint8_t *)dst.getDataPtr();
1058     const int dstRowPitch  = dst.getRowPitch();
1059     const int dstPixelSize = ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8;
1060 
1061     const uint64_t compressedBlockRGBA = get64BitBlock(src, 0);
1062     uint8_t uncompressedBlockRGB[ETC2_UNCOMPRESSED_BLOCK_SIZE_RGB8];
1063     uint8_t uncompressedBlockAlpha[ETC2_UNCOMPRESSED_BLOCK_SIZE_A8];
1064 
1065     // Decompress.
1066     decompressETC2Block(uncompressedBlockRGB, compressedBlockRGBA, uncompressedBlockAlpha, true);
1067 
1068     // Write to dst.
1069     for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
1070     {
1071         for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
1072         {
1073             const uint8_t *const srcPixel =
1074                 &uncompressedBlockRGB[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_RGB8];
1075             const uint8_t *const srcPixelAlpha =
1076                 &uncompressedBlockAlpha[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_A8];
1077             uint8_t *const dstPixel = dstPtr + y * dstRowPitch + x * dstPixelSize;
1078 
1079             DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RGBA8 == 4);
1080             dstPixel[0] = srcPixel[0];
1081             dstPixel[1] = srcPixel[1];
1082             dstPixel[2] = srcPixel[2];
1083             dstPixel[3] = srcPixelAlpha[0];
1084         }
1085     }
1086 }
1087 
decompressEAC_R11(const PixelBufferAccess & dst,const uint8_t * src,bool signedMode)1088 void decompressEAC_R11(const PixelBufferAccess &dst, const uint8_t *src, bool signedMode)
1089 {
1090     using namespace EtcDecompressInternal;
1091 
1092     uint8_t *const dstPtr  = (uint8_t *)dst.getDataPtr();
1093     const int dstRowPitch  = dst.getRowPitch();
1094     const int dstPixelSize = ETC2_UNCOMPRESSED_PIXEL_SIZE_R11;
1095 
1096     const uint64_t compressedBlock = get64BitBlock(src, 0);
1097     uint8_t uncompressedBlock[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
1098 
1099     // Decompress.
1100     decompressEAC11Block(uncompressedBlock, compressedBlock, signedMode);
1101 
1102     // Write to dst.
1103     for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
1104     {
1105         for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
1106         {
1107             DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_R11 == 2);
1108 
1109             if (signedMode)
1110             {
1111                 const int16_t *const srcPixel =
1112                     (int16_t *)&uncompressedBlock[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
1113                 int16_t *const dstPixel = (int16_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1114 
1115                 dstPixel[0] = extend11To16WithSign(srcPixel[0]);
1116             }
1117             else
1118             {
1119                 const uint16_t *const srcPixel =
1120                     (uint16_t *)&uncompressedBlock[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
1121                 uint16_t *const dstPixel = (uint16_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1122 
1123                 dstPixel[0] = extend11To16(srcPixel[0]);
1124             }
1125         }
1126     }
1127 }
1128 
decompressEAC_RG11(const PixelBufferAccess & dst,const uint8_t * src,bool signedMode)1129 void decompressEAC_RG11(const PixelBufferAccess &dst, const uint8_t *src, bool signedMode)
1130 {
1131     using namespace EtcDecompressInternal;
1132 
1133     uint8_t *const dstPtr  = (uint8_t *)dst.getDataPtr();
1134     const int dstRowPitch  = dst.getRowPitch();
1135     const int dstPixelSize = ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11;
1136 
1137     const uint64_t compressedBlockR = get128BitBlockStart(src, 0);
1138     const uint64_t compressedBlockG = get128BitBlockEnd(src, 0);
1139     uint8_t uncompressedBlockR[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
1140     uint8_t uncompressedBlockG[ETC2_UNCOMPRESSED_BLOCK_SIZE_R11];
1141 
1142     // Decompress.
1143     decompressEAC11Block(uncompressedBlockR, compressedBlockR, signedMode);
1144     decompressEAC11Block(uncompressedBlockG, compressedBlockG, signedMode);
1145 
1146     // Write to dst.
1147     for (int y = 0; y < (int)ETC2_BLOCK_HEIGHT; y++)
1148     {
1149         for (int x = 0; x < (int)ETC2_BLOCK_WIDTH; x++)
1150         {
1151             DE_STATIC_ASSERT(ETC2_UNCOMPRESSED_PIXEL_SIZE_RG11 == 4);
1152 
1153             if (signedMode)
1154             {
1155                 const int16_t *const srcPixelR =
1156                     (int16_t *)&uncompressedBlockR[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
1157                 const int16_t *const srcPixelG =
1158                     (int16_t *)&uncompressedBlockG[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
1159                 int16_t *const dstPixel = (int16_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1160 
1161                 dstPixel[0] = extend11To16WithSign(srcPixelR[0]);
1162                 dstPixel[1] = extend11To16WithSign(srcPixelG[0]);
1163             }
1164             else
1165             {
1166                 const uint16_t *const srcPixelR =
1167                     (uint16_t *)&uncompressedBlockR[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
1168                 const uint16_t *const srcPixelG =
1169                     (uint16_t *)&uncompressedBlockG[(y * ETC2_BLOCK_WIDTH + x) * ETC2_UNCOMPRESSED_PIXEL_SIZE_R11];
1170                 uint16_t *const dstPixel = (uint16_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1171 
1172                 dstPixel[0] = extend11To16(srcPixelR[0]);
1173                 dstPixel[1] = extend11To16(srcPixelG[0]);
1174             }
1175         }
1176     }
1177 }
1178 
1179 namespace BcDecompressInternal
1180 {
1181 
// Block dimensions shared by the BC decompression helpers in this namespace
// (presumably measured in texels; confirm at the use sites).
enum
{
    BC_BLOCK_WIDTH  = 4,
    BC_BLOCK_HEIGHT = 4
};
1187 
// One bit count per entry, 14 entries in total.
// NOTE(review): presumably the endpoint precision of each BC6H mode, indexed
// by the value returned from extractModeBc6; the consumer is not visible here,
// so confirm against the BC6H decoder.
static const uint8_t epBits[14] = {10, 7, 11, 11, 11, 9, 8, 8, 8, 6, 10, 11, 12, 16};
1189 
// Two-subset partition table: 64 patterns of 16 entries, every entry 0 or 1.
// NOTE(review): presumably one entry per texel of a 4x4 block, naming the
// subset that texel belongs to; the lookup code is not visible here, so
// confirm the indexing order at the use sites.
static const uint8_t partitions2[64][16] = {
    {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1},
    {0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1}, {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1},
    {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1}, {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1},
    {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1},
    {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
    {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1},
    {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1},
    {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1},
    {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1}, {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0}, {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0},
    {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
    {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1},
    {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0},
    {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0},
    {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0}, {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
    {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0}, {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0},
    {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1},
    {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0}, {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0},
    {0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0}, {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0},
    {0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1}, {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1},
    {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0}, {0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0},
    {0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, {0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0},
    {0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, {0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1},
    {0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1}, {0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0},
    {0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, {0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0},
    {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0}, {0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0},
    {0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1}, {0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1},
    {0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0}, {0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0},
    {0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}, {0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1},
    {0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1}, {0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1},
    {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0},
    {0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0}, {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1}};
1223 
// Three-subset partition table: 64 patterns of 16 entries, every entry 0, 1 or 2.
// NOTE(review): presumably one entry per texel of a 4x4 block, naming the
// subset that texel belongs to; the lookup code is not visible here, so
// confirm the indexing order at the use sites.
static const uint8_t partitions3[64][16] = {
    {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2}, {0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1},
    {0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1}, {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1},
    {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2}, {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2},
    {0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1}, {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1},
    {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}, {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2},
    {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}, {0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2},
    {0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2}, {0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2},
    {0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2}, {0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0},
    {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2}, {0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0},
    {0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2}, {0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1},
    {0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2}, {0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1},
    {0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2}, {0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0},
    {0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0}, {0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2},
    {0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0}, {0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1},
    {0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2}, {0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2},
    {0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1}, {0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1},
    {0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2}, {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1},
    {0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2}, {0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0},
    {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0}, {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0},
    {0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0}, {0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1},
    {0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1}, {0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2},
    {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1}, {0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2},
    {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1}, {0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1},
    {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1}, {0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1},
    {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2}, {0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1},
    {0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2}, {0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2},
    {0, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2}, {0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2},
    {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2},
    {0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2}, {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2},
    {0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2},
    {0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1}, {0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2},
    {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, {0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0}};
1257 
// 64 entries, each a position in the range 0..15.
// NOTE(review): presumably the anchor texel of the second subset for each
// two-subset partition pattern (same index as partitions2); confirm at the
// use sites.
static const uint8_t anchorIndicesSecondSubset2[64] = {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
                                                       15, 2,  8,  2,  2,  8,  8,  15, 2,  8,  2,  2,  8,  8,  2,  2,
                                                       15, 15, 6,  8,  2,  8,  15, 15, 2,  8,  2,  2,  2,  15, 15, 6,
                                                       6,  2,  6,  8,  15, 15, 2,  2,  15, 15, 15, 15, 15, 2,  2,  15};
1262 
// 64 entries, each a position in the range 0..15.
// NOTE(review): presumably the anchor texel of the second subset for each
// three-subset partition pattern (same index as partitions3); confirm at the
// use sites.
static const uint8_t anchorIndicesSecondSubset3[64] = {
    3, 3,  15, 15, 8, 3,  15, 15, 8,  8, 6,  6, 6,  5,  3,  3,  3, 3,  8, 15, 3, 3, 6, 10, 5, 8,  8, 6,  8,  5,  15, 15,
    8, 15, 3,  5,  6, 10, 8,  15, 15, 3, 15, 5, 15, 15, 15, 15, 3, 15, 5, 5,  5, 8, 5, 10, 5, 10, 8, 13, 15, 12, 3,  3};
1266 
// 64 entries, each a position in the range 0..15.
// NOTE(review): presumably the anchor texel of the third subset for each
// three-subset partition pattern (same index as partitions3); confirm at the
// use sites.
static const uint8_t anchorIndicesThirdSubset[64] = {15, 8, 8,  3,  15, 15, 3,  8,  15, 15, 15, 15, 15, 15, 15, 8,
                                                     15, 8, 15, 3,  15, 8,  15, 8,  3,  15, 6,  10, 15, 15, 10, 8,
                                                     15, 3, 15, 10, 10, 8,  9,  10, 6,  15, 8,  15, 3,  6,  6,  8,
                                                     15, 3, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 3,  15, 15, 8};
1271 
// Weight tables with 4, 8 and 16 entries, each rising from 0 to 64.
// NOTE(review): presumably the interpolation weights selected by 2-, 3- and
// 4-bit indices (64 meaning full weight); confirm at the use sites.
static const uint16_t weights2[4]  = {0, 21, 43, 64};
static const uint16_t weights3[8]  = {0, 9, 18, 27, 37, 46, 55, 64};
static const uint16_t weights4[16] = {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64};
1275 
// Converts an unsigned byte to float by dividing by 255, so 0 maps to 0.0 and
// 255 maps to 1.0.
inline float uint8ToFloat(uint8_t src)
{
    const float value = static_cast<float>(src);
    const float range = 255.0f;

    return value / range;
}
1280 
// Converts a signed byte to float by dividing by 128, so -128 maps to -1.0 and
// 127 maps to 127/128.
inline float int8ToFloat(int8_t src)
{
    const float value = static_cast<float>(src);
    const float range = 128.0f;

    return value / range;
}
1285 
bgr16torgba32(uint16_t src)1286 inline uint32_t bgr16torgba32(uint16_t src)
1287 {
1288     const uint32_t src32 = src;
1289     const uint8_t b5     = (src32 & 0x1f);
1290     const uint8_t g6     = (src32 >> 5) & 0x3f;
1291     const uint8_t r5     = (src32 >> 11) & 0x1f;
1292     const uint32_t a8    = 0xff;
1293     const uint32_t b8    = extend5To8(b5);
1294     const uint32_t g8    = extend6To8(g6);
1295     const uint32_t r8    = extend5To8(r5);
1296 
1297     return (r8 | (g8 << 8) | (b8 << 16) | (a8 << 24));
1298 }
1299 
// Blends two packed 8-bit-per-channel colours as 1/3 * c0 + 2/3 * c1.
// Each of the four bytes is processed independently using integer division,
// so fractional parts are truncated.
inline uint32_t interpolateColor(uint32_t c0, uint32_t c1)
{
    uint32_t blended = 0;

    for (uint32_t shift = 0; shift < 32; shift += 8)
    {
        const uint32_t channel0 = (c0 >> shift) & 0xff;
        const uint32_t channel1 = (c1 >> shift) & 0xff;

        // The weighted sum is at most 3 * 255, so the quotient fits in a byte.
        blended |= ((channel0 + 2 * channel1) / 3) << shift;
    }

    return blended;
}
1320 
// Averages two packed 8-bit-per-channel colours.
// Each of the four bytes is processed independently; odd sums round down.
inline uint32_t averageColor(uint32_t c0, uint32_t c1)
{
    uint32_t averaged = 0;

    for (uint32_t shift = 0; shift < 32; shift += 8)
    {
        const uint32_t channel0 = (c0 >> shift) & 0xff;
        const uint32_t channel1 = (c1 >> shift) & 0xff;

        averaged |= ((channel0 + channel1) / 2) << shift;
    }

    return averaged;
}
1341 
// Maps the low five bits of src to a mode number, or -1 for an illegal code.
//
// Codes whose two lowest bits are 0 or 1 select modes 0 and 1 directly. When
// the two lowest bits are 2 or 3, bits 2..4 are added to a base of 2 or 10
// respectively. The codes 0x13, 0x17, 0x1b and 0x1f are rejected; these are
// exactly the codes with both low bits set and bit 4 set.
inline int8_t extractModeBc6(uint8_t src)
{
    const uint32_t lowPair   = src & 0x3;
    const uint32_t nextThree = (src >> 2) & 0x7;

    // One- and two-bit codes: the mode equals the low pair.
    if (lowPair < 2)
        return (int8_t)lowPair;

    // Illegal five-bit codes 0x13, 0x17, 0x1b, 0x1f.
    if (lowPair == 3 && nextThree >= 4)
        return -1;

    const uint32_t base = (lowPair == 2) ? 2 : 10;

    return (int8_t)(base + nextThree);
}
1368 
// Returns the position (0..7) of the lowest set bit of src, or -1 when src is zero.
inline int8_t extractModeBc7(uint8_t src)
{
    if (src == 0)
        return -1;

    // At least one bit is set, so the scan below stops within eight steps.
    int8_t mode = 0;
    while (((src >> mode) & 1) == 0)
        mode++;

    return mode;
}
1377 
// Little-endian counterpart of get64BitBlock: reads the eight bytes of 64-bit block
// number blockNdx from src, with the first byte ending up in the least significant bits.
inline uint64_t get64BitBlockLE(const uint8_t *src, int blockNdx)
{
    const uint8_t *const bytes = src + blockNdx * 8;
    uint64_t block             = 0;

    // Walk from the most significant byte (offset 7) down, shifting earlier bytes up.
    for (int i = 7; i >= 0; i--)
        block = (block << 8) | bytes[i];

    return block;
}
1388 
getBits128(uint64_t low,uint64_t high,uint32_t first,uint32_t last)1389 inline uint32_t getBits128(uint64_t low, uint64_t high, uint32_t first, uint32_t last)
1390 {
1391     const uint64_t d[2] = {low, high};
1392     const bool reverse  = first > last;
1393     uint32_t ret        = 0;
1394 
1395     if (reverse)
1396     {
1397         const uint32_t tmp = first;
1398         first              = last;
1399         last               = tmp;
1400     }
1401 
1402     const int elementFirst = first / 64;
1403     const int elementLast  = last / 64;
1404 
1405     if (elementFirst == elementLast)
1406     {
1407         // Bits contained in one of the 64bit elements
1408         const uint32_t shift = first % 64;
1409         const uint32_t len   = last - first + 1;
1410         const uint32_t mask  = (1 << len) - 1;
1411         ret                  = (uint32_t)((d[elementFirst] >> shift) & mask);
1412     }
1413     else
1414     {
1415         // Bits contained in both of the 64bit elements
1416         DE_ASSERT(last > 63);
1417         DE_ASSERT(first < 64);
1418         const uint32_t len0  = 64 - first;
1419         const uint32_t mask0 = (1 << len0) - 1;
1420         const uint32_t data0 = (uint32_t)(low >> first) & mask0;
1421         const uint32_t len1  = last - 63;
1422         const uint32_t mask1 = (1 << len1) - 1;
1423         const uint32_t data1 = (uint32_t)(high & mask1);
1424         ret                  = (uint32_t)((data1 << len0) | data0);
1425     }
1426 
1427     if (reverse)
1428     {
1429         const uint32_t len  = last - first + 1;
1430         const uint32_t orig = ret;
1431         ret                 = 0;
1432 
1433         for (uint32_t i = 0; i < len; i++)
1434         {
1435             ret |= ((orig >> (len - 1 - i)) & 1) << i;
1436         }
1437     }
1438 
1439     return ret;
1440 }
1441 
// Sign-extends the srcBits-wide two's complement number held in the low bits of value
// to dstBits bits. Values whose sign bit (bit srcBits - 1) is clear are returned as is.
inline int32_t signExtend(int32_t value, int32_t srcBits, int32_t dstBits)
{
    const uint32_t raw     = (uint32_t)value;
    const uint32_t signBit = (uint32_t)1 << (srcBits - 1);

    if ((raw & signBit) == 0)
        return value;

    // Set every bit from srcBits upwards, then keep only the low dstBits bits.
    const uint32_t highOnes = 0xffffffffu << srcBits;
    const uint32_t keepMask = (uint32_t)(((uint64_t)1 << dstBits) - 1);

    return (int32_t)((raw | highOnes) & keepMask);
}
1453 
unquantize(int32_t x,int mode,bool hasSign)1454 inline int32_t unquantize(int32_t x, int mode, bool hasSign)
1455 {
1456     if (hasSign)
1457     {
1458         bool s = false;
1459 
1460         if (epBits[mode] >= 16)
1461             return x;
1462 
1463         if (x < 0)
1464         {
1465             s = true;
1466             x = -x;
1467         }
1468 
1469         if (x == 0)
1470             x = 0;
1471         else if (x >= (((int32_t)1 << (epBits[mode] - 1)) - 1))
1472             x = 0x7fff;
1473         else
1474             x = (((int32_t)x << 15) + 0x4000) >> (epBits[mode] - 1);
1475 
1476         if (s)
1477             x = -x;
1478 
1479         return x;
1480     }
1481     else
1482     {
1483         if (epBits[mode] >= 15)
1484             return x;
1485         else if (x == 0)
1486             return 0;
1487         else if (x == (((int32_t)1 << epBits[mode]) - 1))
1488             return 0xffff;
1489         else
1490             return ((((int32_t)x << 15) + 0x4000) >> (epBits[mode] - 1));
1491     }
1492 }
1493 
interpolate(int32_t a,int32_t b,uint32_t index,uint32_t indexPrecision)1494 inline int32_t interpolate(int32_t a, int32_t b, uint32_t index, uint32_t indexPrecision)
1495 {
1496     const uint16_t *weights[] = {weights2, weights3, weights4};
1497     const uint16_t *weight    = weights[indexPrecision - 2];
1498     DE_ASSERT(indexPrecision >= 2 && indexPrecision <= 4);
1499 
1500     return (((64 - weight[index]) * a + weight[index] * b + 32) >> 6);
1501 }
1502 
// Final scaling step applied to an interpolated component before it is stored as 16 bits.
//  - Unsigned: returns x * 31 / 64.
//  - Signed: scales the magnitude by 31 / 32 and returns it in sign-magnitude form, i.e.
//    bit 15 (0x8000) is set for negative, non-zero results.
inline int16_t finishUnquantize(int32_t x, bool hasSign)
{
    if (!hasSign)
        return (int16_t)((x * 31) / 64);

    if (x >= 0)
        return (int16_t)((x * 31) >> 5);

    const int32_t magnitude = ((-x) * 31) >> 5;

    // A negative input that scales down to zero is returned as plain zero.
    if (magnitude > 0)
        return (int16_t)(magnitude | 0x8000);

    return 0;
}
1522 
1523 } // namespace BcDecompressInternal
1524 
decompressBc1(const PixelBufferAccess & dst,const uint8_t * src,bool hasAlpha)1525 void decompressBc1(const PixelBufferAccess &dst, const uint8_t *src, bool hasAlpha)
1526 {
1527     using namespace BcDecompressInternal;
1528 
1529     uint8_t *const dstPtr         = (uint8_t *)dst.getDataPtr();
1530     const uint32_t dstRowPitch    = dst.getRowPitch();
1531     const uint32_t dstPixelSize   = 4;
1532     const uint16_t color0_16      = ((uint16_t *)src)[0];
1533     const uint16_t color1_16      = ((uint16_t *)src)[1];
1534     const uint32_t color0         = bgr16torgba32(color0_16);
1535     const uint32_t color1         = bgr16torgba32(color1_16);
1536     const uint8_t *const indices8 = &src[4];
1537 
1538     const bool alphaMode = color1_16 > color0_16;
1539 
1540     const int32_t indices[16] = {
1541         (indices8[0] >> 0) & 0x3, (indices8[0] >> 2) & 0x3, (indices8[0] >> 4) & 0x3, (indices8[0] >> 6) & 0x3,
1542         (indices8[1] >> 0) & 0x3, (indices8[1] >> 2) & 0x3, (indices8[1] >> 4) & 0x3, (indices8[1] >> 6) & 0x3,
1543         (indices8[2] >> 0) & 0x3, (indices8[2] >> 2) & 0x3, (indices8[2] >> 4) & 0x3, (indices8[2] >> 6) & 0x3,
1544         (indices8[3] >> 0) & 0x3, (indices8[3] >> 2) & 0x3, (indices8[3] >> 4) & 0x3, (indices8[3] >> 6) & 0x3};
1545 
1546     const uint32_t colors[4] = {color0, color1,
1547                                 alphaMode ? averageColor(color0, color1) : interpolateColor(color1, color0),
1548                                 alphaMode ? (hasAlpha ? 0 : 0xff000000) : interpolateColor(color0, color1)};
1549 
1550     for (uint32_t y = 0; y < (uint32_t)BC_BLOCK_HEIGHT; y++)
1551     {
1552         for (uint32_t x = 0; x < (uint32_t)BC_BLOCK_WIDTH; x++)
1553         {
1554             uint32_t *const dstPixel = (uint32_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1555             *dstPixel                = colors[indices[y * BC_BLOCK_WIDTH + x]];
1556         }
1557     }
1558 }
1559 
decompressBc2(const PixelBufferAccess & dst,const uint8_t * src)1560 void decompressBc2(const PixelBufferAccess &dst, const uint8_t *src)
1561 {
1562     using namespace BcDecompressInternal;
1563 
1564     uint8_t *const dstPtr         = (uint8_t *)dst.getDataPtr();
1565     const uint32_t dstRowPitch    = dst.getRowPitch();
1566     const uint32_t dstPixelSize   = 4;
1567     const uint16_t color0_16      = ((uint16_t *)src)[4];
1568     const uint16_t color1_16      = ((uint16_t *)src)[5];
1569     const uint32_t color0         = bgr16torgba32(color0_16);
1570     const uint32_t color1         = bgr16torgba32(color1_16);
1571     const uint8_t *const indices8 = &src[12];
1572     const uint8_t *const alphas8  = src;
1573 
1574     const int32_t indices[16] = {
1575         (indices8[0] >> 0) & 0x3, (indices8[0] >> 2) & 0x3, (indices8[0] >> 4) & 0x3, (indices8[0] >> 6) & 0x3,
1576         (indices8[1] >> 0) & 0x3, (indices8[1] >> 2) & 0x3, (indices8[1] >> 4) & 0x3, (indices8[1] >> 6) & 0x3,
1577         (indices8[2] >> 0) & 0x3, (indices8[2] >> 2) & 0x3, (indices8[2] >> 4) & 0x3, (indices8[2] >> 6) & 0x3,
1578         (indices8[3] >> 0) & 0x3, (indices8[3] >> 2) & 0x3, (indices8[3] >> 4) & 0x3, (indices8[3] >> 6) & 0x3};
1579 
1580     const int32_t alphas[16] = {
1581         extend4To8(((alphas8[0] >> 0) & 0xf)) << 24, extend4To8(((alphas8[0] >> 4) & 0xf)) << 24,
1582         extend4To8(((alphas8[1] >> 0) & 0xf)) << 24, extend4To8(((alphas8[1] >> 4) & 0xf)) << 24,
1583         extend4To8(((alphas8[2] >> 0) & 0xf)) << 24, extend4To8(((alphas8[2] >> 4) & 0xf)) << 24,
1584         extend4To8(((alphas8[3] >> 0) & 0xf)) << 24, extend4To8(((alphas8[3] >> 4) & 0xf)) << 24,
1585         extend4To8(((alphas8[4] >> 0) & 0xf)) << 24, extend4To8(((alphas8[4] >> 4) & 0xf)) << 24,
1586         extend4To8(((alphas8[5] >> 0) & 0xf)) << 24, extend4To8(((alphas8[5] >> 4) & 0xf)) << 24,
1587         extend4To8(((alphas8[6] >> 0) & 0xf)) << 24, extend4To8(((alphas8[6] >> 4) & 0xf)) << 24,
1588         extend4To8(((alphas8[7] >> 0) & 0xf)) << 24, extend4To8(((alphas8[7] >> 4) & 0xf)) << 24};
1589 
1590     const uint32_t colors[4] = {color0, color1, interpolateColor(color1, color0), interpolateColor(color0, color1)};
1591 
1592     for (uint32_t y = 0; y < (uint32_t)BC_BLOCK_HEIGHT; y++)
1593     {
1594         for (uint32_t x = 0; x < (uint32_t)BC_BLOCK_WIDTH; x++)
1595         {
1596             uint32_t *const dstPixel = (uint32_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1597             *dstPixel = (colors[indices[y * BC_BLOCK_WIDTH + x]] & 0x00ffffff) | alphas[y * BC_BLOCK_WIDTH + x];
1598         }
1599     }
1600 }
1601 
decompressBc3(const PixelBufferAccess & dst,const uint8_t * src)1602 void decompressBc3(const PixelBufferAccess &dst, const uint8_t *src)
1603 {
1604     using namespace BcDecompressInternal;
1605 
1606     uint8_t *const dstPtr         = (uint8_t *)dst.getDataPtr();
1607     const uint32_t dstRowPitch    = dst.getRowPitch();
1608     const uint32_t dstPixelSize   = 4;
1609     const uint8_t alpha0          = src[0];
1610     const uint8_t alpha1          = src[1];
1611     const uint16_t color0_16      = ((uint16_t *)src)[4];
1612     const uint16_t color1_16      = ((uint16_t *)src)[5];
1613     const uint32_t color0         = bgr16torgba32(color0_16);
1614     const uint32_t color1         = bgr16torgba32(color1_16);
1615     const uint8_t *const indices8 = &src[12];
1616     const uint64_t alphaBits      = get64BitBlockLE(src, 0) >> 16;
1617     uint32_t alphas[8];
1618 
1619     const int32_t indices[16] = {
1620         (indices8[0] >> 0) & 0x3, (indices8[0] >> 2) & 0x3, (indices8[0] >> 4) & 0x3, (indices8[0] >> 6) & 0x3,
1621         (indices8[1] >> 0) & 0x3, (indices8[1] >> 2) & 0x3, (indices8[1] >> 4) & 0x3, (indices8[1] >> 6) & 0x3,
1622         (indices8[2] >> 0) & 0x3, (indices8[2] >> 2) & 0x3, (indices8[2] >> 4) & 0x3, (indices8[2] >> 6) & 0x3,
1623         (indices8[3] >> 0) & 0x3, (indices8[3] >> 2) & 0x3, (indices8[3] >> 4) & 0x3, (indices8[3] >> 6) & 0x3};
1624 
1625     const int32_t alphaIndices[16] = {
1626         (int32_t)((alphaBits >> 0) & 0x7),  (int32_t)((alphaBits >> 3) & 0x7),  (int32_t)((alphaBits >> 6) & 0x7),
1627         (int32_t)((alphaBits >> 9) & 0x7),  (int32_t)((alphaBits >> 12) & 0x7), (int32_t)((alphaBits >> 15) & 0x7),
1628         (int32_t)((alphaBits >> 18) & 0x7), (int32_t)((alphaBits >> 21) & 0x7), (int32_t)((alphaBits >> 24) & 0x7),
1629         (int32_t)((alphaBits >> 27) & 0x7), (int32_t)((alphaBits >> 30) & 0x7), (int32_t)((alphaBits >> 33) & 0x7),
1630         (int32_t)((alphaBits >> 36) & 0x7), (int32_t)((alphaBits >> 39) & 0x7), (int32_t)((alphaBits >> 42) & 0x7),
1631         (int32_t)((alphaBits >> 45) & 0x7)};
1632 
1633     const uint32_t colors[4] = {color0, color1, interpolateColor(color1, color0), interpolateColor(color0, color1)};
1634 
1635     alphas[0] = alpha0 << 24;
1636     alphas[1] = alpha1 << 24;
1637 
1638     if (alpha0 > alpha1)
1639     {
1640         for (uint32_t i = 0; i < 6; i++)
1641             alphas[i + 2] = (((uint32_t)alpha0 * (6 - i) + (uint32_t)alpha1 * (1 + i)) / 7) << 24;
1642     }
1643     else
1644     {
1645         for (uint32_t i = 0; i < 4; i++)
1646             alphas[i + 2] = (((uint32_t)alpha0 * (4 - i) + (uint32_t)alpha1 * (1 + i)) / 5) << 24;
1647         alphas[6] = 0;
1648         alphas[7] = 0xff000000;
1649     }
1650 
1651     for (uint32_t y = 0; y < (uint32_t)BC_BLOCK_HEIGHT; y++)
1652     {
1653         for (uint32_t x = 0; x < (uint32_t)BC_BLOCK_WIDTH; x++)
1654         {
1655             uint32_t *const dstPixel = (uint32_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1656             *dstPixel =
1657                 (colors[indices[y * BC_BLOCK_WIDTH + x]] & 0x00ffffff) | alphas[alphaIndices[y * BC_BLOCK_WIDTH + x]];
1658         }
1659     }
1660 }
1661 
decompressBc4(const PixelBufferAccess & dst,const uint8_t * src,bool hasSign)1662 void decompressBc4(const PixelBufferAccess &dst, const uint8_t *src, bool hasSign)
1663 {
1664     using namespace BcDecompressInternal;
1665 
1666     uint8_t *const dstPtr       = (uint8_t *)dst.getDataPtr();
1667     const uint32_t dstRowPitch  = dst.getRowPitch();
1668     const uint32_t dstPixelSize = 4;
1669     const uint8_t red0          = src[0];
1670     const uint8_t red1          = src[1];
1671     const int8_t red0s          = ((int8_t *)src)[0];
1672     const int8_t red1s          = ((int8_t *)src)[1];
1673     const uint64_t indexBits    = get64BitBlockLE(src, 0) >> 16;
1674     float reds[8];
1675 
1676     const int32_t indices[16] = {
1677         (int32_t)((indexBits >> 0) & 0x7),  (int32_t)((indexBits >> 3) & 0x7),  (int32_t)((indexBits >> 6) & 0x7),
1678         (int32_t)((indexBits >> 9) & 0x7),  (int32_t)((indexBits >> 12) & 0x7), (int32_t)((indexBits >> 15) & 0x7),
1679         (int32_t)((indexBits >> 18) & 0x7), (int32_t)((indexBits >> 21) & 0x7), (int32_t)((indexBits >> 24) & 0x7),
1680         (int32_t)((indexBits >> 27) & 0x7), (int32_t)((indexBits >> 30) & 0x7), (int32_t)((indexBits >> 33) & 0x7),
1681         (int32_t)((indexBits >> 36) & 0x7), (int32_t)((indexBits >> 39) & 0x7), (int32_t)((indexBits >> 42) & 0x7),
1682         (int32_t)((indexBits >> 45) & 0x7)};
1683 
1684     reds[0] = hasSign ? int8ToFloat(red0s) : uint8ToFloat(red0);
1685     reds[1] = hasSign ? int8ToFloat(red1s) : uint8ToFloat(red1);
1686 
1687     if (reds[0] > reds[1])
1688     {
1689         for (uint32_t i = 0; i < 6; i++)
1690             reds[i + 2] = (reds[0] * (6.0f - (float)i) + reds[1] * (1.0f + (float)i)) / 7.0f;
1691     }
1692     else
1693     {
1694         for (uint32_t i = 0; i < 4; i++)
1695             reds[i + 2] = (reds[0] * (4.0f - (float)i) + reds[1] * (1.0f + (float)i)) / 5.0f;
1696         reds[6] = hasSign ? -1.0f : 0.0f;
1697         reds[7] = 1.0f;
1698     }
1699 
1700     for (uint32_t y = 0; y < (uint32_t)BC_BLOCK_HEIGHT; y++)
1701     {
1702         for (uint32_t x = 0; x < (uint32_t)BC_BLOCK_WIDTH; x++)
1703         {
1704             float *const dstPixel = (float *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1705             *dstPixel             = reds[indices[y * BC_BLOCK_WIDTH + x]];
1706         }
1707     }
1708 }
1709 
decompressBc5(const PixelBufferAccess & dst,const uint8_t * src,bool hasSign)1710 void decompressBc5(const PixelBufferAccess &dst, const uint8_t *src, bool hasSign)
1711 {
1712     using namespace BcDecompressInternal;
1713 
1714     uint8_t *const dstPtr       = (uint8_t *)dst.getDataPtr();
1715     const uint32_t dstRowPitch  = dst.getRowPitch();
1716     const uint32_t dstPixelSize = 8;
1717     float rg[2][8];
1718     uint32_t indices[2][16];
1719 
1720     for (uint32_t c = 0; c < 2; c++)
1721     {
1722         const uint32_t offset    = c * 8;
1723         const uint8_t rg0        = src[offset];
1724         const uint8_t rg1        = src[offset + 1];
1725         const int8_t rg0s        = ((int8_t *)src)[offset];
1726         const int8_t rg1s        = ((int8_t *)src)[offset + 1];
1727         const uint64_t indexBits = get64BitBlockLE(src, c) >> 16;
1728 
1729         for (uint32_t i = 0; i < 16; i++)
1730             indices[c][i] = (indexBits >> (i * 3)) & 0x7;
1731 
1732         rg[c][0] = hasSign ? int8ToFloat(rg0s) : uint8ToFloat(rg0);
1733         rg[c][1] = hasSign ? int8ToFloat(rg1s) : uint8ToFloat(rg1);
1734 
1735         if (rg[c][0] > rg[c][1])
1736         {
1737             for (uint32_t i = 0; i < 6; i++)
1738                 rg[c][i + 2] = (rg[c][0] * (6.0f - (float)i) + rg[c][1] * (1.0f + (float)i)) / 7.0f;
1739         }
1740         else
1741         {
1742             for (uint32_t i = 0; i < 4; i++)
1743                 rg[c][i + 2] = (rg[c][0] * (4.0f - (float)i) + rg[c][1] * (1.0f + (float)i)) / 5.0f;
1744             rg[c][6] = hasSign ? -1.0f : 0.0f;
1745             rg[c][7] = 1.0f;
1746         }
1747     }
1748 
1749     for (uint32_t y = 0; y < (uint32_t)BC_BLOCK_HEIGHT; y++)
1750     {
1751         for (uint32_t x = 0; x < (uint32_t)BC_BLOCK_WIDTH; x++)
1752         {
1753             float *const dstPixel = (float *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
1754             for (uint32_t i = 0; i < 2; i++)
1755                 dstPixel[i] = rg[i][indices[i][y * BC_BLOCK_WIDTH + x]];
1756         }
1757     }
1758 }
1759 
decompressBc6H(const PixelBufferAccess & dst,const uint8_t * src,bool hasSign)1760 void decompressBc6H(const PixelBufferAccess &dst, const uint8_t *src, bool hasSign)
1761 {
1762     using namespace BcDecompressInternal;
1763 
1764     uint8_t *const dstPtr       = (uint8_t *)dst.getDataPtr();
1765     const uint32_t dstRowPitch  = dst.getRowPitch();
1766     const uint32_t dstPixelSize = 6;
1767 
1768     int32_t mode = extractModeBc6(src[0]);
1769     IVec4 r(0);
1770     IVec4 g(0);
1771     IVec4 b(0);
1772     uint32_t deltaBitsR           = 0;
1773     uint32_t deltaBitsG           = 0;
1774     uint32_t deltaBitsB           = 0;
1775     const uint64_t low            = ((uint64_t *)src)[0];
1776     const uint64_t high           = ((uint64_t *)src)[1];
1777     const uint32_t d              = mode < 10 ? getBits128(low, high, 77, 81) : 0;
1778     const uint32_t numRegions     = mode > 9 ? 1 : 2;
1779     const uint32_t numEndpoints   = numRegions * 2;
1780     const bool transformed        = mode != 9 && mode != 10;
1781     const uint32_t colorIndexBC   = mode < 10 ? 3 : 4;
1782     uint64_t colorIndexData       = high >> (mode < 10 ? 18 : 1);
1783     const uint32_t anchorIndex[2] = {0, anchorIndicesSecondSubset2[d]};
1784 
1785     switch (mode)
1786     {
1787     case 0:
1788         g[2] |= getBits128(low, high, 2, 2) << 4;
1789         b[2] |= getBits128(low, high, 3, 3) << 4;
1790         b[3] |= getBits128(low, high, 4, 4) << 4;
1791         r[0] |= getBits128(low, high, 5, 14);
1792         g[0] |= getBits128(low, high, 15, 24);
1793         b[0] |= getBits128(low, high, 25, 34);
1794         r[1] |= getBits128(low, high, 35, 39);
1795         g[3] |= getBits128(low, high, 40, 40) << 4;
1796         g[2] |= getBits128(low, high, 41, 44);
1797         g[1] |= getBits128(low, high, 45, 49);
1798         b[3] |= getBits128(low, high, 50, 50);
1799         g[3] |= getBits128(low, high, 51, 54);
1800         b[1] |= getBits128(low, high, 55, 59);
1801         b[3] |= getBits128(low, high, 60, 60) << 1;
1802         b[2] |= getBits128(low, high, 61, 64);
1803         r[2] |= getBits128(low, high, 65, 69);
1804         b[3] |= getBits128(low, high, 70, 70) << 2;
1805         r[3] |= getBits128(low, high, 71, 75);
1806         b[3] |= getBits128(low, high, 76, 76) << 3;
1807         deltaBitsR = deltaBitsG = deltaBitsB = 5;
1808         break;
1809 
1810     case 1:
1811         g[2] |= getBits128(low, high, 2, 2) << 5;
1812         g[3] |= getBits128(low, high, 3, 3) << 4;
1813         g[3] |= getBits128(low, high, 4, 4) << 5;
1814         r[0] |= getBits128(low, high, 5, 11);
1815         b[3] |= getBits128(low, high, 12, 12);
1816         b[3] |= getBits128(low, high, 13, 13) << 1;
1817         b[2] |= getBits128(low, high, 14, 14) << 4;
1818         g[0] |= getBits128(low, high, 15, 21);
1819         b[2] |= getBits128(low, high, 22, 22) << 5;
1820         b[3] |= getBits128(low, high, 23, 23) << 2;
1821         g[2] |= getBits128(low, high, 24, 24) << 4;
1822         b[0] |= getBits128(low, high, 25, 31);
1823         b[3] |= getBits128(low, high, 32, 32) << 3;
1824         b[3] |= getBits128(low, high, 33, 33) << 5;
1825         b[3] |= getBits128(low, high, 34, 34) << 4;
1826         r[1] |= getBits128(low, high, 35, 40);
1827         g[2] |= getBits128(low, high, 41, 44);
1828         g[1] |= getBits128(low, high, 45, 50);
1829         g[3] |= getBits128(low, high, 51, 54);
1830         b[1] |= getBits128(low, high, 55, 60);
1831         b[2] |= getBits128(low, high, 61, 64);
1832         r[2] |= getBits128(low, high, 65, 70);
1833         r[3] |= getBits128(low, high, 71, 76);
1834         deltaBitsR = deltaBitsG = deltaBitsB = 6;
1835         break;
1836 
1837     case 2:
1838         r[0] |= getBits128(low, high, 5, 14);
1839         g[0] |= getBits128(low, high, 15, 24);
1840         b[0] |= getBits128(low, high, 25, 34);
1841         r[1] |= getBits128(low, high, 35, 39);
1842         r[0] |= getBits128(low, high, 40, 40) << 10;
1843         g[2] |= getBits128(low, high, 41, 44);
1844         g[1] |= getBits128(low, high, 45, 48);
1845         g[0] |= getBits128(low, high, 49, 49) << 10;
1846         b[3] |= getBits128(low, high, 50, 50);
1847         g[3] |= getBits128(low, high, 51, 54);
1848         b[1] |= getBits128(low, high, 55, 58);
1849         b[0] |= getBits128(low, high, 59, 59) << 10;
1850         b[3] |= getBits128(low, high, 60, 60) << 1;
1851         b[2] |= getBits128(low, high, 61, 64);
1852         r[2] |= getBits128(low, high, 65, 69);
1853         b[3] |= getBits128(low, high, 70, 70) << 2;
1854         r[3] |= getBits128(low, high, 71, 75);
1855         b[3] |= getBits128(low, high, 76, 76) << 3;
1856         deltaBitsR = 5;
1857         deltaBitsG = deltaBitsB = 4;
1858         break;
1859 
1860     case 3:
1861         r[0] |= getBits128(low, high, 5, 14);
1862         g[0] |= getBits128(low, high, 15, 24);
1863         b[0] |= getBits128(low, high, 25, 34);
1864         r[1] |= getBits128(low, high, 35, 38);
1865         r[0] |= getBits128(low, high, 39, 39) << 10;
1866         g[3] |= getBits128(low, high, 40, 40) << 4;
1867         g[2] |= getBits128(low, high, 41, 44);
1868         g[1] |= getBits128(low, high, 45, 49);
1869         g[0] |= getBits128(low, high, 50, 50) << 10;
1870         g[3] |= getBits128(low, high, 51, 54);
1871         b[1] |= getBits128(low, high, 55, 58);
1872         b[0] |= getBits128(low, high, 59, 59) << 10;
1873         b[3] |= getBits128(low, high, 60, 60) << 1;
1874         b[2] |= getBits128(low, high, 61, 64);
1875         r[2] |= getBits128(low, high, 65, 68);
1876         b[3] |= getBits128(low, high, 69, 69);
1877         b[3] |= getBits128(low, high, 70, 70) << 2;
1878         r[3] |= getBits128(low, high, 71, 74);
1879         g[2] |= getBits128(low, high, 75, 75) << 4;
1880         b[3] |= getBits128(low, high, 76, 76) << 3;
1881         deltaBitsR = deltaBitsB = 4;
1882         deltaBitsG              = 5;
1883         break;
1884 
1885     case 4:
1886         r[0] |= getBits128(low, high, 5, 14);
1887         g[0] |= getBits128(low, high, 15, 24);
1888         b[0] |= getBits128(low, high, 25, 34);
1889         r[1] |= getBits128(low, high, 35, 38);
1890         r[0] |= getBits128(low, high, 39, 39) << 10;
1891         b[2] |= getBits128(low, high, 40, 40) << 4;
1892         g[2] |= getBits128(low, high, 41, 44);
1893         g[1] |= getBits128(low, high, 45, 48);
1894         g[0] |= getBits128(low, high, 49, 49) << 10;
1895         b[3] |= getBits128(low, high, 50, 50);
1896         g[3] |= getBits128(low, high, 51, 54);
1897         b[1] |= getBits128(low, high, 55, 59);
1898         b[0] |= getBits128(low, high, 60, 60) << 10;
1899         b[2] |= getBits128(low, high, 61, 64);
1900         r[2] |= getBits128(low, high, 65, 68);
1901         b[3] |= getBits128(low, high, 69, 69) << 1;
1902         b[3] |= getBits128(low, high, 70, 70) << 2;
1903         r[3] |= getBits128(low, high, 71, 74);
1904         b[3] |= getBits128(low, high, 75, 75) << 4;
1905         b[3] |= getBits128(low, high, 76, 76) << 3;
1906         deltaBitsR = deltaBitsG = 4;
1907         deltaBitsB              = 5;
1908         break;
1909 
1910     case 5:
1911         r[0] |= getBits128(low, high, 5, 13);
1912         b[2] |= getBits128(low, high, 14, 14) << 4;
1913         g[0] |= getBits128(low, high, 15, 23);
1914         g[2] |= getBits128(low, high, 24, 24) << 4;
1915         b[0] |= getBits128(low, high, 25, 33);
1916         b[3] |= getBits128(low, high, 34, 34) << 4;
1917         r[1] |= getBits128(low, high, 35, 39);
1918         g[3] |= getBits128(low, high, 40, 40) << 4;
1919         g[2] |= getBits128(low, high, 41, 44);
1920         g[1] |= getBits128(low, high, 45, 49);
1921         b[3] |= getBits128(low, high, 50, 50);
1922         g[3] |= getBits128(low, high, 51, 54);
1923         b[1] |= getBits128(low, high, 55, 59);
1924         b[3] |= getBits128(low, high, 60, 60) << 1;
1925         b[2] |= getBits128(low, high, 61, 64);
1926         r[2] |= getBits128(low, high, 65, 69);
1927         b[3] |= getBits128(low, high, 70, 70) << 2;
1928         r[3] |= getBits128(low, high, 71, 75);
1929         b[3] |= getBits128(low, high, 76, 76) << 3;
1930         deltaBitsR = deltaBitsG = deltaBitsB = 5;
1931         break;
1932 
1933     case 6:
1934         r[0] |= getBits128(low, high, 5, 12);
1935         g[3] |= getBits128(low, high, 13, 13) << 4;
1936         b[2] |= getBits128(low, high, 14, 14) << 4;
1937         g[0] |= getBits128(low, high, 15, 22);
1938         b[3] |= getBits128(low, high, 23, 23) << 2;
1939         g[2] |= getBits128(low, high, 24, 24) << 4;
1940         b[0] |= getBits128(low, high, 25, 32);
1941         b[3] |= getBits128(low, high, 33, 33) << 3;
1942         b[3] |= getBits128(low, high, 34, 34) << 4;
1943         r[1] |= getBits128(low, high, 35, 40);
1944         g[2] |= getBits128(low, high, 41, 44);
1945         g[1] |= getBits128(low, high, 45, 49);
1946         b[3] |= getBits128(low, high, 50, 50);
1947         g[3] |= getBits128(low, high, 51, 54);
1948         b[1] |= getBits128(low, high, 55, 59);
1949         b[3] |= getBits128(low, high, 60, 60) << 1;
1950         b[2] |= getBits128(low, high, 61, 64);
1951         r[2] |= getBits128(low, high, 65, 70);
1952         r[3] |= getBits128(low, high, 71, 76);
1953         deltaBitsR = 6;
1954         deltaBitsG = deltaBitsB = 5;
1955         break;
1956 
1957     case 7:
1958         r[0] |= getBits128(low, high, 5, 12);
1959         b[3] |= getBits128(low, high, 13, 13);
1960         b[2] |= getBits128(low, high, 14, 14) << 4;
1961         g[0] |= getBits128(low, high, 15, 22);
1962         g[2] |= getBits128(low, high, 23, 23) << 5;
1963         g[2] |= getBits128(low, high, 24, 24) << 4;
1964         b[0] |= getBits128(low, high, 25, 32);
1965         g[3] |= getBits128(low, high, 33, 33) << 5;
1966         b[3] |= getBits128(low, high, 34, 34) << 4;
1967         r[1] |= getBits128(low, high, 35, 39);
1968         g[3] |= getBits128(low, high, 40, 40) << 4;
1969         g[2] |= getBits128(low, high, 41, 44);
1970         g[1] |= getBits128(low, high, 45, 50);
1971         g[3] |= getBits128(low, high, 51, 54);
1972         b[1] |= getBits128(low, high, 55, 59);
1973         b[3] |= getBits128(low, high, 60, 60) << 1;
1974         b[2] |= getBits128(low, high, 61, 64);
1975         r[2] |= getBits128(low, high, 65, 69);
1976         b[3] |= getBits128(low, high, 70, 70) << 2;
1977         r[3] |= getBits128(low, high, 71, 75);
1978         b[3] |= getBits128(low, high, 76, 76) << 3;
1979         deltaBitsR = deltaBitsB = 5;
1980         deltaBitsG              = 6;
1981         break;
1982 
1983     case 8:
1984         r[0] |= getBits128(low, high, 5, 12);
1985         b[3] |= getBits128(low, high, 13, 13) << 1;
1986         b[2] |= getBits128(low, high, 14, 14) << 4;
1987         g[0] |= getBits128(low, high, 15, 22);
1988         b[2] |= getBits128(low, high, 23, 23) << 5;
1989         g[2] |= getBits128(low, high, 24, 24) << 4;
1990         b[0] |= getBits128(low, high, 25, 32);
1991         b[3] |= getBits128(low, high, 33, 33) << 5;
1992         b[3] |= getBits128(low, high, 34, 34) << 4;
1993         r[1] |= getBits128(low, high, 35, 39);
1994         g[3] |= getBits128(low, high, 40, 40) << 4;
1995         g[2] |= getBits128(low, high, 41, 44);
1996         g[1] |= getBits128(low, high, 45, 49);
1997         b[3] |= getBits128(low, high, 50, 50);
1998         g[3] |= getBits128(low, high, 51, 54);
1999         b[1] |= getBits128(low, high, 55, 60);
2000         b[2] |= getBits128(low, high, 61, 64);
2001         r[2] |= getBits128(low, high, 65, 69);
2002         b[3] |= getBits128(low, high, 70, 70) << 2;
2003         r[3] |= getBits128(low, high, 71, 75);
2004         b[3] |= getBits128(low, high, 76, 76) << 3;
2005         deltaBitsR = deltaBitsG = 5;
2006         deltaBitsB              = 6;
2007         break;
2008 
2009     case 9:
2010         r[0] |= getBits128(low, high, 5, 10);
2011         g[3] |= getBits128(low, high, 11, 11) << 4;
2012         b[3] |= getBits128(low, high, 12, 13);
2013         b[2] |= getBits128(low, high, 14, 14) << 4;
2014         g[0] |= getBits128(low, high, 15, 20);
2015         g[2] |= getBits128(low, high, 21, 21) << 5;
2016         b[2] |= getBits128(low, high, 22, 22) << 5;
2017         b[3] |= getBits128(low, high, 23, 23) << 2;
2018         g[2] |= getBits128(low, high, 24, 24) << 4;
2019         b[0] |= getBits128(low, high, 25, 30);
2020         g[3] |= getBits128(low, high, 31, 31) << 5;
2021         b[3] |= getBits128(low, high, 32, 32) << 3;
2022         b[3] |= getBits128(low, high, 33, 33) << 5;
2023         b[3] |= getBits128(low, high, 34, 34) << 4;
2024         r[1] |= getBits128(low, high, 35, 40);
2025         g[2] |= getBits128(low, high, 41, 44);
2026         g[1] |= getBits128(low, high, 45, 50);
2027         g[3] |= getBits128(low, high, 51, 54);
2028         b[1] |= getBits128(low, high, 55, 60);
2029         b[2] |= getBits128(low, high, 61, 64);
2030         r[2] |= getBits128(low, high, 65, 70);
2031         r[3] |= getBits128(low, high, 71, 76);
2032         deltaBitsR = deltaBitsG = deltaBitsB = 6;
2033         break;
2034 
2035     case 10:
2036         r[0] |= getBits128(low, high, 5, 14);
2037         g[0] |= getBits128(low, high, 15, 24);
2038         b[0] |= getBits128(low, high, 25, 34);
2039         r[1] |= getBits128(low, high, 35, 44);
2040         g[1] |= getBits128(low, high, 45, 54);
2041         b[1] |= getBits128(low, high, 55, 64);
2042         deltaBitsR = deltaBitsG = deltaBitsB = 10;
2043         break;
2044 
2045     case 11:
2046         r[0] |= getBits128(low, high, 5, 14);
2047         g[0] |= getBits128(low, high, 15, 24);
2048         b[0] |= getBits128(low, high, 25, 34);
2049         r[1] |= getBits128(low, high, 35, 43);
2050         r[0] |= getBits128(low, high, 44, 44) << 10;
2051         g[1] |= getBits128(low, high, 45, 53);
2052         g[0] |= getBits128(low, high, 54, 54) << 10;
2053         b[1] |= getBits128(low, high, 55, 63);
2054         b[0] |= getBits128(low, high, 64, 64) << 10;
2055         deltaBitsR = deltaBitsG = deltaBitsB = 9;
2056         break;
2057 
2058     case 12:
2059         r[0] |= getBits128(low, high, 5, 14);
2060         g[0] |= getBits128(low, high, 15, 24);
2061         b[0] |= getBits128(low, high, 25, 34);
2062         r[1] |= getBits128(low, high, 35, 42);
2063         r[0] |= getBits128(low, high, 44, 43) << 10;
2064         g[1] |= getBits128(low, high, 45, 52);
2065         g[0] |= getBits128(low, high, 54, 53) << 10;
2066         b[1] |= getBits128(low, high, 55, 62);
2067         b[0] |= getBits128(low, high, 64, 63) << 10;
2068         deltaBitsR = deltaBitsG = deltaBitsB = 8;
2069         break;
2070 
2071     case 13:
2072         r[0] |= getBits128(low, high, 5, 14);
2073         g[0] |= getBits128(low, high, 15, 24);
2074         b[0] |= getBits128(low, high, 25, 34);
2075         r[1] |= getBits128(low, high, 35, 38);
2076         r[0] |= getBits128(low, high, 44, 39) << 10;
2077         g[1] |= getBits128(low, high, 45, 48);
2078         g[0] |= getBits128(low, high, 54, 49) << 10;
2079         b[1] |= getBits128(low, high, 55, 58);
2080         b[0] |= getBits128(low, high, 64, 59) << 10;
2081         deltaBitsR = deltaBitsG = deltaBitsB = 4;
2082         break;
2083     }
2084 
2085     if (hasSign)
2086     {
2087         r[0] = signExtend(r[0], epBits[mode], 32);
2088         g[0] = signExtend(g[0], epBits[mode], 32);
2089         b[0] = signExtend(b[0], epBits[mode], 32);
2090     }
2091 
2092     if (transformed)
2093     {
2094         for (uint32_t i = 1; i < numEndpoints; i++)
2095         {
2096             r[i] = signExtend(r[i], deltaBitsR, 32);
2097             r[i] = (r[0] + r[i]) & (((uint32_t)1 << epBits[mode]) - 1);
2098             g[i] = signExtend(g[i], deltaBitsG, 32);
2099             g[i] = (g[0] + g[i]) & (((uint32_t)1 << epBits[mode]) - 1);
2100             b[i] = signExtend(b[i], deltaBitsB, 32);
2101             b[i] = (b[0] + b[i]) & (((uint32_t)1 << epBits[mode]) - 1);
2102         }
2103     }
2104 
2105     if (hasSign)
2106     {
2107         for (uint32_t i = 1; i < 4; i++)
2108         {
2109             r[i] = signExtend(r[i], epBits[mode], 32);
2110             g[i] = signExtend(g[i], epBits[mode], 32);
2111             b[i] = signExtend(b[i], epBits[mode], 32);
2112         }
2113     }
2114 
2115     for (uint32_t i = 0; i < numEndpoints; i++)
2116     {
2117         r[i] = unquantize(r[i], mode, hasSign);
2118         g[i] = unquantize(g[i], mode, hasSign);
2119         b[i] = unquantize(b[i], mode, hasSign);
2120     }
2121 
2122     for (uint32_t i = 0; i < 16; i++)
2123     {
2124         const uint32_t subsetIndex   = (numRegions == 1 ? 0 : partitions2[d][i]);
2125         const uint32_t bits          = (i == anchorIndex[subsetIndex]) ? (colorIndexBC - 1) : colorIndexBC;
2126         const uint32_t colorIndex    = (uint32_t)(colorIndexData & ((1 << bits) - 1));
2127         const int32_t endpointStartR = r[2 * subsetIndex];
2128         const int32_t endpointEndR   = r[2 * subsetIndex + 1];
2129         const int32_t endpointStartG = g[2 * subsetIndex];
2130         const int32_t endpointEndG   = g[2 * subsetIndex + 1];
2131         const int32_t endpointStartB = b[2 * subsetIndex];
2132         const int32_t endpointEndB   = b[2 * subsetIndex + 1];
2133         const int16_t r16 =
2134             finishUnquantize(interpolate(endpointStartR, endpointEndR, colorIndex, colorIndexBC), hasSign);
2135         const int16_t g16 =
2136             finishUnquantize(interpolate(endpointStartG, endpointEndG, colorIndex, colorIndexBC), hasSign);
2137         const int16_t b16 =
2138             finishUnquantize(interpolate(endpointStartB, endpointEndB, colorIndex, colorIndexBC), hasSign);
2139         const int32_t y         = i / 4;
2140         const int32_t x         = i % 4;
2141         int16_t *const dstPixel = (int16_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
2142 
2143         if (mode == -1)
2144         {
2145             dstPixel[0] = 0;
2146             dstPixel[1] = 0;
2147             dstPixel[2] = 0;
2148         }
2149         else
2150         {
2151             dstPixel[0] = r16;
2152             dstPixel[1] = g16;
2153             dstPixel[2] = b16;
2154         }
2155 
2156         colorIndexData >>= bits;
2157     }
2158 }
2159 
decompressBc7(const PixelBufferAccess & dst,const uint8_t * src)2160 void decompressBc7(const PixelBufferAccess &dst, const uint8_t *src)
2161 {
2162     using namespace BcDecompressInternal;
2163 
2164     static const uint8_t subsets[]          = {3, 2, 3, 2, 1, 1, 1, 2};
2165     static const uint8_t partitionBits[]    = {4, 6, 6, 6, 0, 0, 0, 6};
2166     static const uint8_t endpointBits[8][5] = {//r, g, b, a, p
2167                                                {4, 4, 4, 0, 1}, {6, 6, 6, 0, 1}, {5, 5, 5, 0, 0}, {7, 7, 7, 0, 1},
2168                                                {5, 5, 5, 6, 0}, {7, 7, 7, 8, 0}, {7, 7, 7, 7, 1}, {5, 5, 5, 5, 1}};
2169     static const uint8_t indexBits[]        = {3, 3, 2, 2, 2, 2, 4, 2};
2170 
2171     uint8_t *const dstPtr       = (uint8_t *)dst.getDataPtr();
2172     const uint32_t dstRowPitch  = dst.getRowPitch();
2173     const uint32_t dstPixelSize = 4;
2174 
2175     const uint64_t low  = ((uint64_t *)src)[0];
2176     const uint64_t high = ((uint64_t *)src)[1];
2177     const int32_t mode  = extractModeBc7(src[0]);
2178     uint32_t numSubsets = 1;
2179     uint32_t offset     = mode + 1;
2180     uint32_t rotation   = 0;
2181     uint32_t idxMode    = 0;
2182     uint32_t endpoints[6][5];
2183     uint32_t partitionSetId = 0;
2184 
2185     // Decode partition data from explicit partition bits
2186     if (mode == 0 || mode == 1 || mode == 2 || mode == 3 || mode == 7)
2187     {
2188         numSubsets     = subsets[mode];
2189         partitionSetId = getBits128(low, high, offset, offset + partitionBits[mode] - 1);
2190         offset += partitionBits[mode];
2191     }
2192 
2193     // Extract rotation bits
2194     if (mode == 4 || mode == 5)
2195     {
2196         rotation = getBits128(low, high, offset, offset + 1);
2197         offset += 2;
2198         if (mode == 4)
2199         {
2200             idxMode = getBits128(low, high, offset, offset);
2201             offset++;
2202         }
2203     }
2204 
2205     {
2206         const uint32_t numEndpoints = numSubsets * 2;
2207 
2208         // Extract raw, compressed endpoint bits
2209         for (uint32_t cpnt = 0; cpnt < 5; cpnt++)
2210         {
2211             for (uint32_t ep = 0; ep < numEndpoints; ep++)
2212             {
2213                 if (mode == 1 && cpnt == 4 && ep > 1)
2214                     continue; // Mode 1 has shared P bits
2215 
2216                 int n = mode == -1 ? 0 : endpointBits[mode][cpnt];
2217                 if (n > 0)
2218                     endpoints[ep][cpnt] = getBits128(low, high, offset, offset + n - 1);
2219                 offset += n;
2220             }
2221         }
2222 
2223         // Decode endpoints
2224         if (mode == 0 || mode == 1 || mode == 3 || mode == 6 || mode == 7)
2225         {
2226             // First handle modes that have P-bits
2227             for (uint32_t ep = 0; ep < numEndpoints; ep++)
2228             {
2229                 for (uint32_t cpnt = 0; cpnt < 4; cpnt++)
2230                 {
2231                     endpoints[ep][cpnt] <<= 1;
2232                 }
2233             }
2234 
2235             if (mode == 1)
2236             {
2237                 // P-bit is shared
2238                 const uint32_t pbitZero = endpoints[0][4];
2239                 const uint32_t pbitOne  = endpoints[1][4];
2240 
2241                 for (uint32_t cpnt = 0; cpnt < 3; cpnt++)
2242                 {
2243                     endpoints[0][cpnt] |= pbitZero;
2244                     endpoints[1][cpnt] |= pbitZero;
2245                     endpoints[2][cpnt] |= pbitOne;
2246                     endpoints[3][cpnt] |= pbitOne;
2247                 }
2248             }
2249             else
2250             {
2251                 // Unique p-bit per endpoint
2252                 for (uint32_t ep = 0; ep < numEndpoints; ep++)
2253                 {
2254                     for (uint32_t cpnt = 0; cpnt < 4; cpnt++)
2255                     {
2256                         endpoints[ep][cpnt] |= endpoints[ep][4];
2257                     }
2258                 }
2259             }
2260         }
2261 
2262         for (uint32_t ep = 0; ep < numEndpoints; ep++)
2263         {
2264             // Left shift endpoint components so that their MSB lies in bit 7
2265             for (uint32_t cpnt = 0; cpnt < 4; cpnt++)
2266                 endpoints[ep][cpnt] <<= 8 - (endpointBits[mode][cpnt] + endpointBits[mode][4]);
2267 
2268             // Replicate each component's MSB into the LSBs revealed by the left-shift operation above
2269             for (uint32_t cpnt = 0; cpnt < 4; cpnt++)
2270                 endpoints[ep][cpnt] |= endpoints[ep][cpnt] >> (endpointBits[mode][cpnt] + endpointBits[mode][4]);
2271         }
2272 
2273         // If this mode does not explicitly define the alpha component set alpha equal to 1.0
2274         if (mode < 4)
2275         {
2276             for (uint32_t ep = 0; ep < numEndpoints; ep++)
2277                 endpoints[ep][3] = 255;
2278         }
2279     }
2280 
2281     {
2282         uint32_t colorIdxOffset = offset + ((mode == 4 && idxMode) ? 31 : 0);
2283         uint32_t alphaIdxOffset = offset + ((mode == 5 || (mode == 4 && !idxMode)) ? 31 : 0);
2284 
2285         for (uint32_t pixel = 0; pixel < 16; pixel++)
2286         {
2287             const uint32_t y         = pixel / 4;
2288             const uint32_t x         = pixel % 4;
2289             uint32_t *const dstPixel = (uint32_t *)(dstPtr + y * dstRowPitch + x * dstPixelSize);
2290             uint32_t subsetIndex     = 0;
2291             uint32_t anchorIndex     = 0;
2292             uint32_t endpointStart[4];
2293             uint32_t endpointEnd[4];
2294 
2295             if (mode == -1)
2296             {
2297                 *dstPixel = 0;
2298                 continue;
2299             }
2300 
2301             if (numSubsets == 2)
2302                 subsetIndex = partitions2[partitionSetId][pixel];
2303             else if (numSubsets == 3)
2304                 subsetIndex = partitions3[partitionSetId][pixel];
2305 
2306             if (numSubsets == 2 && subsetIndex == 1)
2307             {
2308                 anchorIndex = anchorIndicesSecondSubset2[partitionSetId];
2309             }
2310             else if (numSubsets == 3)
2311             {
2312                 if (subsetIndex == 1)
2313                     anchorIndex = anchorIndicesSecondSubset3[partitionSetId];
2314                 else if (subsetIndex == 2)
2315                     anchorIndex = anchorIndicesThirdSubset[partitionSetId];
2316             }
2317 
2318             for (uint32_t cpnt = 0; cpnt < 4; cpnt++)
2319             {
2320                 endpointStart[cpnt] = endpoints[2 * subsetIndex][cpnt];
2321                 endpointEnd[cpnt]   = endpoints[2 * subsetIndex + 1][cpnt];
2322             }
2323 
2324             {
2325                 const uint32_t colorInterpolationBits = indexBits[mode] + idxMode;
2326                 const uint32_t colorIndexBits         = colorInterpolationBits - ((anchorIndex == pixel) ? 1 : 0);
2327                 const uint32_t alphaInterpolationBits =
2328                     mode == 4 ? 3 - idxMode : (mode == 5 ? 2 : colorInterpolationBits);
2329                 const uint32_t alphaIndexBits = alphaInterpolationBits - ((anchorIndex == pixel) ? 1 : 0);
2330                 const uint32_t colorIdx = getBits128(low, high, colorIdxOffset, colorIdxOffset + colorIndexBits - 1);
2331                 const uint32_t alphaIdx = (mode == 4 || mode == 5) ? getBits128(low, high, alphaIdxOffset,
2332                                                                                 alphaIdxOffset + alphaIndexBits - 1) :
2333                                                                      colorIdx;
2334                 const uint32_t r = interpolate(endpointStart[0], endpointEnd[0], colorIdx, colorInterpolationBits);
2335                 const uint32_t g = interpolate(endpointStart[1], endpointEnd[1], colorIdx, colorInterpolationBits);
2336                 const uint32_t b = interpolate(endpointStart[2], endpointEnd[2], colorIdx, colorInterpolationBits);
2337                 const uint32_t a = interpolate(endpointStart[3], endpointEnd[3], alphaIdx, alphaInterpolationBits);
2338 
2339                 colorIdxOffset += colorIndexBits;
2340                 alphaIdxOffset += alphaIndexBits;
2341 
2342                 if ((mode == 4 || mode == 5) && rotation != 0)
2343                 {
2344                     if (rotation == 1)
2345                         *dstPixel = a | (g << 8) | (b << 16) | (r << 24);
2346                     else if (rotation == 2)
2347                         *dstPixel = r | (a << 8) | (b << 16) | (g << 24);
2348                     else
2349                         *dstPixel = r | (g << 8) | (a << 16) | (b << 24);
2350                 }
2351                 else
2352                 {
2353                     *dstPixel = r | (g << 8) | (b << 16) | (a << 24);
2354                 }
2355             }
2356         }
2357     }
2358 }
2359 
decompressAhbRaw10(const PixelBufferAccess & dst,const uint8_t * src)2360 void decompressAhbRaw10(const PixelBufferAccess &dst, const uint8_t *src)
2361 {
2362     // Packed format with 4 pixels in 5 bytes
2363     // Layout: https://developer.android.com/reference/android/graphics/ImageFormat#RAW10
2364 
2365     uint32_t firstPixel  = (*(src + 0u));
2366     uint32_t secondPixel = (*(src + 1u));
2367     uint32_t thirdPixel  = (*(src + 2u));
2368     uint32_t fourthPixel = (*(src + 3u));
2369     uint32_t packedPixel = (*(src + 4u));
2370 
2371     // We now need to take last bits for each pixel from the packed pixel to build all pixel values
2372     firstPixel  = ((firstPixel << 2u) | ((packedPixel >> 0u) & 0b00000011u));
2373     secondPixel = ((secondPixel << 2u) | ((packedPixel >> 2u) & 0b00000011u));
2374     thirdPixel  = ((thirdPixel << 2u) | ((packedPixel >> 4u) & 0b00000011u));
2375     fourthPixel = ((fourthPixel << 2u) | ((packedPixel >> 6u) & 0b00000011u));
2376 
2377     // Store values in buffer (higher bits is were data is stored)
2378     uint16_t *pixel = static_cast<uint16_t *>(dst.getDataPtr());
2379     (*pixel)        = static_cast<uint16_t>(firstPixel << 6u);
2380 
2381     pixel++;
2382     (*pixel) = static_cast<uint16_t>(secondPixel << 6u);
2383 
2384     pixel++;
2385     (*pixel) = static_cast<uint16_t>(thirdPixel << 6u);
2386 
2387     pixel++;
2388     (*pixel) = static_cast<uint16_t>(fourthPixel << 6u);
2389 }
2390 
decompressAhbRaw12(const PixelBufferAccess & dst,const uint8_t * src)2391 void decompressAhbRaw12(const PixelBufferAccess &dst, const uint8_t *src)
2392 {
2393     // Packed format with 2 pixels in 3 bytes
2394     // Layout: https://developer.android.com/reference/android/graphics/ImageFormat#RAW12
2395 
2396     uint32_t firstPixel  = (*(src + 0));
2397     uint32_t secondPixel = (*(src + 1));
2398     uint32_t packedPixel = (*(src + 2));
2399 
2400     // We now need to take last bits for each pixel from the packed pixel to build all pixel values
2401     firstPixel  = ((firstPixel << 4u) | ((packedPixel >> 0u) & 0b00001111u));
2402     secondPixel = ((secondPixel << 4u) | ((packedPixel >> 4u) & 0b00001111u));
2403 
2404     // Store values in buffer (higher bits is were data is stored)
2405     uint16_t *pixel = static_cast<uint16_t *>(dst.getDataPtr());
2406     (*pixel)        = static_cast<uint16_t>(firstPixel << 6u);
2407 
2408     pixel++;
2409     (*pixel) = static_cast<uint16_t>(secondPixel << 6u);
2410 }
2411 
decompressBlock(CompressedTexFormat format,const PixelBufferAccess & dst,const uint8_t * src,const TexDecompressionParams & params)2412 void decompressBlock(CompressedTexFormat format, const PixelBufferAccess &dst, const uint8_t *src,
2413                      const TexDecompressionParams ¶ms)
2414 {
2415     // No 3D blocks supported right now
2416     DE_ASSERT(dst.getDepth() == 1);
2417 
2418     switch (format)
2419     {
2420     case COMPRESSEDTEXFORMAT_ETC1_RGB8:
2421         decompressETC1(dst, src);
2422         break;
2423     case COMPRESSEDTEXFORMAT_EAC_R11:
2424         decompressEAC_R11(dst, src, false);
2425         break;
2426     case COMPRESSEDTEXFORMAT_EAC_SIGNED_R11:
2427         decompressEAC_R11(dst, src, true);
2428         break;
2429     case COMPRESSEDTEXFORMAT_EAC_RG11:
2430         decompressEAC_RG11(dst, src, false);
2431         break;
2432     case COMPRESSEDTEXFORMAT_EAC_SIGNED_RG11:
2433         decompressEAC_RG11(dst, src, true);
2434         break;
2435     case COMPRESSEDTEXFORMAT_ETC2_RGB8:
2436         decompressETC2(dst, src);
2437         break;
2438     case COMPRESSEDTEXFORMAT_ETC2_SRGB8:
2439         decompressETC2(dst, src);
2440         break;
2441     case COMPRESSEDTEXFORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1:
2442         decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1(dst, src);
2443         break;
2444     case COMPRESSEDTEXFORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1:
2445         decompressETC2_RGB8_PUNCHTHROUGH_ALPHA1(dst, src);
2446         break;
2447     case COMPRESSEDTEXFORMAT_ETC2_EAC_RGBA8:
2448         decompressETC2_EAC_RGBA8(dst, src);
2449         break;
2450     case COMPRESSEDTEXFORMAT_ETC2_EAC_SRGB8_ALPHA8:
2451         decompressETC2_EAC_RGBA8(dst, src);
2452         break;
2453 
2454     case COMPRESSEDTEXFORMAT_ASTC_4x4_RGBA:
2455     case COMPRESSEDTEXFORMAT_ASTC_5x4_RGBA:
2456     case COMPRESSEDTEXFORMAT_ASTC_5x5_RGBA:
2457     case COMPRESSEDTEXFORMAT_ASTC_6x5_RGBA:
2458     case COMPRESSEDTEXFORMAT_ASTC_6x6_RGBA:
2459     case COMPRESSEDTEXFORMAT_ASTC_8x5_RGBA:
2460     case COMPRESSEDTEXFORMAT_ASTC_8x6_RGBA:
2461     case COMPRESSEDTEXFORMAT_ASTC_8x8_RGBA:
2462     case COMPRESSEDTEXFORMAT_ASTC_10x5_RGBA:
2463     case COMPRESSEDTEXFORMAT_ASTC_10x6_RGBA:
2464     case COMPRESSEDTEXFORMAT_ASTC_10x8_RGBA:
2465     case COMPRESSEDTEXFORMAT_ASTC_10x10_RGBA:
2466     case COMPRESSEDTEXFORMAT_ASTC_12x10_RGBA:
2467     case COMPRESSEDTEXFORMAT_ASTC_12x12_RGBA:
2468     case COMPRESSEDTEXFORMAT_ASTC_4x4_SRGB8_ALPHA8:
2469     case COMPRESSEDTEXFORMAT_ASTC_5x4_SRGB8_ALPHA8:
2470     case COMPRESSEDTEXFORMAT_ASTC_5x5_SRGB8_ALPHA8:
2471     case COMPRESSEDTEXFORMAT_ASTC_6x5_SRGB8_ALPHA8:
2472     case COMPRESSEDTEXFORMAT_ASTC_6x6_SRGB8_ALPHA8:
2473     case COMPRESSEDTEXFORMAT_ASTC_8x5_SRGB8_ALPHA8:
2474     case COMPRESSEDTEXFORMAT_ASTC_8x6_SRGB8_ALPHA8:
2475     case COMPRESSEDTEXFORMAT_ASTC_8x8_SRGB8_ALPHA8:
2476     case COMPRESSEDTEXFORMAT_ASTC_10x5_SRGB8_ALPHA8:
2477     case COMPRESSEDTEXFORMAT_ASTC_10x6_SRGB8_ALPHA8:
2478     case COMPRESSEDTEXFORMAT_ASTC_10x8_SRGB8_ALPHA8:
2479     case COMPRESSEDTEXFORMAT_ASTC_10x10_SRGB8_ALPHA8:
2480     case COMPRESSEDTEXFORMAT_ASTC_12x10_SRGB8_ALPHA8:
2481     case COMPRESSEDTEXFORMAT_ASTC_12x12_SRGB8_ALPHA8:
2482         astc::decompress(dst, src, format, params.astcMode);
2483         break;
2484 
2485     case COMPRESSEDTEXFORMAT_BC1_RGB_UNORM_BLOCK:
2486         decompressBc1(dst, src, false);
2487         break;
2488     case COMPRESSEDTEXFORMAT_BC1_RGB_SRGB_BLOCK:
2489         decompressBc1(dst, src, false);
2490         break;
2491     case COMPRESSEDTEXFORMAT_BC1_RGBA_UNORM_BLOCK:
2492         decompressBc1(dst, src, true);
2493         break;
2494     case COMPRESSEDTEXFORMAT_BC1_RGBA_SRGB_BLOCK:
2495         decompressBc1(dst, src, true);
2496         break;
2497     case COMPRESSEDTEXFORMAT_BC2_UNORM_BLOCK:
2498         decompressBc2(dst, src);
2499         break;
2500     case COMPRESSEDTEXFORMAT_BC2_SRGB_BLOCK:
2501         decompressBc2(dst, src);
2502         break;
2503     case COMPRESSEDTEXFORMAT_BC3_UNORM_BLOCK:
2504         decompressBc3(dst, src);
2505         break;
2506     case COMPRESSEDTEXFORMAT_BC3_SRGB_BLOCK:
2507         decompressBc3(dst, src);
2508         break;
2509     case COMPRESSEDTEXFORMAT_BC4_UNORM_BLOCK:
2510         decompressBc4(dst, src, false);
2511         break;
2512     case COMPRESSEDTEXFORMAT_BC4_SNORM_BLOCK:
2513         decompressBc4(dst, src, true);
2514         break;
2515     case COMPRESSEDTEXFORMAT_BC5_UNORM_BLOCK:
2516         decompressBc5(dst, src, false);
2517         break;
2518     case COMPRESSEDTEXFORMAT_BC5_SNORM_BLOCK:
2519         decompressBc5(dst, src, true);
2520         break;
2521     case COMPRESSEDTEXFORMAT_BC6H_UFLOAT_BLOCK:
2522         decompressBc6H(dst, src, false);
2523         break;
2524     case COMPRESSEDTEXFORMAT_BC6H_SFLOAT_BLOCK:
2525         decompressBc6H(dst, src, true);
2526         break;
2527     case COMPRESSEDTEXFORMAT_BC7_UNORM_BLOCK:
2528         decompressBc7(dst, src);
2529         break;
2530     case COMPRESSEDTEXFORMAT_BC7_SRGB_BLOCK:
2531         decompressBc7(dst, src);
2532         break;
2533 
2534     case COMPRESSEDTEXFORMAT_AHB_RAW10:
2535         decompressAhbRaw10(dst, src);
2536         break;
2537     case COMPRESSEDTEXFORMAT_AHB_RAW12:
2538         decompressAhbRaw12(dst, src);
2539         break;
2540 
2541     default:
2542         DE_FATAL("Unexpected format");
2543         break;
2544     }
2545 }
2546 
componentSum(const IVec3 & vec)2547 int componentSum(const IVec3 &vec)
2548 {
2549     return vec.x() + vec.y() + vec.z();
2550 }
2551 
2552 } // namespace
2553 
decompress(const PixelBufferAccess & dst,CompressedTexFormat fmt,const uint8_t * src,const TexDecompressionParams & params)2554 void decompress(const PixelBufferAccess &dst, CompressedTexFormat fmt, const uint8_t *src,
2555                 const TexDecompressionParams ¶ms)
2556 {
2557     const int blockSize = getBlockSize(fmt);
2558     const IVec3 blockPixelSize(getBlockPixelSize(fmt));
2559     const IVec3 blockCount(deDivRoundUp32(dst.getWidth(), blockPixelSize.x()),
2560                            deDivRoundUp32(dst.getHeight(), blockPixelSize.y()),
2561                            deDivRoundUp32(dst.getDepth(), blockPixelSize.z()));
2562     const IVec3 blockPitches(blockSize, blockSize * blockCount.x(), blockSize * blockCount.x() * blockCount.y());
2563 
2564     std::vector<uint8_t> uncompressedBlock(dst.getFormat().getPixelSize() * blockPixelSize.x() * blockPixelSize.y() *
2565                                            blockPixelSize.z());
2566     const PixelBufferAccess blockAccess(getUncompressedFormat(fmt), blockPixelSize.x(), blockPixelSize.y(),
2567                                         blockPixelSize.z(), &uncompressedBlock[0]);
2568 
2569     DE_ASSERT(dst.getFormat() == getUncompressedFormat(fmt));
2570 
2571     for (int blockZ = 0; blockZ < blockCount.z(); blockZ++)
2572         for (int blockY = 0; blockY < blockCount.y(); blockY++)
2573             for (int blockX = 0; blockX < blockCount.x(); blockX++)
2574             {
2575                 const IVec3 blockPos(blockX, blockY, blockZ);
2576                 const uint8_t *const blockPtr = src + componentSum(blockPos * blockPitches);
2577                 const IVec3 copySize(de::min(blockPixelSize.x(), dst.getWidth() - blockPos.x() * blockPixelSize.x()),
2578                                      de::min(blockPixelSize.y(), dst.getHeight() - blockPos.y() * blockPixelSize.y()),
2579                                      de::min(blockPixelSize.z(), dst.getDepth() - blockPos.z() * blockPixelSize.z()));
2580                 const IVec3 dstPixelPos = blockPos * blockPixelSize;
2581 
2582                 decompressBlock(fmt, blockAccess, blockPtr, params);
2583 
2584                 copy(getSubregion(dst, dstPixelPos.x(), dstPixelPos.y(), dstPixelPos.z(), copySize.x(), copySize.y(),
2585                                   copySize.z()),
2586                      getSubregion(blockAccess, 0, 0, 0, copySize.x(), copySize.y(), copySize.z()));
2587             }
2588 }
2589 
CompressedTexture(void)2590 CompressedTexture::CompressedTexture(void) : m_format(COMPRESSEDTEXFORMAT_LAST), m_width(0), m_height(0), m_depth(0)
2591 {
2592 }
2593 
CompressedTexture(CompressedTexFormat format,int width,int height,int depth)2594 CompressedTexture::CompressedTexture(CompressedTexFormat format, int width, int height, int depth)
2595     : m_format(COMPRESSEDTEXFORMAT_LAST)
2596     , m_width(0)
2597     , m_height(0)
2598     , m_depth(0)
2599 {
2600     setStorage(format, width, height, depth);
2601 }
2602 
CompressedTexture(CompressedTexFormat format,int width,int height,int depth,std::vector<uint8_t> && data)2603 CompressedTexture::CompressedTexture(CompressedTexFormat format, int width, int height, int depth,
2604                                      std::vector<uint8_t> &&data)
2605     : m_format(COMPRESSEDTEXFORMAT_LAST)
2606     , m_width(0)
2607     , m_height(0)
2608     , m_depth(0)
2609     , m_data(data)
2610 {
2611     setStorage(format, width, height, depth);
2612 }
2613 
~CompressedTexture(void)2614 CompressedTexture::~CompressedTexture(void)
2615 {
2616 }
2617 
setStorage(CompressedTexFormat format,int width,int height,int depth)2618 void CompressedTexture::setStorage(CompressedTexFormat format, int width, int height, int depth)
2619 {
2620     m_format = format;
2621     m_width  = width;
2622     m_height = height;
2623     m_depth  = depth;
2624 
2625     if (m_format != COMPRESSEDTEXFORMAT_LAST)
2626     {
2627         const IVec3 blockPixelSize = getBlockPixelSize(m_format);
2628         const int blockSize        = getBlockSize(m_format);
2629 
2630         if (m_data.size() > 0)
2631         {
2632             DE_ASSERT(static_cast<int>(m_data.size()) >=
2633                       (deDivRoundUp32(m_width, blockPixelSize.x()) * deDivRoundUp32(m_height, blockPixelSize.y()) *
2634                        deDivRoundUp32(m_depth, blockPixelSize.z()) * blockSize));
2635         }
2636         else
2637         {
2638             m_data.resize(deDivRoundUp32(m_width, blockPixelSize.x()) * deDivRoundUp32(m_height, blockPixelSize.y()) *
2639                           deDivRoundUp32(m_depth, blockPixelSize.z()) * blockSize);
2640         }
2641     }
2642     else
2643     {
2644         DE_ASSERT(m_format == COMPRESSEDTEXFORMAT_LAST);
2645         DE_ASSERT(m_width == 0 && m_height == 0 && m_depth == 0);
2646         m_data.resize(0);
2647     }
2648 }
2649 
2650 /*--------------------------------------------------------------------*//*!
2651  * \brief Decode to uncompressed pixel data
2652  * \param dst Destination buffer
2653  *//*--------------------------------------------------------------------*/
decompress(const PixelBufferAccess & dst,const TexDecompressionParams & params) const2654 void CompressedTexture::decompress(const PixelBufferAccess &dst, const TexDecompressionParams ¶ms) const
2655 {
2656     DE_ASSERT(dst.getWidth() == m_width && dst.getHeight() == m_height && dst.getDepth() == m_depth);
2657     DE_ASSERT(dst.getFormat() == getUncompressedFormat(m_format));
2658 
2659     tcu::decompress(dst, m_format, &m_data[0], params);
2660 }
2661 
2662 } // namespace tcu
2663