1 /* libs/opengles/dxt.cpp
2 **
3 ** Copyright 2007, The Android Open Source Project
4 **
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
8 **
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 **
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 */
17
18 #define TIMING 0
19
20 #if TIMING
21 #include <sys/time.h> // for optimization timing
22 #include <stdio.h>
23 #include <stdlib.h>
24 #endif
25
26 #include <GLES/gl.h>
27 #include <utils/Endian.h>
28
29 #include "context.h"
30
31 #define TIMING 0
32
33 namespace android {
34
// Lookup table behind the avg23() macro. It is filled in by init_tables(),
// which must have run before avg23() is used.
static uint8_t avg23tab[64*64];
// Set to 1 by init_tables() once avg23tab has been populated.
static volatile int tables_initialized = 0;

// Definitions below are equivalent to these over the valid range of arguments
// #define div5(x) ((x)/5)
// #define div7(x) ((x)/7)

// Use fixed-point to divide by 5 and 7
// 3277 = 2^14/5 + 1
// 2341 = 2^14/7 + 1
// In this file the arguments are weighted sums of two 8-bit alpha values,
// i.e. at most 5*255 for div5 and 7*255 for div7.
#define div5(x) (((x)*3277) >> 14)
#define div7(x) (((x)*2341) >> 14)

// Table with entry [a << 6 | b] = (2*a + b)/3 for 0 <= a,b < 64
// Arguments outside that range would index the wrong entry (or past the
// end of the table); callers pass 5- or 6-bit colour components.
#define avg23(x0,x1) avg23tab[((x0) << 6) | (x1)]

// Extract 5/6/5 RGB
// x is a 16-bit colour word: red in bits 15..11, green in bits 10..5,
// blue in bits 4..0.
#define red(x) (((x) >> 11) & 0x1f)
#define green(x) (((x) >> 5) & 0x3f)
#define blue(x) ( (x) & 0x1f)
55
/*
 * Expand separate 5-bit red, 6-bit green and 5-bit blue components into a
 * packed 24-bit 8/8/8 RGB value (red in bits 23..16, green in bits 15..8,
 * blue in bits 7..0).
 *
 * Each channel is widened to 8 bits by replicating its most significant
 * bits into the newly created low-order bits, so that the maximum input
 * maps to 0xff and zero maps to zero.
 *
 * Operation count: 8 shifts, 0 &, 5 |
 */
inline static int rgb565SepTo888(int r, int g, int b)
{
    const int red8   = (r << 3) | (r >> 2);
    const int green8 = (g << 2) | (g >> 4);
    const int blue8  = (b << 3) | (b >> 2);

    return (red8 << 16) | (green8 << 8) | blue8;
}
68
/*
 * Convert 5/6/5 RGB (as a single 16-bit word) to packed 8/8/8 RGB.
 *
 * Each channel is widened by copying its top bits into the new low bits,
 * so the result equals rgb565SepTo888(red(rgb), green(rgb), blue(rgb)).
 * Only bits 15..0 of 'rgb' contribute to the result.
 *
 * The 24-bit word is assembled from six masked shifts of the input:
 *
 *   bits 23..19  r4..r0   (rgb << 8) & 0xf80000
 *   bits 18..16  r4..r2   (rgb << 3) & 0x070000
 *   bits 15..10  g5..g0   (rgb << 5) & 0x00fc00
 *   bits  9..8   g5..g4   (rgb >> 1) & 0x000300
 *   bits  7..3   b4..b0   (rgb << 3) & 0x0000f8
 *   bits  2..0   b4..b2   (rgb >> 2) & 0x000007
 *
 * Operation count: 5 shifts, 6 &, 5 | (n.b. rgb << 3 is used twice)
 */
inline static int rgb565To888(int rgb)
{
    // Shared term: must be a LEFT shift so that r4..r2 land in bits 18..16
    // and b4..b0 land in bits 7..3.
    int rgb3 = rgb << 3;
    return (((rgb << 8) & 0xf80000) |
            ( rgb3      & 0x070000) |
            ((rgb << 5) & 0x00fc00) |
            ((rgb >> 1) & 0x000300) |
            ( rgb3      & 0x0000f8) |
            ((rgb >> 2) & 0x000007));
}
98
#if __BYTE_ORDER == __BIG_ENDIAN
/*
 * Reverse the byte order of a 32-bit word (byte 0 <-> byte 3,
 * byte 1 <-> byte 2).  Only compiled for big-endian targets, where it is
 * used to convert words loaded from the compressed data stream.
 *
 * All arithmetic is done on unsigned values so that moving a byte into
 * bits 31..24 never shifts into the sign bit of a signed int.
 */
static uint32_t swap(uint32_t x) {
    return ((x & 0x000000ffu) << 24) |
           ((x & 0x0000ff00u) <<  8) |
           ((x & 0x00ff0000u) >>  8) |
           ((x & 0xff000000u) >> 24);
}
#endif
109
110 static void
init_tables()111 init_tables()
112 {
113 if (tables_initialized) {
114 return;
115 }
116
117 for (int i = 0; i < 64; i++) {
118 for (int j = 0; j < 64; j++) {
119 int avg = (2*i + j)/3;
120 avg23tab[(i << 6) | j] = avg;
121 }
122 }
123
124 asm volatile ("" : : : "memory");
125 tables_initialized = 1;
126 }
127
128 /*
129 * Utility to scan a DXT1 compressed texture to determine whether it
130 * contains a transparent pixel (color0 < color1, code == 3). This
131 * may be useful if the application lacks information as to whether
132 * the true format is GL_COMPRESSED_RGB_S3TC_DXT1_EXT or
133 * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT.
134 */
135 bool
DXT1HasAlpha(const GLvoid * data,int width,int height)136 DXT1HasAlpha(const GLvoid *data, int width, int height) {
137 #if TIMING
138 struct timeval start_t, end_t;
139 struct timezone tz;
140
141 gettimeofday(&start_t, &tz);
142 #endif
143
144 bool hasAlpha = false;
145
146 int xblocks = (width + 3)/4;
147 int yblocks = (height + 3)/4;
148 int numblocks = xblocks*yblocks;
149
150 uint32_t const *d32 = (uint32_t *)data;
151 for (int b = 0; b < numblocks; b++) {
152 uint32_t colors = *d32++;
153
154 #if __BYTE_ORDER == __BIG_ENDIAN
155 colors = swap(colors);
156 #endif
157
158 uint16_t color0 = colors & 0xffff;
159 uint16_t color1 = colors >> 16;
160
161 if (color0 < color1) {
162 // There's no need to endian-swap within 'bits'
163 // since we don't care which pixel is the transparent one
164 uint32_t bits = *d32++;
165
166 // Detect if any (odd, even) pair of bits are '11'
167 // bits: b31 b30 b29 ... b3 b2 b1 b0
168 // bits >> 1: b31 b31 b30 ... b4 b3 b2 b1
169 // &: b31 (b31 & b30) (b29 & b28) ... (b2 & b1) (b1 & b0)
170 // & 0x55..: 0 (b31 & b30) 0 ... 0 (b1 & b0)
171 if (((bits & (bits >> 1)) & 0x55555555) != 0) {
172 hasAlpha = true;
173 goto done;
174 }
175 } else {
176 // Skip 4 bytes
177 ++d32;
178 }
179 }
180
181 done:
182 #if TIMING
183 gettimeofday(&end_t, &tz);
184 long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
185 (end_t.tv_usec - start_t.tv_usec);
186
187 printf("Scanned w=%d h=%d in %ld usec\n", width, height, usec);
188 #endif
189
190 return hasAlpha;
191 }
192
193 static void
decodeDXT1(const GLvoid * data,int width,int height,void * surface,int stride,bool hasAlpha)194 decodeDXT1(const GLvoid *data, int width, int height,
195 void *surface, int stride,
196 bool hasAlpha)
197
198 {
199 init_tables();
200
201 uint32_t const *d32 = (uint32_t *)data;
202
203 // Color table for the current block
204 uint16_t c[4];
205 c[0] = c[1] = c[2] = c[3] = 0;
206
207 // Specified colors from the previous block
208 uint16_t prev_color0 = 0x0000;
209 uint16_t prev_color1 = 0x0000;
210
211 uint16_t* rowPtr = (uint16_t*)surface;
212 for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
213 uint16_t *blockPtr = rowPtr;
214 for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
215 uint32_t colors = *d32++;
216 uint32_t bits = *d32++;
217
218 #if __BYTE_ORDER == __BIG_ENDIAN
219 colors = swap(colors);
220 bits = swap(bits);
221 #endif
222
223 // Raw colors
224 uint16_t color0 = colors & 0xffff;
225 uint16_t color1 = colors >> 16;
226
227 // If the new block has the same base colors as the
228 // previous one, we don't need to recompute the color
229 // table c[]
230 if (color0 != prev_color0 || color1 != prev_color1) {
231 // Store raw colors for comparison with next block
232 prev_color0 = color0;
233 prev_color1 = color1;
234
235 int r0 = red(color0);
236 int g0 = green(color0);
237 int b0 = blue(color0);
238
239 int r1 = red(color1);
240 int g1 = green(color1);
241 int b1 = blue(color1);
242
243 if (hasAlpha) {
244 c[0] = (r0 << 11) | ((g0 >> 1) << 6) | (b0 << 1) | 0x1;
245 c[1] = (r1 << 11) | ((g1 >> 1) << 6) | (b1 << 1) | 0x1;
246 } else {
247 c[0] = color0;
248 c[1] = color1;
249 }
250
251 int r2, g2, b2, r3, g3, b3, a3;
252
253 int bbits = bits >> 1;
254 bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
255 bool has3 = ((bbits & bits) & 0x55555555) != 0;
256
257 if (has2 || has3) {
258 if (color0 > color1) {
259 r2 = avg23(r0, r1);
260 g2 = avg23(g0, g1);
261 b2 = avg23(b0, b1);
262
263 r3 = avg23(r1, r0);
264 g3 = avg23(g1, g0);
265 b3 = avg23(b1, b0);
266 a3 = 1;
267 } else {
268 r2 = (r0 + r1) >> 1;
269 g2 = (g0 + g1) >> 1;
270 b2 = (b0 + b1) >> 1;
271
272 r3 = g3 = b3 = a3 = 0;
273 }
274 if (hasAlpha) {
275 c[2] = (r2 << 11) | ((g2 >> 1) << 6) |
276 (b2 << 1) | 0x1;
277 c[3] = (r3 << 11) | ((g3 >> 1) << 6) |
278 (b3 << 1) | a3;
279 } else {
280 c[2] = (r2 << 11) | (g2 << 5) | b2;
281 c[3] = (r3 << 11) | (g3 << 5) | b3;
282 }
283 }
284 }
285
286 uint16_t* blockRowPtr = blockPtr;
287 for (int y = 0; y < 4; y++, blockRowPtr += stride) {
288 // Don't process rows past the botom
289 if (base_y + y >= height) {
290 break;
291 }
292
293 int w = min(width - base_x, 4);
294 for (int x = 0; x < w; x++) {
295 int code = bits & 0x3;
296 bits >>= 2;
297
298 blockRowPtr[x] = c[code];
299 }
300 }
301 }
302 }
303 }
304
305 // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
306 static void
decodeDXT3(const GLvoid * data,int width,int height,void * surface,int stride)307 decodeDXT3(const GLvoid *data, int width, int height,
308 void *surface, int stride)
309
310 {
311 init_tables();
312
313 uint32_t const *d32 = (uint32_t *)data;
314
315 // Specified colors from the previous block
316 uint16_t prev_color0 = 0x0000;
317 uint16_t prev_color1 = 0x0000;
318
319 // Color table for the current block
320 uint32_t c[4];
321 c[0] = c[1] = c[2] = c[3] = 0;
322
323 uint32_t* rowPtr = (uint32_t*)surface;
324 for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
325 uint32_t *blockPtr = rowPtr;
326 for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
327
328 #if __BYTE_ORDER == __BIG_ENDIAN
329 uint32_t alphahi = *d32++;
330 uint32_t alphalo = *d32++;
331 alphahi = swap(alphahi);
332 alphalo = swap(alphalo);
333 #else
334 uint32_t alphalo = *d32++;
335 uint32_t alphahi = *d32++;
336 #endif
337
338 uint32_t colors = *d32++;
339 uint32_t bits = *d32++;
340
341 #if __BYTE_ORDER == __BIG_ENDIAN
342 colors = swap(colors);
343 bits = swap(bits);
344 #endif
345
346 uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
347
348 // Raw colors
349 uint16_t color0 = colors & 0xffff;
350 uint16_t color1 = colors >> 16;
351
352 // If the new block has the same base colors as the
353 // previous one, we don't need to recompute the color
354 // table c[]
355 if (color0 != prev_color0 || color1 != prev_color1) {
356 // Store raw colors for comparison with next block
357 prev_color0 = color0;
358 prev_color1 = color1;
359
360 int bbits = bits >> 1;
361 bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
362 bool has3 = ((bbits & bits) & 0x55555555) != 0;
363
364 if (has2 || has3) {
365 int r0 = red(color0);
366 int g0 = green(color0);
367 int b0 = blue(color0);
368
369 int r1 = red(color1);
370 int g1 = green(color1);
371 int b1 = blue(color1);
372
373 int r2 = avg23(r0, r1);
374 int g2 = avg23(g0, g1);
375 int b2 = avg23(b0, b1);
376
377 int r3 = avg23(r1, r0);
378 int g3 = avg23(g1, g0);
379 int b3 = avg23(b1, b0);
380
381 c[0] = rgb565SepTo888(r0, g0, b0);
382 c[1] = rgb565SepTo888(r1, g1, b1);
383 c[2] = rgb565SepTo888(r2, g2, b2);
384 c[3] = rgb565SepTo888(r3, g3, b3);
385 } else {
386 // Convert to 8 bits
387 c[0] = rgb565To888(color0);
388 c[1] = rgb565To888(color1);
389 }
390 }
391
392 uint32_t* blockRowPtr = blockPtr;
393 for (int y = 0; y < 4; y++, blockRowPtr += stride) {
394 // Don't process rows past the botom
395 if (base_y + y >= height) {
396 break;
397 }
398
399 int w = min(width - base_x, 4);
400 for (int x = 0; x < w; x++) {
401 int a = alpha & 0xf;
402 alpha >>= 4;
403
404 int code = bits & 0x3;
405 bits >>= 2;
406
407 blockRowPtr[x] = c[code] | (a << 28) | (a << 24);
408 }
409 }
410 }
411 }
412 }
413
414 // Output data as internalformat=GL_RGBA, type=GL_UNSIGNED_BYTE
415 static void
decodeDXT5(const GLvoid * data,int width,int height,void * surface,int stride)416 decodeDXT5(const GLvoid *data, int width, int height,
417 void *surface, int stride)
418
419 {
420 init_tables();
421
422 uint32_t const *d32 = (uint32_t *)data;
423
424 // Specified alphas from the previous block
425 uint8_t prev_alpha0 = 0x00;
426 uint8_t prev_alpha1 = 0x00;
427
428 // Specified colors from the previous block
429 uint16_t prev_color0 = 0x0000;
430 uint16_t prev_color1 = 0x0000;
431
432 // Alpha table for the current block
433 uint8_t a[8];
434 a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = 0;
435
436 // Color table for the current block
437 uint32_t c[4];
438 c[0] = c[1] = c[2] = c[3] = 0;
439
440 int good_a5 = 0;
441 int bad_a5 = 0;
442 int good_a6 = 0;
443 int bad_a6 = 0;
444 int good_a7 = 0;
445 int bad_a7 = 0;
446
447 uint32_t* rowPtr = (uint32_t*)surface;
448 for (int base_y = 0; base_y < height; base_y += 4, rowPtr += 4*stride) {
449 uint32_t *blockPtr = rowPtr;
450 for (int base_x = 0; base_x < width; base_x += 4, blockPtr += 4) {
451
452 #if __BYTE_ORDER == __BIG_ENDIAN
453 uint32_t alphahi = *d32++;
454 uint32_t alphalo = *d32++;
455 alphahi = swap(alphahi);
456 alphalo = swap(alphalo);
457 #else
458 uint32_t alphalo = *d32++;
459 uint32_t alphahi = *d32++;
460 #endif
461
462 uint32_t colors = *d32++;
463 uint32_t bits = *d32++;
464
465 #if __BYTE_ORDER == __BIG_ENDIANx
466 colors = swap(colors);
467 bits = swap(bits);
468 #endif
469
470 uint64_t alpha = ((uint64_t)alphahi << 32) | alphalo;
471 uint64_t alpha0 = alpha & 0xff;
472 alpha >>= 8;
473 uint64_t alpha1 = alpha & 0xff;
474 alpha >>= 8;
475
476 if (alpha0 != prev_alpha0 || alpha1 != prev_alpha1) {
477 prev_alpha0 = alpha0;
478 prev_alpha1 = alpha1;
479
480 a[0] = alpha0;
481 a[1] = alpha1;
482 int a01 = alpha0 + alpha1 - 1;
483 if (alpha0 > alpha1) {
484 a[2] = div7(6*alpha0 + alpha1);
485 a[4] = div7(4*alpha0 + 3*alpha1);
486 a[6] = div7(2*alpha0 + 5*alpha1);
487
488 // Use symmetry to derive half of the values
489 // A few values will be off by 1 (~.5%)
490 // Alternate which values are computed directly
491 // and which are derived to try to reduce bias
492 a[3] = a01 - a[6];
493 a[5] = a01 - a[4];
494 a[7] = a01 - a[2];
495 } else {
496 a[2] = div5(4*alpha0 + alpha1);
497 a[4] = div5(2*alpha0 + 3*alpha1);
498 a[3] = a01 - a[4];
499 a[5] = a01 - a[2];
500 a[6] = 0x00;
501 a[7] = 0xff;
502 }
503 }
504
505 // Raw colors
506 uint16_t color0 = colors & 0xffff;
507 uint16_t color1 = colors >> 16;
508
509 // If the new block has the same base colors as the
510 // previous one, we don't need to recompute the color
511 // table c[]
512 if (color0 != prev_color0 || color1 != prev_color1) {
513 // Store raw colors for comparison with next block
514 prev_color0 = color0;
515 prev_color1 = color1;
516
517 int bbits = bits >> 1;
518 bool has2 = ((bbits & ~bits) & 0x55555555) != 0;
519 bool has3 = ((bbits & bits) & 0x55555555) != 0;
520
521 if (has2 || has3) {
522 int r0 = red(color0);
523 int g0 = green(color0);
524 int b0 = blue(color0);
525
526 int r1 = red(color1);
527 int g1 = green(color1);
528 int b1 = blue(color1);
529
530 int r2 = avg23(r0, r1);
531 int g2 = avg23(g0, g1);
532 int b2 = avg23(b0, b1);
533
534 int r3 = avg23(r1, r0);
535 int g3 = avg23(g1, g0);
536 int b3 = avg23(b1, b0);
537
538 c[0] = rgb565SepTo888(r0, g0, b0);
539 c[1] = rgb565SepTo888(r1, g1, b1);
540 c[2] = rgb565SepTo888(r2, g2, b2);
541 c[3] = rgb565SepTo888(r3, g3, b3);
542 } else {
543 // Convert to 8 bits
544 c[0] = rgb565To888(color0);
545 c[1] = rgb565To888(color1);
546 }
547 }
548
549 uint32_t* blockRowPtr = blockPtr;
550 for (int y = 0; y < 4; y++, blockRowPtr += stride) {
551 // Don't process rows past the botom
552 if (base_y + y >= height) {
553 break;
554 }
555
556 int w = min(width - base_x, 4);
557 for (int x = 0; x < w; x++) {
558 int acode = alpha & 0x7;
559 alpha >>= 3;
560
561 int code = bits & 0x3;
562 bits >>= 2;
563
564 blockRowPtr[x] = c[code] | (a[acode] << 24);
565 }
566 }
567 }
568 }
569 }
570
571 /*
572 * Decode a DXT-compressed texture into memory. DXT textures consist of
573 * a series of 4x4 pixel blocks in left-to-right, top-down order.
574 * The number of blocks is given by ceil(width/4)*ceil(height/4).
575 *
576 * 'data' points to the texture data. 'width' and 'height' indicate the
577 * dimensions of the texture. We assume width and height are >= 0 but
578 * do not require them to be powers of 2 or divisible by any factor.
579 *
580 * The output is written to 'surface' with each scanline separated by
581 * 'stride' 2- or 4-byte words.
582 *
583 * 'format' indicates the type of compression and must be one of the following:
584 *
585 * GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
586 * The output is written as 5/6/5 opaque RGB (16 bit words).
587 * 8 bytes are read from 'data' for each block.
588 *
589 * GL_COMPRESSED_RGBA_S3TC_DXT1_EXT
590 * The output is written as 5/5/5/1 RGBA (16 bit words)
591 * 8 bytes are read from 'data' for each block.
592 *
593 * GL_COMPRESSED_RGBA_S3TC_DXT3_EXT
594 * GL_COMPRESSED_RGBA_S3TC_DXT5_EXT
595 * The output is written as 8/8/8/8 ARGB (32 bit words)
596 * 16 bytes are read from 'data' for each block.
597 */
598 void
decodeDXT(const GLvoid * data,int width,int height,void * surface,int stride,int format)599 decodeDXT(const GLvoid *data, int width, int height,
600 void *surface, int stride, int format)
601 {
602 #if TIMING
603 struct timeval start_t, end_t;
604 struct timezone tz;
605
606 gettimeofday(&start_t, &tz);
607 #endif
608
609 switch (format) {
610 case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
611 decodeDXT1(data, width, height, surface, stride, false);
612 break;
613
614 case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
615 decodeDXT1(data, width, height, surface, stride, true);
616 break;
617
618 case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
619 decodeDXT3(data, width, height, surface, stride);
620 break;
621
622 case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
623 decodeDXT5(data, width, height, surface, stride);
624 break;
625 }
626
627 #if TIMING
628 gettimeofday(&end_t, &tz);
629 long usec = (end_t.tv_sec - start_t.tv_sec)*1000000 +
630 (end_t.tv_usec - start_t.tv_usec);
631
632 printf("Loaded w=%d h=%d in %ld usec\n", width, height, usec);
633 #endif
634 }
635
636 } // namespace android
637