/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
24
25
/**
 * \file texcompress_fxt1.c
 * GL_3DFX_texture_compression_FXT1 support.
 */
30
31
#include <string.h>

#include "glheader.h"
#include "imports.h"
#include "image.h"
#include "macros.h"
#include "mipmap.h"
#include "texcompress.h"
#include "texcompress_fxt1.h"
#include "texstore.h"
40
41
42 static void
43 fxt1_encode (GLuint width, GLuint height, GLint comps,
44 const void *source, GLint srcRowStride,
45 void *dest, GLint destRowStride);
46
47 static void
48 fxt1_decode_1 (const void *texture, GLint stride,
49 GLint i, GLint j, GLubyte *rgba);
50
51
52 /**
53 * Store user's image in rgb_fxt1 format.
54 */
55 GLboolean
_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)56 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
57 {
58 const GLubyte *pixels;
59 GLint srcRowStride;
60 GLubyte *dst;
61 const GLubyte *tempImage = NULL;
62
63 assert(dstFormat == MESA_FORMAT_RGB_FXT1);
64
65 if (srcFormat != GL_RGB ||
66 srcType != GL_UNSIGNED_BYTE ||
67 ctx->_ImageTransferState ||
68 ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
69 srcPacking->SwapBytes) {
70 /* convert image to RGB/GLubyte */
71 GLubyte *tempImageSlices[1];
72 int rgbRowStride = 3 * srcWidth * sizeof(GLubyte);
73 tempImage = malloc(srcWidth * srcHeight * 3 * sizeof(GLubyte));
74 if (!tempImage)
75 return GL_FALSE; /* out of memory */
76 tempImageSlices[0] = (GLubyte *) tempImage;
77 _mesa_texstore(ctx, dims,
78 baseInternalFormat,
79 MESA_FORMAT_RGB_UNORM8,
80 rgbRowStride, tempImageSlices,
81 srcWidth, srcHeight, srcDepth,
82 srcFormat, srcType, srcAddr,
83 srcPacking);
84 pixels = tempImage;
85 srcRowStride = 3 * srcWidth;
86 srcFormat = GL_RGB;
87 }
88 else {
89 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
90 srcFormat, srcType, 0, 0);
91
92 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
93 srcType) / sizeof(GLubyte);
94 }
95
96 dst = dstSlices[0];
97
98 fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
99 dst, dstRowStride);
100
101 free((void*) tempImage);
102
103 return GL_TRUE;
104 }
105
106
107 /**
108 * Store user's image in rgba_fxt1 format.
109 */
110 GLboolean
_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)111 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
112 {
113 const GLubyte *pixels;
114 GLint srcRowStride;
115 GLubyte *dst;
116 const GLubyte *tempImage = NULL;
117
118 assert(dstFormat == MESA_FORMAT_RGBA_FXT1);
119
120 if (srcFormat != GL_RGBA ||
121 srcType != GL_UNSIGNED_BYTE ||
122 ctx->_ImageTransferState ||
123 srcPacking->SwapBytes) {
124 /* convert image to RGBA/GLubyte */
125 GLubyte *tempImageSlices[1];
126 int rgbaRowStride = 4 * srcWidth * sizeof(GLubyte);
127 tempImage = malloc(srcWidth * srcHeight * 4 * sizeof(GLubyte));
128 if (!tempImage)
129 return GL_FALSE; /* out of memory */
130 tempImageSlices[0] = (GLubyte *) tempImage;
131 _mesa_texstore(ctx, dims,
132 baseInternalFormat,
133 _mesa_little_endian() ? MESA_FORMAT_R8G8B8A8_UNORM
134 : MESA_FORMAT_A8B8G8R8_UNORM,
135 rgbaRowStride, tempImageSlices,
136 srcWidth, srcHeight, srcDepth,
137 srcFormat, srcType, srcAddr,
138 srcPacking);
139 pixels = tempImage;
140 srcRowStride = 4 * srcWidth;
141 srcFormat = GL_RGBA;
142 }
143 else {
144 pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
145 srcFormat, srcType, 0, 0);
146
147 srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
148 srcType) / sizeof(GLubyte);
149 }
150
151 dst = dstSlices[0];
152
153 fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
154 dst, dstRowStride);
155
156 free((void*) tempImage);
157
158 return GL_TRUE;
159 }
160
161
/***************************************************************************\
 * FXT1 encoder
 *
 * The encoder was built by reversing the decoder,
 * and is vaguely based on Texus2 by 3dfx. Note that this code
 * is merely a proof of concept, since it is highly UNoptimized;
 * moreover, it is sub-optimal due to initial conditions passed
 * to Lloyd's algorithm (the interpolation modes are even worse).
\***************************************************************************/
171
172
173 #define MAX_COMP 4 /* ever needed maximum number of components in texel */
174 #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */
175 #define N_TEXELS 32 /* number of texels in a block (always 32) */
176 #define LL_N_REP 50 /* number of iterations in lloyd's vq */
177 #define LL_RMS_D 10 /* fault tolerance (maximum delta) */
178 #define LL_RMS_E 255 /* fault tolerance (maximum error) */
179 #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */
180 static const GLuint zero = 0;
181 #define ISTBLACK(v) (memcmp(&(v), &zero, sizeof(zero)) == 0)
182
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * Fx64 is used to assemble the upper 64 bits of a compressed block:
 *   FX64_MOV32(a, b)  load the 32-bit value b into a
 *   FX64_OR32(a, b)   OR the 32-bit value b into the low bits of a
 *   FX64_SHL(a, c)    shift a left by c bits
 *
 * The native uint64_t variant is the one compiled; the two-word struct
 * variant is kept as a fallback and is selected by flipping the #if.
 * In the fallback, FX64_SHL must not be used with c == 0 (it would shift
 * a 32-bit word by 32).
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) a = b
#define FX64_OR32(a, b)  a |= b
#define FX64_SHL(a, c)   a <<= c

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

#define FX64_MOV32(a, b) a.lo = b
#define FX64_OR32(a, b)  a.lo |= b

#define FX64_SHL(a, c)                                 \
   do {                                                \
      if ((c) >= 32) {                                 \
         a.hi = a.lo << ((c) - 32);                    \
         a.lo = 0;                                     \
      } else {                                         \
         a.hi = (a.hi << (c)) | (a.lo >> (32 - (c)));  \
         a.lo <<= (c);                                 \
      }                                                \
   } while (0)

#endif
219
220
#define F(i) (GLfloat)1 /* can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define SAFECDOT 1 /* for paranoids */

/*
 * MAKEIVEC(NV, NC, IV, B, V0, V1)
 *
 * Build the interpolation vector IV[0..NC-1] and bias B such that, for a
 * texel V, dot(V, IV) + B maps V0 to about 0.5 and V1 to about NV + 0.5;
 * truncating the sum to an integer therefore yields a selector in 0..NV
 * (see CALCCDOT).
 *
 * The macro uses a variable named `i` from the enclosing scope as its loop
 * counter.  V0 and V1 must differ in at least one component, otherwise the
 * squared distance d2 is zero and the division below is by zero.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)  \
   do {                                  \
      /* compute interpolation vector */ \
      GLfloat d2 = 0.0F;                 \
      GLfloat rd2;                       \
                                         \
      for (i = 0; i < NC; i++) {         \
         IV[i] = (V1[i] - V0[i]) * F(i); \
         d2 += IV[i] * IV[i];            \
      }                                  \
      rd2 = (GLfloat)NV / d2;            \
      B = 0;                             \
      for (i = 0; i < NC; i++) {         \
         IV[i] *= F(i);                  \
         B -= IV[i] * V0[i];             \
         IV[i] *= rd2;                   \
      }                                  \
      B = B * rd2 + 0.5f;                \
   } while (0)

/*
 * CALCCDOT(TEXEL, NV, NC, IV, B, V)
 *
 * Project texel V onto the interpolation vector produced by MAKEIVEC and
 * store the resulting selector in TEXEL.  With SAFECDOT enabled the result
 * is clamped to the range 0..NV.
 *
 * Like MAKEIVEC, this uses a variable named `i` from the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)\
   do {                                  \
      GLfloat dot = 0.0F;                \
      for (i = 0; i < NC; i++) {         \
         dot += V[i] * IV[i];            \
      }                                  \
      TEXEL = (GLint)(dot + B);          \
      if (SAFECDOT) {                    \
         if (TEXEL < 0) {                \
            TEXEL = 0;                   \
         } else if (TEXEL > NV) {        \
            TEXEL = NV;                  \
         }                               \
      }                                  \
   } while (0)
259
260
261 static GLint
fxt1_bestcol(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[MAX_COMP],GLint nc)262 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
263 GLubyte input[MAX_COMP], GLint nc)
264 {
265 GLint i, j, best = -1;
266 GLfloat err = 1e9; /* big enough */
267
268 for (j = 0; j < nv; j++) {
269 GLfloat e = 0.0F;
270 for (i = 0; i < nc; i++) {
271 e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
272 }
273 if (e < err) {
274 err = e;
275 best = j;
276 }
277 }
278
279 return best;
280 }
281
282
283 static GLint
fxt1_worst(GLfloat vec[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)284 fxt1_worst (GLfloat vec[MAX_COMP],
285 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
286 {
287 GLint i, k, worst = -1;
288 GLfloat err = -1.0F; /* small enough */
289
290 for (k = 0; k < n; k++) {
291 GLfloat e = 0.0F;
292 for (i = 0; i < nc; i++) {
293 e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
294 }
295 if (e > err) {
296 err = e;
297 worst = k;
298 }
299 }
300
301 return worst;
302 }
303
304
305 static GLint
fxt1_variance(GLdouble variance[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)306 fxt1_variance (GLdouble variance[MAX_COMP],
307 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
308 {
309 GLint i, k, best = 0;
310 GLint sx, sx2;
311 GLdouble var, maxvar = -1; /* small enough */
312 GLdouble teenth = 1.0 / n;
313
314 for (i = 0; i < nc; i++) {
315 sx = sx2 = 0;
316 for (k = 0; k < n; k++) {
317 GLint t = input[k][i];
318 sx += t;
319 sx2 += t * t;
320 }
321 var = sx2 * teenth - sx * sx * teenth * teenth;
322 if (maxvar < var) {
323 maxvar = var;
324 best = i;
325 }
326 if (variance) {
327 variance[i] = var;
328 }
329 }
330
331 return best;
332 }
333
334
335 static GLint
fxt1_choose(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)336 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
337 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
338 {
339 #if 0
340 /* Choose colors from a grid.
341 */
342 GLint i, j;
343
344 for (j = 0; j < nv; j++) {
345 GLint m = j * (n - 1) / (nv - 1);
346 for (i = 0; i < nc; i++) {
347 vec[j][i] = input[m][i];
348 }
349 }
350 #else
351 /* Our solution here is to find the darkest and brightest colors in
352 * the 8x4 tile and use those as the two representative colors.
353 * There are probably better algorithms to use (histogram-based).
354 */
355 GLint i, j, k;
356 GLint minSum = 2000; /* big enough */
357 GLint maxSum = -1; /* small enough */
358 GLint minCol = 0; /* phoudoin: silent compiler! */
359 GLint maxCol = 0; /* phoudoin: silent compiler! */
360
361 struct {
362 GLint flag;
363 GLint key;
364 GLint freq;
365 GLint idx;
366 } hist[N_TEXELS];
367 GLint lenh = 0;
368
369 memset(hist, 0, sizeof(hist));
370
371 for (k = 0; k < n; k++) {
372 GLint l;
373 GLint key = 0;
374 GLint sum = 0;
375 for (i = 0; i < nc; i++) {
376 key <<= 8;
377 key |= input[k][i];
378 sum += input[k][i];
379 }
380 for (l = 0; l < n; l++) {
381 if (!hist[l].flag) {
382 /* alloc new slot */
383 hist[l].flag = !0;
384 hist[l].key = key;
385 hist[l].freq = 1;
386 hist[l].idx = k;
387 lenh = l + 1;
388 break;
389 } else if (hist[l].key == key) {
390 hist[l].freq++;
391 break;
392 }
393 }
394 if (minSum > sum) {
395 minSum = sum;
396 minCol = k;
397 }
398 if (maxSum < sum) {
399 maxSum = sum;
400 maxCol = k;
401 }
402 }
403
404 if (lenh <= nv) {
405 for (j = 0; j < lenh; j++) {
406 for (i = 0; i < nc; i++) {
407 vec[j][i] = (GLfloat)input[hist[j].idx][i];
408 }
409 }
410 for (; j < nv; j++) {
411 for (i = 0; i < nc; i++) {
412 vec[j][i] = vec[0][i];
413 }
414 }
415 return 0;
416 }
417
418 for (j = 0; j < nv; j++) {
419 for (i = 0; i < nc; i++) {
420 vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
421 }
422 }
423 #endif
424
425 return !0;
426 }
427
428
429 static GLint
fxt1_lloyd(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)430 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
431 GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
432 {
433 /* Use the generalized lloyd's algorithm for VQ:
434 * find 4 color vectors.
435 *
436 * for each sample color
437 * sort to nearest vector.
438 *
439 * replace each vector with the centroid of its matching colors.
440 *
441 * repeat until RMS doesn't improve.
442 *
443 * if a color vector has no samples, or becomes the same as another
444 * vector, replace it with the color which is farthest from a sample.
445 *
446 * vec[][MAX_COMP] initial vectors and resulting colors
447 * nv number of resulting colors required
448 * input[N_TEXELS][MAX_COMP] input texels
449 * nc number of components in input / vec
450 * n number of input samples
451 */
452
453 GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
454 GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
455 GLfloat error, lasterror = 1e9;
456
457 GLint i, j, k, rep;
458
459 /* the quantizer */
460 for (rep = 0; rep < LL_N_REP; rep++) {
461 /* reset sums & counters */
462 for (j = 0; j < nv; j++) {
463 for (i = 0; i < nc; i++) {
464 sum[j][i] = 0;
465 }
466 cnt[j] = 0;
467 }
468 error = 0;
469
470 /* scan whole block */
471 for (k = 0; k < n; k++) {
472 #if 1
473 GLint best = -1;
474 GLfloat err = 1e9; /* big enough */
475 /* determine best vector */
476 for (j = 0; j < nv; j++) {
477 GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
478 (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
479 (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
480 if (nc == 4) {
481 e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
482 }
483 if (e < err) {
484 err = e;
485 best = j;
486 }
487 }
488 #else
489 GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
490 #endif
491 assert(best >= 0);
492 /* add in closest color */
493 for (i = 0; i < nc; i++) {
494 sum[best][i] += input[k][i];
495 }
496 /* mark this vector as used */
497 cnt[best]++;
498 /* accumulate error */
499 error += err;
500 }
501
502 /* check RMS */
503 if ((error < LL_RMS_E) ||
504 ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
505 return !0; /* good match */
506 }
507 lasterror = error;
508
509 /* move each vector to the barycenter of its closest colors */
510 for (j = 0; j < nv; j++) {
511 if (cnt[j]) {
512 GLfloat div = 1.0F / cnt[j];
513 for (i = 0; i < nc; i++) {
514 vec[j][i] = div * sum[j][i];
515 }
516 } else {
517 /* this vec has no samples or is identical with a previous vec */
518 GLint worst = fxt1_worst(vec[j], input, nc, n);
519 for (i = 0; i < nc; i++) {
520 vec[j][i] = input[worst][i];
521 }
522 }
523 }
524 }
525
526 return 0; /* could not converge fast enough */
527 }
528
529
530 static void
fxt1_quantize_CHROMA(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])531 fxt1_quantize_CHROMA (GLuint *cc,
532 GLubyte input[N_TEXELS][MAX_COMP])
533 {
534 const GLint n_vect = 4; /* 4 base vectors to find */
535 const GLint n_comp = 3; /* 3 components: R, G, B */
536 GLfloat vec[MAX_VECT][MAX_COMP];
537 GLint i, j, k;
538 Fx64 hi; /* high quadword */
539 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
540
541 if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
542 fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
543 }
544
545 FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
546 for (j = n_vect - 1; j >= 0; j--) {
547 for (i = 0; i < n_comp; i++) {
548 /* add in colors */
549 FX64_SHL(hi, 5);
550 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
551 }
552 }
553 ((Fx64 *)cc)[1] = hi;
554
555 lohi = lolo = 0;
556 /* right microtile */
557 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
558 lohi <<= 2;
559 lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
560 }
561 /* left microtile */
562 for (; k >= 0; k--) {
563 lolo <<= 2;
564 lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
565 }
566 cc[1] = lohi;
567 cc[0] = lolo;
568 }
569
570
571 static void
fxt1_quantize_ALPHA0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)572 fxt1_quantize_ALPHA0 (GLuint *cc,
573 GLubyte input[N_TEXELS][MAX_COMP],
574 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
575 {
576 const GLint n_vect = 3; /* 3 base vectors to find */
577 const GLint n_comp = 4; /* 4 components: R, G, B, A */
578 GLfloat vec[MAX_VECT][MAX_COMP];
579 GLint i, j, k;
580 Fx64 hi; /* high quadword */
581 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
582
583 /* the last vector indicates zero */
584 for (i = 0; i < n_comp; i++) {
585 vec[n_vect][i] = 0;
586 }
587
588 /* the first n texels in reord are guaranteed to be non-zero */
589 if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
590 fxt1_lloyd(vec, n_vect, reord, n_comp, n);
591 }
592
593 FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
594 for (j = n_vect - 1; j >= 0; j--) {
595 /* add in alphas */
596 FX64_SHL(hi, 5);
597 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
598 }
599 for (j = n_vect - 1; j >= 0; j--) {
600 for (i = 0; i < n_comp - 1; i++) {
601 /* add in colors */
602 FX64_SHL(hi, 5);
603 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
604 }
605 }
606 ((Fx64 *)cc)[1] = hi;
607
608 lohi = lolo = 0;
609 /* right microtile */
610 for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
611 lohi <<= 2;
612 lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
613 }
614 /* left microtile */
615 for (; k >= 0; k--) {
616 lolo <<= 2;
617 lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
618 }
619 cc[1] = lohi;
620 cc[0] = lolo;
621 }
622
623
624 static void
fxt1_quantize_ALPHA1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])625 fxt1_quantize_ALPHA1 (GLuint *cc,
626 GLubyte input[N_TEXELS][MAX_COMP])
627 {
628 const GLint n_vect = 3; /* highest vector number in each microtile */
629 const GLint n_comp = 4; /* 4 components: R, G, B, A */
630 GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
631 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
632 GLint i, j, k;
633 Fx64 hi; /* high quadword */
634 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
635
636 GLint minSum;
637 GLint maxSum;
638 GLint minColL = 0, maxColL = 0;
639 GLint minColR = 0, maxColR = 0;
640 GLint sumL = 0, sumR = 0;
641 GLint nn_comp;
642 /* Our solution here is to find the darkest and brightest colors in
643 * the 4x4 tile and use those as the two representative colors.
644 * There are probably better algorithms to use (histogram-based).
645 */
646 nn_comp = n_comp;
647 while ((minColL == maxColL) && nn_comp) {
648 minSum = 2000; /* big enough */
649 maxSum = -1; /* small enough */
650 for (k = 0; k < N_TEXELS / 2; k++) {
651 GLint sum = 0;
652 for (i = 0; i < nn_comp; i++) {
653 sum += input[k][i];
654 }
655 if (minSum > sum) {
656 minSum = sum;
657 minColL = k;
658 }
659 if (maxSum < sum) {
660 maxSum = sum;
661 maxColL = k;
662 }
663 sumL += sum;
664 }
665
666 nn_comp--;
667 }
668
669 nn_comp = n_comp;
670 while ((minColR == maxColR) && nn_comp) {
671 minSum = 2000; /* big enough */
672 maxSum = -1; /* small enough */
673 for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
674 GLint sum = 0;
675 for (i = 0; i < nn_comp; i++) {
676 sum += input[k][i];
677 }
678 if (minSum > sum) {
679 minSum = sum;
680 minColR = k;
681 }
682 if (maxSum < sum) {
683 maxSum = sum;
684 maxColR = k;
685 }
686 sumR += sum;
687 }
688
689 nn_comp--;
690 }
691
692 /* choose the common vector (yuck!) */
693 {
694 GLint j1, j2;
695 GLint v1 = 0, v2 = 0;
696 GLfloat err = 1e9; /* big enough */
697 GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
698 for (i = 0; i < n_comp; i++) {
699 tv[0][i] = input[minColL][i];
700 tv[1][i] = input[maxColL][i];
701 tv[2][i] = input[minColR][i];
702 tv[3][i] = input[maxColR][i];
703 }
704 for (j1 = 0; j1 < 2; j1++) {
705 for (j2 = 2; j2 < 4; j2++) {
706 GLfloat e = 0.0F;
707 for (i = 0; i < n_comp; i++) {
708 e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
709 }
710 if (e < err) {
711 err = e;
712 v1 = j1;
713 v2 = j2;
714 }
715 }
716 }
717 for (i = 0; i < n_comp; i++) {
718 vec[0][i] = tv[1 - v1][i];
719 vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
720 vec[2][i] = tv[5 - v2][i];
721 }
722 }
723
724 /* left microtile */
725 cc[0] = 0;
726 if (minColL != maxColL) {
727 /* compute interpolation vector */
728 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
729
730 /* add in texels */
731 lolo = 0;
732 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
733 GLint texel;
734 /* interpolate color */
735 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
736 /* add in texel */
737 lolo <<= 2;
738 lolo |= texel;
739 }
740
741 cc[0] = lolo;
742 }
743
744 /* right microtile */
745 cc[1] = 0;
746 if (minColR != maxColR) {
747 /* compute interpolation vector */
748 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
749
750 /* add in texels */
751 lohi = 0;
752 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
753 GLint texel;
754 /* interpolate color */
755 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
756 /* add in texel */
757 lohi <<= 2;
758 lohi |= texel;
759 }
760
761 cc[1] = lohi;
762 }
763
764 FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
765 for (j = n_vect - 1; j >= 0; j--) {
766 /* add in alphas */
767 FX64_SHL(hi, 5);
768 FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
769 }
770 for (j = n_vect - 1; j >= 0; j--) {
771 for (i = 0; i < n_comp - 1; i++) {
772 /* add in colors */
773 FX64_SHL(hi, 5);
774 FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
775 }
776 }
777 ((Fx64 *)cc)[1] = hi;
778 }
779
780
781 static void
fxt1_quantize_HI(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)782 fxt1_quantize_HI (GLuint *cc,
783 GLubyte input[N_TEXELS][MAX_COMP],
784 GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
785 {
786 const GLint n_vect = 6; /* highest vector number */
787 const GLint n_comp = 3; /* 3 components: R, G, B */
788 GLfloat b = 0.0F; /* phoudoin: silent compiler! */
789 GLfloat iv[MAX_COMP]; /* interpolation vector */
790 GLint i, k;
791 GLuint hihi; /* high quadword: hi dword */
792
793 GLint minSum = 2000; /* big enough */
794 GLint maxSum = -1; /* small enough */
795 GLint minCol = 0; /* phoudoin: silent compiler! */
796 GLint maxCol = 0; /* phoudoin: silent compiler! */
797
798 /* Our solution here is to find the darkest and brightest colors in
799 * the 8x4 tile and use those as the two representative colors.
800 * There are probably better algorithms to use (histogram-based).
801 */
802 for (k = 0; k < n; k++) {
803 GLint sum = 0;
804 for (i = 0; i < n_comp; i++) {
805 sum += reord[k][i];
806 }
807 if (minSum > sum) {
808 minSum = sum;
809 minCol = k;
810 }
811 if (maxSum < sum) {
812 maxSum = sum;
813 maxCol = k;
814 }
815 }
816
817 hihi = 0; /* cc-hi = "00" */
818 for (i = 0; i < n_comp; i++) {
819 /* add in colors */
820 hihi <<= 5;
821 hihi |= reord[maxCol][i] >> 3;
822 }
823 for (i = 0; i < n_comp; i++) {
824 /* add in colors */
825 hihi <<= 5;
826 hihi |= reord[minCol][i] >> 3;
827 }
828 cc[3] = hihi;
829 cc[0] = cc[1] = cc[2] = 0;
830
831 /* compute interpolation vector */
832 if (minCol != maxCol) {
833 MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
834 }
835
836 /* add in texels */
837 for (k = N_TEXELS - 1; k >= 0; k--) {
838 GLint t = k * 3;
839 GLuint *kk = (GLuint *)((char *)cc + t / 8);
840 GLint texel = n_vect + 1; /* transparent black */
841
842 if (!ISTBLACK(input[k])) {
843 if (minCol != maxCol) {
844 /* interpolate color */
845 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
846 /* add in texel */
847 kk[0] |= texel << (t & 7);
848 }
849 } else {
850 /* add in texel */
851 kk[0] |= texel << (t & 7);
852 }
853 }
854 }
855
856
857 static void
fxt1_quantize_MIXED1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])858 fxt1_quantize_MIXED1 (GLuint *cc,
859 GLubyte input[N_TEXELS][MAX_COMP])
860 {
861 const GLint n_vect = 2; /* highest vector number in each microtile */
862 const GLint n_comp = 3; /* 3 components: R, G, B */
863 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
864 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
865 GLint i, j, k;
866 Fx64 hi; /* high quadword */
867 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
868
869 GLint minSum;
870 GLint maxSum;
871 GLint minColL = 0, maxColL = -1;
872 GLint minColR = 0, maxColR = -1;
873
874 /* Our solution here is to find the darkest and brightest colors in
875 * the 4x4 tile and use those as the two representative colors.
876 * There are probably better algorithms to use (histogram-based).
877 */
878 minSum = 2000; /* big enough */
879 maxSum = -1; /* small enough */
880 for (k = 0; k < N_TEXELS / 2; k++) {
881 if (!ISTBLACK(input[k])) {
882 GLint sum = 0;
883 for (i = 0; i < n_comp; i++) {
884 sum += input[k][i];
885 }
886 if (minSum > sum) {
887 minSum = sum;
888 minColL = k;
889 }
890 if (maxSum < sum) {
891 maxSum = sum;
892 maxColL = k;
893 }
894 }
895 }
896 minSum = 2000; /* big enough */
897 maxSum = -1; /* small enough */
898 for (; k < N_TEXELS; k++) {
899 if (!ISTBLACK(input[k])) {
900 GLint sum = 0;
901 for (i = 0; i < n_comp; i++) {
902 sum += input[k][i];
903 }
904 if (minSum > sum) {
905 minSum = sum;
906 minColR = k;
907 }
908 if (maxSum < sum) {
909 maxSum = sum;
910 maxColR = k;
911 }
912 }
913 }
914
915 /* left microtile */
916 if (maxColL == -1) {
917 /* all transparent black */
918 cc[0] = ~0u;
919 for (i = 0; i < n_comp; i++) {
920 vec[0][i] = 0;
921 vec[1][i] = 0;
922 }
923 } else {
924 cc[0] = 0;
925 for (i = 0; i < n_comp; i++) {
926 vec[0][i] = input[minColL][i];
927 vec[1][i] = input[maxColL][i];
928 }
929 if (minColL != maxColL) {
930 /* compute interpolation vector */
931 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
932
933 /* add in texels */
934 lolo = 0;
935 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
936 GLint texel = n_vect + 1; /* transparent black */
937 if (!ISTBLACK(input[k])) {
938 /* interpolate color */
939 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
940 }
941 /* add in texel */
942 lolo <<= 2;
943 lolo |= texel;
944 }
945 cc[0] = lolo;
946 }
947 }
948
949 /* right microtile */
950 if (maxColR == -1) {
951 /* all transparent black */
952 cc[1] = ~0u;
953 for (i = 0; i < n_comp; i++) {
954 vec[2][i] = 0;
955 vec[3][i] = 0;
956 }
957 } else {
958 cc[1] = 0;
959 for (i = 0; i < n_comp; i++) {
960 vec[2][i] = input[minColR][i];
961 vec[3][i] = input[maxColR][i];
962 }
963 if (minColR != maxColR) {
964 /* compute interpolation vector */
965 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
966
967 /* add in texels */
968 lohi = 0;
969 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
970 GLint texel = n_vect + 1; /* transparent black */
971 if (!ISTBLACK(input[k])) {
972 /* interpolate color */
973 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
974 }
975 /* add in texel */
976 lohi <<= 2;
977 lohi |= texel;
978 }
979 cc[1] = lohi;
980 }
981 }
982
983 FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
984 for (j = 2 * 2 - 1; j >= 0; j--) {
985 for (i = 0; i < n_comp; i++) {
986 /* add in colors */
987 FX64_SHL(hi, 5);
988 FX64_OR32(hi, vec[j][i] >> 3);
989 }
990 }
991 ((Fx64 *)cc)[1] = hi;
992 }
993
994
995 static void
fxt1_quantize_MIXED0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])996 fxt1_quantize_MIXED0 (GLuint *cc,
997 GLubyte input[N_TEXELS][MAX_COMP])
998 {
999 const GLint n_vect = 3; /* highest vector number in each microtile */
1000 const GLint n_comp = 3; /* 3 components: R, G, B */
1001 GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1002 GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1003 GLint i, j, k;
1004 Fx64 hi; /* high quadword */
1005 GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1006
1007 GLint minColL = 0, maxColL = 0;
1008 GLint minColR = 0, maxColR = 0;
1009 #if 0
1010 GLint minSum;
1011 GLint maxSum;
1012
1013 /* Our solution here is to find the darkest and brightest colors in
1014 * the 4x4 tile and use those as the two representative colors.
1015 * There are probably better algorithms to use (histogram-based).
1016 */
1017 minSum = 2000; /* big enough */
1018 maxSum = -1; /* small enough */
1019 for (k = 0; k < N_TEXELS / 2; k++) {
1020 GLint sum = 0;
1021 for (i = 0; i < n_comp; i++) {
1022 sum += input[k][i];
1023 }
1024 if (minSum > sum) {
1025 minSum = sum;
1026 minColL = k;
1027 }
1028 if (maxSum < sum) {
1029 maxSum = sum;
1030 maxColL = k;
1031 }
1032 }
1033 minSum = 2000; /* big enough */
1034 maxSum = -1; /* small enough */
1035 for (; k < N_TEXELS; k++) {
1036 GLint sum = 0;
1037 for (i = 0; i < n_comp; i++) {
1038 sum += input[k][i];
1039 }
1040 if (minSum > sum) {
1041 minSum = sum;
1042 minColR = k;
1043 }
1044 if (maxSum < sum) {
1045 maxSum = sum;
1046 maxColR = k;
1047 }
1048 }
1049 #else
1050 GLint minVal;
1051 GLint maxVal;
1052 GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1053 GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1054
1055 /* Scan the channel with max variance for lo & hi
1056 * and use those as the two representative colors.
1057 */
1058 minVal = 2000; /* big enough */
1059 maxVal = -1; /* small enough */
1060 for (k = 0; k < N_TEXELS / 2; k++) {
1061 GLint t = input[k][maxVarL];
1062 if (minVal > t) {
1063 minVal = t;
1064 minColL = k;
1065 }
1066 if (maxVal < t) {
1067 maxVal = t;
1068 maxColL = k;
1069 }
1070 }
1071 minVal = 2000; /* big enough */
1072 maxVal = -1; /* small enough */
1073 for (; k < N_TEXELS; k++) {
1074 GLint t = input[k][maxVarR];
1075 if (minVal > t) {
1076 minVal = t;
1077 minColR = k;
1078 }
1079 if (maxVal < t) {
1080 maxVal = t;
1081 maxColR = k;
1082 }
1083 }
1084 #endif
1085
1086 /* left microtile */
1087 cc[0] = 0;
1088 for (i = 0; i < n_comp; i++) {
1089 vec[0][i] = input[minColL][i];
1090 vec[1][i] = input[maxColL][i];
1091 }
1092 if (minColL != maxColL) {
1093 /* compute interpolation vector */
1094 MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1095
1096 /* add in texels */
1097 lolo = 0;
1098 for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1099 GLint texel;
1100 /* interpolate color */
1101 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1102 /* add in texel */
1103 lolo <<= 2;
1104 lolo |= texel;
1105 }
1106
1107 /* funky encoding for LSB of green */
1108 if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1109 for (i = 0; i < n_comp; i++) {
1110 vec[1][i] = input[minColL][i];
1111 vec[0][i] = input[maxColL][i];
1112 }
1113 lolo = ~lolo;
1114 }
1115
1116 cc[0] = lolo;
1117 }
1118
1119 /* right microtile */
1120 cc[1] = 0;
1121 for (i = 0; i < n_comp; i++) {
1122 vec[2][i] = input[minColR][i];
1123 vec[3][i] = input[maxColR][i];
1124 }
1125 if (minColR != maxColR) {
1126 /* compute interpolation vector */
1127 MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1128
1129 /* add in texels */
1130 lohi = 0;
1131 for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1132 GLint texel;
1133 /* interpolate color */
1134 CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1135 /* add in texel */
1136 lohi <<= 2;
1137 lohi |= texel;
1138 }
1139
1140 /* funky encoding for LSB of green */
1141 if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1142 for (i = 0; i < n_comp; i++) {
1143 vec[3][i] = input[minColR][i];
1144 vec[2][i] = input[maxColR][i];
1145 }
1146 lohi = ~lohi;
1147 }
1148
1149 cc[1] = lohi;
1150 }
1151
1152 FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1153 for (j = 2 * 2 - 1; j >= 0; j--) {
1154 for (i = 0; i < n_comp; i++) {
1155 /* add in colors */
1156 FX64_SHL(hi, 5);
1157 FX64_OR32(hi, vec[j][i] >> 3);
1158 }
1159 }
1160 ((Fx64 *)cc)[1] = hi;
1161 }
1162
1163
1164 static void
fxt1_quantize(GLuint * cc,const GLubyte * lines[],GLint comps)1165 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1166 {
1167 GLint trualpha;
1168 GLubyte reord[N_TEXELS][MAX_COMP];
1169
1170 GLubyte input[N_TEXELS][MAX_COMP];
1171 GLint i, k, l;
1172
1173 if (comps == 3) {
1174 /* make the whole block opaque */
1175 memset(input, -1, sizeof(input));
1176 }
1177
1178 /* 8 texels each line */
1179 for (l = 0; l < 4; l++) {
1180 for (k = 0; k < 4; k++) {
1181 for (i = 0; i < comps; i++) {
1182 input[k + l * 4][i] = *lines[l]++;
1183 }
1184 }
1185 for (; k < 8; k++) {
1186 for (i = 0; i < comps; i++) {
1187 input[k + l * 4 + 12][i] = *lines[l]++;
1188 }
1189 }
1190 }
1191
1192 /* block layout:
1193 * 00, 01, 02, 03, 08, 09, 0a, 0b
1194 * 10, 11, 12, 13, 18, 19, 1a, 1b
1195 * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1196 * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1197 */
1198
1199 /* [dBorca]
1200 * stupidity flows forth from this
1201 */
1202 l = N_TEXELS;
1203 trualpha = 0;
1204 if (comps == 4) {
1205 /* skip all transparent black texels */
1206 l = 0;
1207 for (k = 0; k < N_TEXELS; k++) {
1208 /* test all components against 0 */
1209 if (!ISTBLACK(input[k])) {
1210 /* texel is not transparent black */
1211 COPY_4UBV(reord[l], input[k]);
1212 if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1213 /* non-opaque texel */
1214 trualpha = !0;
1215 }
1216 l++;
1217 }
1218 }
1219 }
1220
1221 #if 0
1222 if (trualpha) {
1223 fxt1_quantize_ALPHA0(cc, input, reord, l);
1224 } else if (l == 0) {
1225 cc[0] = cc[1] = cc[2] = -1;
1226 cc[3] = 0;
1227 } else if (l < N_TEXELS) {
1228 fxt1_quantize_HI(cc, input, reord, l);
1229 } else {
1230 fxt1_quantize_CHROMA(cc, input);
1231 }
1232 (void)fxt1_quantize_ALPHA1;
1233 (void)fxt1_quantize_MIXED1;
1234 (void)fxt1_quantize_MIXED0;
1235 #else
1236 if (trualpha) {
1237 fxt1_quantize_ALPHA1(cc, input);
1238 } else if (l == 0) {
1239 cc[0] = cc[1] = cc[2] = ~0u;
1240 cc[3] = 0;
1241 } else if (l < N_TEXELS) {
1242 fxt1_quantize_MIXED1(cc, input);
1243 } else {
1244 fxt1_quantize_MIXED0(cc, input);
1245 }
1246 (void)fxt1_quantize_ALPHA0;
1247 (void)fxt1_quantize_HI;
1248 (void)fxt1_quantize_CHROMA;
1249 #endif
1250 }
1251
1252
1253
1254 /**
1255 * Upscale an image by replication, not (typical) stretching.
1256 * We use this when the image width or height is less than a
1257 * certain size (4, 8) and we need to upscale an image.
1258 */
1259 static void
upscale_teximage2d(GLsizei inWidth,GLsizei inHeight,GLsizei outWidth,GLsizei outHeight,GLint comps,const GLubyte * src,GLint srcRowStride,GLubyte * dest)1260 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1261 GLsizei outWidth, GLsizei outHeight,
1262 GLint comps, const GLubyte *src, GLint srcRowStride,
1263 GLubyte *dest )
1264 {
1265 GLint i, j, k;
1266
1267 assert(outWidth >= inWidth);
1268 assert(outHeight >= inHeight);
1269 #if 0
1270 assert(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1271 assert((outWidth & 3) == 0);
1272 assert((outHeight & 3) == 0);
1273 #endif
1274
1275 for (i = 0; i < outHeight; i++) {
1276 const GLint ii = i % inHeight;
1277 for (j = 0; j < outWidth; j++) {
1278 const GLint jj = j % inWidth;
1279 for (k = 0; k < comps; k++) {
1280 dest[(i * outWidth + j) * comps + k]
1281 = src[ii * srcRowStride + jj * comps + k];
1282 }
1283 }
1284 }
1285 }
1286
1287
1288 static void
fxt1_encode(GLuint width,GLuint height,GLint comps,const void * source,GLint srcRowStride,void * dest,GLint destRowStride)1289 fxt1_encode (GLuint width, GLuint height, GLint comps,
1290 const void *source, GLint srcRowStride,
1291 void *dest, GLint destRowStride)
1292 {
1293 GLuint x, y;
1294 const GLubyte *data;
1295 GLuint *encoded = (GLuint *)dest;
1296 void *newSource = NULL;
1297
1298 assert(comps == 3 || comps == 4);
1299
1300 /* Replicate image if width is not M8 or height is not M4 */
1301 if ((width & 7) | (height & 3)) {
1302 GLint newWidth = (width + 7) & ~7;
1303 GLint newHeight = (height + 3) & ~3;
1304 newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1305 if (!newSource) {
1306 GET_CURRENT_CONTEXT(ctx);
1307 _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1308 goto cleanUp;
1309 }
1310 upscale_teximage2d(width, height, newWidth, newHeight,
1311 comps, (const GLubyte *) source,
1312 srcRowStride, (GLubyte *) newSource);
1313 source = newSource;
1314 width = newWidth;
1315 height = newHeight;
1316 srcRowStride = comps * newWidth;
1317 }
1318
1319 data = (const GLubyte *) source;
1320 destRowStride = (destRowStride - width * 2) / 4;
1321 for (y = 0; y < height; y += 4) {
1322 GLuint offs = 0 + (y + 0) * srcRowStride;
1323 for (x = 0; x < width; x += 8) {
1324 const GLubyte *lines[4];
1325 lines[0] = &data[offs];
1326 lines[1] = lines[0] + srcRowStride;
1327 lines[2] = lines[1] + srcRowStride;
1328 lines[3] = lines[2] + srcRowStride;
1329 offs += 8 * comps;
1330 fxt1_quantize(encoded, lines, comps);
1331 /* 128 bits per 8x4 block */
1332 encoded += 4;
1333 }
1334 encoded += destRowStride;
1335 }
1336
1337 cleanUp:
1338 free(newSource);
1339 }
1340
1341
1342 /***************************************************************************\
1343 * FXT1 decoder
1344 *
1345 * The decoder is based on GL_3DFX_texture_compression_FXT1
1346 * specification and serves as a concept for the encoder.
1347 \***************************************************************************/
1348
1349
1350 /* lookup table for scaling 5 bit colors up to 8 bits */
1351 static const GLubyte _rgb_scale_5[] = {
1352 0, 8, 16, 25, 33, 41, 49, 58,
1353 66, 74, 82, 90, 99, 107, 115, 123,
1354 132, 140, 148, 156, 165, 173, 181, 189,
1355 197, 206, 214, 222, 230, 239, 247, 255
1356 };
1357
1358 /* lookup table for scaling 6 bit colors up to 8 bits */
1359 static const GLubyte _rgb_scale_6[] = {
1360 0, 4, 8, 12, 16, 20, 24, 28,
1361 32, 36, 40, 45, 49, 53, 57, 61,
1362 65, 69, 73, 77, 81, 85, 89, 93,
1363 97, 101, 105, 109, 113, 117, 121, 125,
1364 130, 134, 138, 142, 146, 150, 154, 158,
1365 162, 166, 170, 174, 178, 182, 186, 190,
1366 194, 198, 202, 206, 210, 215, 219, 223,
1367 227, 231, 235, 239, 243, 247, 251, 255
1368 };
1369
1370
/*
 * Bit-field helpers for the decoder.
 *
 * CC_SEL(cc, which)  - view cc as an array of 32-bit words and return the
 *                      word holding bit number `which`, shifted so that
 *                      bit becomes bit 0.  Higher bits are NOT masked off
 *                      and the field must not cross a word boundary.
 * UP5(c)             - scale the low 5 bits of c to 8 bits.
 * UP6(c, b)          - scale the 6-bit value made of the low 5 bits of c
 *                      followed by the low bit of b to 8 bits.
 * LERP(n, t, c0, c1) - rounded integer blend: t/n of the way from c0 to c1.
 *
 * Every replacement list is fully parenthesized so the macros can be used
 * as operands of a larger expression, and CC_SEL only reads through a
 * const-qualified pointer.
 */
#define CC_SEL(cc, which) \
   (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) \
   ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1375
1376
1377 static void
fxt1_decode_1HI(const GLubyte * code,GLint t,GLubyte * rgba)1378 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1379 {
1380 const GLuint *cc;
1381
1382 t *= 3;
1383 cc = (const GLuint *)(code + t / 8);
1384 t = (cc[0] >> (t & 7)) & 7;
1385
1386 if (t == 7) {
1387 rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1388 } else {
1389 GLubyte r, g, b;
1390 cc = (const GLuint *)(code + 12);
1391 if (t == 0) {
1392 b = UP5(CC_SEL(cc, 0));
1393 g = UP5(CC_SEL(cc, 5));
1394 r = UP5(CC_SEL(cc, 10));
1395 } else if (t == 6) {
1396 b = UP5(CC_SEL(cc, 15));
1397 g = UP5(CC_SEL(cc, 20));
1398 r = UP5(CC_SEL(cc, 25));
1399 } else {
1400 b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1401 g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1402 r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1403 }
1404 rgba[RCOMP] = r;
1405 rgba[GCOMP] = g;
1406 rgba[BCOMP] = b;
1407 rgba[ACOMP] = 255;
1408 }
1409 }
1410
1411
1412 static void
fxt1_decode_1CHROMA(const GLubyte * code,GLint t,GLubyte * rgba)1413 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1414 {
1415 const GLuint *cc;
1416 GLuint kk;
1417
1418 cc = (const GLuint *)code;
1419 if (t & 16) {
1420 cc++;
1421 t &= 15;
1422 }
1423 t = (cc[0] >> (t * 2)) & 3;
1424
1425 t *= 15;
1426 cc = (const GLuint *)(code + 8 + t / 8);
1427 kk = cc[0] >> (t & 7);
1428 rgba[BCOMP] = UP5(kk);
1429 rgba[GCOMP] = UP5(kk >> 5);
1430 rgba[RCOMP] = UP5(kk >> 10);
1431 rgba[ACOMP] = 255;
1432 }
1433
1434
1435 static void
fxt1_decode_1MIXED(const GLubyte * code,GLint t,GLubyte * rgba)1436 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1437 {
1438 const GLuint *cc;
1439 GLuint col[2][3];
1440 GLint glsb, selb;
1441
1442 cc = (const GLuint *)code;
1443 if (t & 16) {
1444 t &= 15;
1445 t = (cc[1] >> (t * 2)) & 3;
1446 /* col 2 */
1447 col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1448 col[0][GCOMP] = CC_SEL(cc, 99);
1449 col[0][RCOMP] = CC_SEL(cc, 104);
1450 /* col 3 */
1451 col[1][BCOMP] = CC_SEL(cc, 109);
1452 col[1][GCOMP] = CC_SEL(cc, 114);
1453 col[1][RCOMP] = CC_SEL(cc, 119);
1454 glsb = CC_SEL(cc, 126);
1455 selb = CC_SEL(cc, 33);
1456 } else {
1457 t = (cc[0] >> (t * 2)) & 3;
1458 /* col 0 */
1459 col[0][BCOMP] = CC_SEL(cc, 64);
1460 col[0][GCOMP] = CC_SEL(cc, 69);
1461 col[0][RCOMP] = CC_SEL(cc, 74);
1462 /* col 1 */
1463 col[1][BCOMP] = CC_SEL(cc, 79);
1464 col[1][GCOMP] = CC_SEL(cc, 84);
1465 col[1][RCOMP] = CC_SEL(cc, 89);
1466 glsb = CC_SEL(cc, 125);
1467 selb = CC_SEL(cc, 1);
1468 }
1469
1470 if (CC_SEL(cc, 124) & 1) {
1471 /* alpha[0] == 1 */
1472
1473 if (t == 3) {
1474 /* zero */
1475 rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1476 } else {
1477 GLubyte r, g, b;
1478 if (t == 0) {
1479 b = UP5(col[0][BCOMP]);
1480 g = UP5(col[0][GCOMP]);
1481 r = UP5(col[0][RCOMP]);
1482 } else if (t == 2) {
1483 b = UP5(col[1][BCOMP]);
1484 g = UP6(col[1][GCOMP], glsb);
1485 r = UP5(col[1][RCOMP]);
1486 } else {
1487 b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1488 g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1489 r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1490 }
1491 rgba[RCOMP] = r;
1492 rgba[GCOMP] = g;
1493 rgba[BCOMP] = b;
1494 rgba[ACOMP] = 255;
1495 }
1496 } else {
1497 /* alpha[0] == 0 */
1498 GLubyte r, g, b;
1499 if (t == 0) {
1500 b = UP5(col[0][BCOMP]);
1501 g = UP6(col[0][GCOMP], glsb ^ selb);
1502 r = UP5(col[0][RCOMP]);
1503 } else if (t == 3) {
1504 b = UP5(col[1][BCOMP]);
1505 g = UP6(col[1][GCOMP], glsb);
1506 r = UP5(col[1][RCOMP]);
1507 } else {
1508 b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1509 g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1510 UP6(col[1][GCOMP], glsb));
1511 r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1512 }
1513 rgba[RCOMP] = r;
1514 rgba[GCOMP] = g;
1515 rgba[BCOMP] = b;
1516 rgba[ACOMP] = 255;
1517 }
1518 }
1519
1520
1521 static void
fxt1_decode_1ALPHA(const GLubyte * code,GLint t,GLubyte * rgba)1522 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1523 {
1524 const GLuint *cc;
1525 GLubyte r, g, b, a;
1526
1527 cc = (const GLuint *)code;
1528 if (CC_SEL(cc, 124) & 1) {
1529 /* lerp == 1 */
1530 GLuint col0[4];
1531
1532 if (t & 16) {
1533 t &= 15;
1534 t = (cc[1] >> (t * 2)) & 3;
1535 /* col 2 */
1536 col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1537 col0[GCOMP] = CC_SEL(cc, 99);
1538 col0[RCOMP] = CC_SEL(cc, 104);
1539 col0[ACOMP] = CC_SEL(cc, 119);
1540 } else {
1541 t = (cc[0] >> (t * 2)) & 3;
1542 /* col 0 */
1543 col0[BCOMP] = CC_SEL(cc, 64);
1544 col0[GCOMP] = CC_SEL(cc, 69);
1545 col0[RCOMP] = CC_SEL(cc, 74);
1546 col0[ACOMP] = CC_SEL(cc, 109);
1547 }
1548
1549 if (t == 0) {
1550 b = UP5(col0[BCOMP]);
1551 g = UP5(col0[GCOMP]);
1552 r = UP5(col0[RCOMP]);
1553 a = UP5(col0[ACOMP]);
1554 } else if (t == 3) {
1555 b = UP5(CC_SEL(cc, 79));
1556 g = UP5(CC_SEL(cc, 84));
1557 r = UP5(CC_SEL(cc, 89));
1558 a = UP5(CC_SEL(cc, 114));
1559 } else {
1560 b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1561 g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1562 r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1563 a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1564 }
1565 } else {
1566 /* lerp == 0 */
1567
1568 if (t & 16) {
1569 cc++;
1570 t &= 15;
1571 }
1572 t = (cc[0] >> (t * 2)) & 3;
1573
1574 if (t == 3) {
1575 /* zero */
1576 r = g = b = a = 0;
1577 } else {
1578 GLuint kk;
1579 cc = (const GLuint *)code;
1580 a = UP5(cc[3] >> (t * 5 + 13));
1581 t *= 15;
1582 cc = (const GLuint *)(code + 8 + t / 8);
1583 kk = cc[0] >> (t & 7);
1584 b = UP5(kk);
1585 g = UP5(kk >> 5);
1586 r = UP5(kk >> 10);
1587 }
1588 }
1589 rgba[RCOMP] = r;
1590 rgba[GCOMP] = g;
1591 rgba[BCOMP] = b;
1592 rgba[ACOMP] = a;
1593 }
1594
1595
1596 static void
fxt1_decode_1(const void * texture,GLint stride,GLint i,GLint j,GLubyte * rgba)1597 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1598 GLint i, GLint j, GLubyte *rgba)
1599 {
1600 static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1601 fxt1_decode_1HI, /* cc-high = "00?" */
1602 fxt1_decode_1HI, /* cc-high = "00?" */
1603 fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1604 fxt1_decode_1ALPHA, /* alpha = "011" */
1605 fxt1_decode_1MIXED, /* mixed = "1??" */
1606 fxt1_decode_1MIXED, /* mixed = "1??" */
1607 fxt1_decode_1MIXED, /* mixed = "1??" */
1608 fxt1_decode_1MIXED /* mixed = "1??" */
1609 };
1610
1611 const GLubyte *code = (const GLubyte *)texture +
1612 ((j / 4) * (stride / 8) + (i / 8)) * 16;
1613 GLint mode = CC_SEL(code, 125);
1614 GLint t = i & 7;
1615
1616 if (t & 4) {
1617 t += 12;
1618 }
1619 t += (j & 3) * 4;
1620
1621 decode_1[mode](code, t, rgba);
1622 }
1623
1624
1625
1626
1627 static void
fetch_rgb_fxt1(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)1628 fetch_rgb_fxt1(const GLubyte *map,
1629 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1630 {
1631 GLubyte rgba[4];
1632 fxt1_decode_1(map, rowStride, i, j, rgba);
1633 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1634 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1635 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1636 texel[ACOMP] = 1.0F;
1637 }
1638
1639
1640 static void
fetch_rgba_fxt1(const GLubyte * map,GLint rowStride,GLint i,GLint j,GLfloat * texel)1641 fetch_rgba_fxt1(const GLubyte *map,
1642 GLint rowStride, GLint i, GLint j, GLfloat *texel)
1643 {
1644 GLubyte rgba[4];
1645 fxt1_decode_1(map, rowStride, i, j, rgba);
1646 texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
1647 texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
1648 texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
1649 texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
1650 }
1651
1652
1653 compressed_fetch_func
_mesa_get_fxt_fetch_func(mesa_format format)1654 _mesa_get_fxt_fetch_func(mesa_format format)
1655 {
1656 switch (format) {
1657 case MESA_FORMAT_RGB_FXT1:
1658 return fetch_rgb_fxt1;
1659 case MESA_FORMAT_RGBA_FXT1:
1660 return fetch_rgba_fxt1;
1661 default:
1662 return NULL;
1663 }
1664 }
1665