• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Mesa 3-D graphics library
3  * Version:  7.1
4  *
5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 
26 /**
27  * \file texcompress_fxt1.c
28  * GL_3DFX_texture_compression_FXT1 support.
29  */
30 
31 
32 #include "glheader.h"
33 #include "imports.h"
34 #include "colormac.h"
35 #include "image.h"
36 #include "macros.h"
37 #include "mfeatures.h"
38 #include "mipmap.h"
39 #include "texcompress.h"
40 #include "texcompress_fxt1.h"
41 #include "texstore.h"
42 #include "swrast/s_context.h"
43 
44 
45 #if FEATURE_texture_fxt1
46 
47 
48 static void
49 fxt1_encode (GLuint width, GLuint height, GLint comps,
50              const void *source, GLint srcRowStride,
51              void *dest, GLint destRowStride);
52 
53 void
54 fxt1_decode_1 (const void *texture, GLint stride,
55                GLint i, GLint j, GLubyte *rgba);
56 
57 
58 /**
59  * Store user's image in rgb_fxt1 format.
60  */
61 GLboolean
_mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)62 _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
63 {
64    const GLubyte *pixels;
65    GLint srcRowStride;
66    GLubyte *dst;
67    const GLubyte *tempImage = NULL;
68 
69    ASSERT(dstFormat == MESA_FORMAT_RGB_FXT1);
70 
71    if (srcFormat != GL_RGB ||
72        srcType != GL_UNSIGNED_BYTE ||
73        ctx->_ImageTransferState ||
74        srcPacking->RowLength != srcWidth ||
75        srcPacking->SwapBytes) {
76       /* convert image to RGB/GLubyte */
77       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
78                                              baseInternalFormat,
79                                              _mesa_get_format_base_format(dstFormat),
80                                              srcWidth, srcHeight, srcDepth,
81                                              srcFormat, srcType, srcAddr,
82                                              srcPacking);
83       if (!tempImage)
84          return GL_FALSE; /* out of memory */
85       pixels = tempImage;
86       srcRowStride = 3 * srcWidth;
87       srcFormat = GL_RGB;
88    }
89    else {
90       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
91                                      srcFormat, srcType, 0, 0);
92 
93       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
94                                             srcType) / sizeof(GLubyte);
95    }
96 
97    dst = dstSlices[0];
98 
99    fxt1_encode(srcWidth, srcHeight, 3, pixels, srcRowStride,
100                dst, dstRowStride);
101 
102    if (tempImage)
103       free((void*) tempImage);
104 
105    return GL_TRUE;
106 }
107 
108 
109 /**
110  * Store user's image in rgba_fxt1 format.
111  */
112 GLboolean
_mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)113 _mesa_texstore_rgba_fxt1(TEXSTORE_PARAMS)
114 {
115    const GLubyte *pixels;
116    GLint srcRowStride;
117    GLubyte *dst;
118    const GLubyte *tempImage = NULL;
119 
120    ASSERT(dstFormat == MESA_FORMAT_RGBA_FXT1);
121 
122    if (srcFormat != GL_RGBA ||
123        srcType != GL_UNSIGNED_BYTE ||
124        ctx->_ImageTransferState ||
125        srcPacking->SwapBytes) {
126       /* convert image to RGBA/GLubyte */
127       tempImage = _mesa_make_temp_ubyte_image(ctx, dims,
128                                              baseInternalFormat,
129                                              _mesa_get_format_base_format(dstFormat),
130                                              srcWidth, srcHeight, srcDepth,
131                                              srcFormat, srcType, srcAddr,
132                                              srcPacking);
133       if (!tempImage)
134          return GL_FALSE; /* out of memory */
135       pixels = tempImage;
136       srcRowStride = 4 * srcWidth;
137       srcFormat = GL_RGBA;
138    }
139    else {
140       pixels = _mesa_image_address2d(srcPacking, srcAddr, srcWidth, srcHeight,
141                                      srcFormat, srcType, 0, 0);
142 
143       srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, srcFormat,
144                                             srcType) / sizeof(GLubyte);
145    }
146 
147    dst = dstSlices[0];
148 
149    fxt1_encode(srcWidth, srcHeight, 4, pixels, srcRowStride,
150                dst, dstRowStride);
151 
152    if (tempImage)
153       free((void*) tempImage);
154 
155    return GL_TRUE;
156 }
157 
158 
159 void
_mesa_fetch_texel_2d_f_rgba_fxt1(const struct swrast_texture_image * texImage,GLint i,GLint j,GLint k,GLfloat * texel)160 _mesa_fetch_texel_2d_f_rgba_fxt1( const struct swrast_texture_image *texImage,
161                                   GLint i, GLint j, GLint k, GLfloat *texel )
162 {
163    /* just sample as GLubyte and convert to float here */
164    GLubyte rgba[4];
165    (void) k;
166    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
167    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
168    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
169    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
170    texel[ACOMP] = UBYTE_TO_FLOAT(rgba[ACOMP]);
171 }
172 
173 
174 void
_mesa_fetch_texel_2d_f_rgb_fxt1(const struct swrast_texture_image * texImage,GLint i,GLint j,GLint k,GLfloat * texel)175 _mesa_fetch_texel_2d_f_rgb_fxt1( const struct swrast_texture_image *texImage,
176                                  GLint i, GLint j, GLint k, GLfloat *texel )
177 {
178    /* just sample as GLubyte and convert to float here */
179    GLubyte rgba[4];
180    (void) k;
181    fxt1_decode_1(texImage->Map, texImage->RowStride, i, j, rgba);
182    texel[RCOMP] = UBYTE_TO_FLOAT(rgba[RCOMP]);
183    texel[GCOMP] = UBYTE_TO_FLOAT(rgba[GCOMP]);
184    texel[BCOMP] = UBYTE_TO_FLOAT(rgba[BCOMP]);
185    texel[ACOMP] = 1.0F;
186 }
187 
188 
189 
190 /***************************************************************************\
191  * FXT1 encoder
192  *
193  * The encoder was built by reversing the decoder,
194  * and is vaguely based on Texus2 by 3dfx. Note that this code
195  * is merely a proof of concept, since it is highly UNoptimized;
196  * moreover, it is sub-optimal due to initial conditions passed
197  * to Lloyd's algorithm (the interpolation modes are even worse).
198 \***************************************************************************/
199 
200 
/* largest number of components a texel ever has */
#define MAX_COMP 4
/* largest number of base vectors ever searched for */
#define MAX_VECT 4
/* texels per block (always 32) */
#define N_TEXELS 32
/* iteration cap for Lloyd's vector quantizer */
#define LL_N_REP 50
/* fault tolerance: maximum delta between two iterations */
#define LL_RMS_D 10
/* fault tolerance: maximum accumulated error */
#define LL_RMS_E 255
/* alpha threshold: (255 - ALPHA_TS) is deemed opaque */
#define ALPHA_TS 2
/* true when the GLuint read at v is zero, i.e. all four bytes are zero */
#define ISTBLACK(v) (*((GLuint *)(v)) == 0)
209 
210 
/*
 * Define a 64-bit unsigned integer type and macros.
 *
 * FX64_MOV32(a, b)  load the 32-bit value b into a (upper half cleared)
 * FX64_OR32(a, b)   OR the 32-bit value b into the low half of a
 * FX64_SHL(a, c)    shift a left by c bits
 *
 * All arguments are parenthesized so that expressions can be passed safely.
 * The native branch is the one that is compiled; the struct-based fallback
 * is kept in step with it.
 */
#if 1

#define FX64_NATIVE 1

typedef uint64_t Fx64;

#define FX64_MOV32(a, b) ((a) = (b))
#define FX64_OR32(a, b)  ((a) |= (b))
#define FX64_SHL(a, c)   ((a) <<= (c))

#else

#define FX64_NATIVE 0

typedef struct {
   GLuint lo, hi;
} Fx64;

/* clear .hi as well: FX64_SHL reads it, and the native form zero-extends */
#define FX64_MOV32(a, b)                               \
   do {                                                \
      (a).lo = (b);                                    \
      (a).hi = 0;                                      \
   } while (0)

#define FX64_OR32(a, b)  ((a).lo |= (b))

/* the (c) > 0 test keeps the 32-bit right shift below from shifting by 32 */
#define FX64_SHL(a, c)                                         \
   do {                                                        \
       if ((c) >= 32) {                                        \
          (a).hi = (a).lo << ((c) - 32);                       \
          (a).lo = 0;                                          \
       } else if ((c) > 0) {                                   \
          (a).hi = ((a).hi << (c)) | ((a).lo >> (32 - (c)));   \
          (a).lo <<= (c);                                      \
       }                                                       \
   } while (0)

#endif
247 
248 
/* per-component weight; can be used to obtain an oblong metric: 0.30 / 0.59 / 0.11 */
#define F(i) ((GLfloat)1)
#define SAFECDOT 1 /* for paranoids: clamp the index computed by CALCCDOT */

/*
 * Build the interpolation vector IV and bias B that map a colour onto the
 * segment V0 -> V1, scaled so that V0 maps to index 0 and V1 to index NV.
 *
 * Uses (and clobbers) a loop variable named `i` from the enclosing scope.
 * If V0 and V1 coincide the squared length is zero; the scale is then
 * forced to 0 instead of dividing by zero, so every texel maps to index 0.
 */
#define MAKEIVEC(NV, NC, IV, B, V0, V1)                  \
   do {                                                  \
      /* compute interpolation vector */                 \
      GLfloat d2 = 0.0F;                                 \
      GLfloat rd2;                                       \
                                                         \
      for (i = 0; i < (NC); i++) {                       \
         (IV)[i] = ((V1)[i] - (V0)[i]) * F(i);           \
         d2 += (IV)[i] * (IV)[i];                        \
      }                                                  \
      rd2 = (d2 > 0.0F) ? (GLfloat)(NV) / d2 : 0.0F;     \
      (B) = 0;                                           \
      for (i = 0; i < (NC); i++) {                       \
         (IV)[i] *= F(i);                                \
         (B) -= (IV)[i] * (V0)[i];                       \
         (IV)[i] *= rd2;                                 \
      }                                                  \
      (B) = (B) * rd2 + 0.5f;                            \
   } while (0)

/*
 * Project colour V onto the interpolation vector IV (bias B) produced by
 * MAKEIVEC and store the resulting index in TEXEL.  With SAFECDOT enabled
 * the index is clamped to [0, NV].
 *
 * Uses (and clobbers) a loop variable named `i` from the enclosing scope.
 */
#define CALCCDOT(TEXEL, NV, NC, IV, B, V)    \
   do {                                      \
      GLfloat dot = 0.0F;                    \
      for (i = 0; i < (NC); i++) {           \
         dot += (V)[i] * (IV)[i];            \
      }                                      \
      (TEXEL) = (GLint)(dot + (B));          \
      if (SAFECDOT) {                        \
         if ((TEXEL) < 0) {                  \
            (TEXEL) = 0;                     \
         } else if ((TEXEL) > (NV)) {        \
            (TEXEL) = (NV);                  \
         }                                   \
      }                                      \
   } while (0)
287 
288 
289 static GLint
fxt1_bestcol(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[MAX_COMP],GLint nc)290 fxt1_bestcol (GLfloat vec[][MAX_COMP], GLint nv,
291               GLubyte input[MAX_COMP], GLint nc)
292 {
293    GLint i, j, best = -1;
294    GLfloat err = 1e9; /* big enough */
295 
296    for (j = 0; j < nv; j++) {
297       GLfloat e = 0.0F;
298       for (i = 0; i < nc; i++) {
299          e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]);
300       }
301       if (e < err) {
302          err = e;
303          best = j;
304       }
305    }
306 
307    return best;
308 }
309 
310 
311 static GLint
fxt1_worst(GLfloat vec[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)312 fxt1_worst (GLfloat vec[MAX_COMP],
313             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
314 {
315    GLint i, k, worst = -1;
316    GLfloat err = -1.0F; /* small enough */
317 
318    for (k = 0; k < n; k++) {
319       GLfloat e = 0.0F;
320       for (i = 0; i < nc; i++) {
321          e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]);
322       }
323       if (e > err) {
324          err = e;
325          worst = k;
326       }
327    }
328 
329    return worst;
330 }
331 
332 
333 static GLint
fxt1_variance(GLdouble variance[MAX_COMP],GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)334 fxt1_variance (GLdouble variance[MAX_COMP],
335                GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
336 {
337    GLint i, k, best = 0;
338    GLint sx, sx2;
339    GLdouble var, maxvar = -1; /* small enough */
340    GLdouble teenth = 1.0 / n;
341 
342    for (i = 0; i < nc; i++) {
343       sx = sx2 = 0;
344       for (k = 0; k < n; k++) {
345          GLint t = input[k][i];
346          sx += t;
347          sx2 += t * t;
348       }
349       var = sx2 * teenth - sx * sx * teenth * teenth;
350       if (maxvar < var) {
351          maxvar = var;
352          best = i;
353       }
354       if (variance) {
355          variance[i] = var;
356       }
357    }
358 
359    return best;
360 }
361 
362 
363 static GLint
fxt1_choose(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)364 fxt1_choose (GLfloat vec[][MAX_COMP], GLint nv,
365              GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
366 {
367 #if 0
368    /* Choose colors from a grid.
369     */
370    GLint i, j;
371 
372    for (j = 0; j < nv; j++) {
373       GLint m = j * (n - 1) / (nv - 1);
374       for (i = 0; i < nc; i++) {
375          vec[j][i] = input[m][i];
376       }
377    }
378 #else
379    /* Our solution here is to find the darkest and brightest colors in
380     * the 8x4 tile and use those as the two representative colors.
381     * There are probably better algorithms to use (histogram-based).
382     */
383    GLint i, j, k;
384    GLint minSum = 2000; /* big enough */
385    GLint maxSum = -1; /* small enough */
386    GLint minCol = 0; /* phoudoin: silent compiler! */
387    GLint maxCol = 0; /* phoudoin: silent compiler! */
388 
389    struct {
390       GLint flag;
391       GLint key;
392       GLint freq;
393       GLint idx;
394    } hist[N_TEXELS];
395    GLint lenh = 0;
396 
397    memset(hist, 0, sizeof(hist));
398 
399    for (k = 0; k < n; k++) {
400       GLint l;
401       GLint key = 0;
402       GLint sum = 0;
403       for (i = 0; i < nc; i++) {
404          key <<= 8;
405          key |= input[k][i];
406          sum += input[k][i];
407       }
408       for (l = 0; l < n; l++) {
409          if (!hist[l].flag) {
410             /* alloc new slot */
411             hist[l].flag = !0;
412             hist[l].key = key;
413             hist[l].freq = 1;
414             hist[l].idx = k;
415             lenh = l + 1;
416             break;
417          } else if (hist[l].key == key) {
418             hist[l].freq++;
419             break;
420          }
421       }
422       if (minSum > sum) {
423          minSum = sum;
424          minCol = k;
425       }
426       if (maxSum < sum) {
427          maxSum = sum;
428          maxCol = k;
429       }
430    }
431 
432    if (lenh <= nv) {
433       for (j = 0; j < lenh; j++) {
434          for (i = 0; i < nc; i++) {
435             vec[j][i] = (GLfloat)input[hist[j].idx][i];
436          }
437       }
438       for (; j < nv; j++) {
439          for (i = 0; i < nc; i++) {
440             vec[j][i] = vec[0][i];
441          }
442       }
443       return 0;
444    }
445 
446    for (j = 0; j < nv; j++) {
447       for (i = 0; i < nc; i++) {
448          vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (GLfloat)(nv - 1);
449       }
450    }
451 #endif
452 
453    return !0;
454 }
455 
456 
457 static GLint
fxt1_lloyd(GLfloat vec[][MAX_COMP],GLint nv,GLubyte input[N_TEXELS][MAX_COMP],GLint nc,GLint n)458 fxt1_lloyd (GLfloat vec[][MAX_COMP], GLint nv,
459             GLubyte input[N_TEXELS][MAX_COMP], GLint nc, GLint n)
460 {
461    /* Use the generalized lloyd's algorithm for VQ:
462     *     find 4 color vectors.
463     *
464     *     for each sample color
465     *         sort to nearest vector.
466     *
467     *     replace each vector with the centroid of its matching colors.
468     *
469     *     repeat until RMS doesn't improve.
470     *
471     *     if a color vector has no samples, or becomes the same as another
472     *     vector, replace it with the color which is farthest from a sample.
473     *
474     * vec[][MAX_COMP]           initial vectors and resulting colors
475     * nv                        number of resulting colors required
476     * input[N_TEXELS][MAX_COMP] input texels
477     * nc                        number of components in input / vec
478     * n                         number of input samples
479     */
480 
481    GLint sum[MAX_VECT][MAX_COMP]; /* used to accumulate closest texels */
482    GLint cnt[MAX_VECT]; /* how many times a certain vector was chosen */
483    GLfloat error, lasterror = 1e9;
484 
485    GLint i, j, k, rep;
486 
487    /* the quantizer */
488    for (rep = 0; rep < LL_N_REP; rep++) {
489       /* reset sums & counters */
490       for (j = 0; j < nv; j++) {
491          for (i = 0; i < nc; i++) {
492             sum[j][i] = 0;
493          }
494          cnt[j] = 0;
495       }
496       error = 0;
497 
498       /* scan whole block */
499       for (k = 0; k < n; k++) {
500 #if 1
501          GLint best = -1;
502          GLfloat err = 1e9; /* big enough */
503          /* determine best vector */
504          for (j = 0; j < nv; j++) {
505             GLfloat e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) +
506                       (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) +
507                       (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]);
508             if (nc == 4) {
509                e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]);
510             }
511             if (e < err) {
512                err = e;
513                best = j;
514             }
515          }
516 #else
517          GLint best = fxt1_bestcol(vec, nv, input[k], nc, &err);
518 #endif
519          assert(best >= 0);
520          /* add in closest color */
521          for (i = 0; i < nc; i++) {
522             sum[best][i] += input[k][i];
523          }
524          /* mark this vector as used */
525          cnt[best]++;
526          /* accumulate error */
527          error += err;
528       }
529 
530       /* check RMS */
531       if ((error < LL_RMS_E) ||
532           ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) {
533          return !0; /* good match */
534       }
535       lasterror = error;
536 
537       /* move each vector to the barycenter of its closest colors */
538       for (j = 0; j < nv; j++) {
539          if (cnt[j]) {
540             GLfloat div = 1.0F / cnt[j];
541             for (i = 0; i < nc; i++) {
542                vec[j][i] = div * sum[j][i];
543             }
544          } else {
545             /* this vec has no samples or is identical with a previous vec */
546             GLint worst = fxt1_worst(vec[j], input, nc, n);
547             for (i = 0; i < nc; i++) {
548                vec[j][i] = input[worst][i];
549             }
550          }
551       }
552    }
553 
554    return 0; /* could not converge fast enough */
555 }
556 
557 
558 static void
fxt1_quantize_CHROMA(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])559 fxt1_quantize_CHROMA (GLuint *cc,
560                       GLubyte input[N_TEXELS][MAX_COMP])
561 {
562    const GLint n_vect = 4; /* 4 base vectors to find */
563    const GLint n_comp = 3; /* 3 components: R, G, B */
564    GLfloat vec[MAX_VECT][MAX_COMP];
565    GLint i, j, k;
566    Fx64 hi; /* high quadword */
567    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
568 
569    if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) {
570       fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS);
571    }
572 
573    FX64_MOV32(hi, 4); /* cc-chroma = "010" + unused bit */
574    for (j = n_vect - 1; j >= 0; j--) {
575       for (i = 0; i < n_comp; i++) {
576          /* add in colors */
577          FX64_SHL(hi, 5);
578          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
579       }
580    }
581    ((Fx64 *)cc)[1] = hi;
582 
583    lohi = lolo = 0;
584    /* right microtile */
585    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
586       lohi <<= 2;
587       lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
588    }
589    /* left microtile */
590    for (; k >= 0; k--) {
591       lolo <<= 2;
592       lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp);
593    }
594    cc[1] = lohi;
595    cc[0] = lolo;
596 }
597 
598 
599 static void
fxt1_quantize_ALPHA0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)600 fxt1_quantize_ALPHA0 (GLuint *cc,
601                       GLubyte input[N_TEXELS][MAX_COMP],
602                       GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
603 {
604    const GLint n_vect = 3; /* 3 base vectors to find */
605    const GLint n_comp = 4; /* 4 components: R, G, B, A */
606    GLfloat vec[MAX_VECT][MAX_COMP];
607    GLint i, j, k;
608    Fx64 hi; /* high quadword */
609    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
610 
611    /* the last vector indicates zero */
612    for (i = 0; i < n_comp; i++) {
613       vec[n_vect][i] = 0;
614    }
615 
616    /* the first n texels in reord are guaranteed to be non-zero */
617    if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) {
618       fxt1_lloyd(vec, n_vect, reord, n_comp, n);
619    }
620 
621    FX64_MOV32(hi, 6); /* alpha = "011" + lerp = 0 */
622    for (j = n_vect - 1; j >= 0; j--) {
623       /* add in alphas */
624       FX64_SHL(hi, 5);
625       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
626    }
627    for (j = n_vect - 1; j >= 0; j--) {
628       for (i = 0; i < n_comp - 1; i++) {
629          /* add in colors */
630          FX64_SHL(hi, 5);
631          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
632       }
633    }
634    ((Fx64 *)cc)[1] = hi;
635 
636    lohi = lolo = 0;
637    /* right microtile */
638    for (k = N_TEXELS - 1; k >= N_TEXELS/2; k--) {
639       lohi <<= 2;
640       lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
641    }
642    /* left microtile */
643    for (; k >= 0; k--) {
644       lolo <<= 2;
645       lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp);
646    }
647    cc[1] = lohi;
648    cc[0] = lolo;
649 }
650 
651 
652 static void
fxt1_quantize_ALPHA1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])653 fxt1_quantize_ALPHA1 (GLuint *cc,
654                       GLubyte input[N_TEXELS][MAX_COMP])
655 {
656    const GLint n_vect = 3; /* highest vector number in each microtile */
657    const GLint n_comp = 4; /* 4 components: R, G, B, A */
658    GLfloat vec[1 + 1 + 1][MAX_COMP]; /* 1.5 extrema for each sub-block */
659    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
660    GLint i, j, k;
661    Fx64 hi; /* high quadword */
662    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
663 
664    GLint minSum;
665    GLint maxSum;
666    GLint minColL = 0, maxColL = 0;
667    GLint minColR = 0, maxColR = 0;
668    GLint sumL = 0, sumR = 0;
669    GLint nn_comp;
670    /* Our solution here is to find the darkest and brightest colors in
671     * the 4x4 tile and use those as the two representative colors.
672     * There are probably better algorithms to use (histogram-based).
673     */
674    nn_comp = n_comp;
675    while ((minColL == maxColL) && nn_comp) {
676        minSum = 2000; /* big enough */
677        maxSum = -1; /* small enough */
678        for (k = 0; k < N_TEXELS / 2; k++) {
679            GLint sum = 0;
680            for (i = 0; i < nn_comp; i++) {
681                sum += input[k][i];
682            }
683            if (minSum > sum) {
684                minSum = sum;
685                minColL = k;
686            }
687            if (maxSum < sum) {
688                maxSum = sum;
689                maxColL = k;
690            }
691            sumL += sum;
692        }
693 
694        nn_comp--;
695    }
696 
697    nn_comp = n_comp;
698    while ((minColR == maxColR) && nn_comp) {
699        minSum = 2000; /* big enough */
700        maxSum = -1; /* small enough */
701        for (k = N_TEXELS / 2; k < N_TEXELS; k++) {
702            GLint sum = 0;
703            for (i = 0; i < nn_comp; i++) {
704                sum += input[k][i];
705            }
706            if (minSum > sum) {
707                minSum = sum;
708                minColR = k;
709            }
710            if (maxSum < sum) {
711                maxSum = sum;
712                maxColR = k;
713            }
714            sumR += sum;
715        }
716 
717        nn_comp--;
718    }
719 
720    /* choose the common vector (yuck!) */
721    {
722       GLint j1, j2;
723       GLint v1 = 0, v2 = 0;
724       GLfloat err = 1e9; /* big enough */
725       GLfloat tv[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
726       for (i = 0; i < n_comp; i++) {
727          tv[0][i] = input[minColL][i];
728          tv[1][i] = input[maxColL][i];
729          tv[2][i] = input[minColR][i];
730          tv[3][i] = input[maxColR][i];
731       }
732       for (j1 = 0; j1 < 2; j1++) {
733          for (j2 = 2; j2 < 4; j2++) {
734             GLfloat e = 0.0F;
735             for (i = 0; i < n_comp; i++) {
736                e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]);
737             }
738             if (e < err) {
739                err = e;
740                v1 = j1;
741                v2 = j2;
742             }
743          }
744       }
745       for (i = 0; i < n_comp; i++) {
746          vec[0][i] = tv[1 - v1][i];
747          vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR);
748          vec[2][i] = tv[5 - v2][i];
749       }
750    }
751 
752    /* left microtile */
753    cc[0] = 0;
754    if (minColL != maxColL) {
755       /* compute interpolation vector */
756       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
757 
758       /* add in texels */
759       lolo = 0;
760       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
761          GLint texel;
762          /* interpolate color */
763          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
764          /* add in texel */
765          lolo <<= 2;
766          lolo |= texel;
767       }
768 
769       cc[0] = lolo;
770    }
771 
772    /* right microtile */
773    cc[1] = 0;
774    if (minColR != maxColR) {
775       /* compute interpolation vector */
776       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]);
777 
778       /* add in texels */
779       lohi = 0;
780       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
781          GLint texel;
782          /* interpolate color */
783          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
784          /* add in texel */
785          lohi <<= 2;
786          lohi |= texel;
787       }
788 
789       cc[1] = lohi;
790    }
791 
792    FX64_MOV32(hi, 7); /* alpha = "011" + lerp = 1 */
793    for (j = n_vect - 1; j >= 0; j--) {
794       /* add in alphas */
795       FX64_SHL(hi, 5);
796       FX64_OR32(hi, (GLuint)(vec[j][ACOMP] / 8.0F));
797    }
798    for (j = n_vect - 1; j >= 0; j--) {
799       for (i = 0; i < n_comp - 1; i++) {
800          /* add in colors */
801          FX64_SHL(hi, 5);
802          FX64_OR32(hi, (GLuint)(vec[j][i] / 8.0F));
803       }
804    }
805    ((Fx64 *)cc)[1] = hi;
806 }
807 
808 
809 static void
fxt1_quantize_HI(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP],GLubyte reord[N_TEXELS][MAX_COMP],GLint n)810 fxt1_quantize_HI (GLuint *cc,
811                   GLubyte input[N_TEXELS][MAX_COMP],
812                   GLubyte reord[N_TEXELS][MAX_COMP], GLint n)
813 {
814    const GLint n_vect = 6; /* highest vector number */
815    const GLint n_comp = 3; /* 3 components: R, G, B */
816    GLfloat b = 0.0F;       /* phoudoin: silent compiler! */
817    GLfloat iv[MAX_COMP];   /* interpolation vector */
818    GLint i, k;
819    GLuint hihi; /* high quadword: hi dword */
820 
821    GLint minSum = 2000; /* big enough */
822    GLint maxSum = -1; /* small enough */
823    GLint minCol = 0; /* phoudoin: silent compiler! */
824    GLint maxCol = 0; /* phoudoin: silent compiler! */
825 
826    /* Our solution here is to find the darkest and brightest colors in
827     * the 8x4 tile and use those as the two representative colors.
828     * There are probably better algorithms to use (histogram-based).
829     */
830    for (k = 0; k < n; k++) {
831       GLint sum = 0;
832       for (i = 0; i < n_comp; i++) {
833          sum += reord[k][i];
834       }
835       if (minSum > sum) {
836          minSum = sum;
837          minCol = k;
838       }
839       if (maxSum < sum) {
840          maxSum = sum;
841          maxCol = k;
842       }
843    }
844 
845    hihi = 0; /* cc-hi = "00" */
846    for (i = 0; i < n_comp; i++) {
847       /* add in colors */
848       hihi <<= 5;
849       hihi |= reord[maxCol][i] >> 3;
850    }
851    for (i = 0; i < n_comp; i++) {
852       /* add in colors */
853       hihi <<= 5;
854       hihi |= reord[minCol][i] >> 3;
855    }
856    cc[3] = hihi;
857    cc[0] = cc[1] = cc[2] = 0;
858 
859    /* compute interpolation vector */
860    if (minCol != maxCol) {
861       MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]);
862    }
863 
864    /* add in texels */
865    for (k = N_TEXELS - 1; k >= 0; k--) {
866       GLint t = k * 3;
867       GLuint *kk = (GLuint *)((char *)cc + t / 8);
868       GLint texel = n_vect + 1; /* transparent black */
869 
870       if (!ISTBLACK(input[k])) {
871          if (minCol != maxCol) {
872             /* interpolate color */
873             CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
874             /* add in texel */
875             kk[0] |= texel << (t & 7);
876          }
877       } else {
878          /* add in texel */
879          kk[0] |= texel << (t & 7);
880       }
881    }
882 }
883 
884 
885 static void
fxt1_quantize_MIXED1(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])886 fxt1_quantize_MIXED1 (GLuint *cc,
887                       GLubyte input[N_TEXELS][MAX_COMP])
888 {
889    const GLint n_vect = 2; /* highest vector number in each microtile */
890    const GLint n_comp = 3; /* 3 components: R, G, B */
891    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
892    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
893    GLint i, j, k;
894    Fx64 hi; /* high quadword */
895    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
896 
897    GLint minSum;
898    GLint maxSum;
899    GLint minColL = 0, maxColL = -1;
900    GLint minColR = 0, maxColR = -1;
901 
902    /* Our solution here is to find the darkest and brightest colors in
903     * the 4x4 tile and use those as the two representative colors.
904     * There are probably better algorithms to use (histogram-based).
905     */
906    minSum = 2000; /* big enough */
907    maxSum = -1; /* small enough */
908    for (k = 0; k < N_TEXELS / 2; k++) {
909       if (!ISTBLACK(input[k])) {
910          GLint sum = 0;
911          for (i = 0; i < n_comp; i++) {
912             sum += input[k][i];
913          }
914          if (minSum > sum) {
915             minSum = sum;
916             minColL = k;
917          }
918          if (maxSum < sum) {
919             maxSum = sum;
920             maxColL = k;
921          }
922       }
923    }
924    minSum = 2000; /* big enough */
925    maxSum = -1; /* small enough */
926    for (; k < N_TEXELS; k++) {
927       if (!ISTBLACK(input[k])) {
928          GLint sum = 0;
929          for (i = 0; i < n_comp; i++) {
930             sum += input[k][i];
931          }
932          if (minSum > sum) {
933             minSum = sum;
934             minColR = k;
935          }
936          if (maxSum < sum) {
937             maxSum = sum;
938             maxColR = k;
939          }
940       }
941    }
942 
943    /* left microtile */
944    if (maxColL == -1) {
945       /* all transparent black */
946       cc[0] = ~0u;
947       for (i = 0; i < n_comp; i++) {
948          vec[0][i] = 0;
949          vec[1][i] = 0;
950       }
951    } else {
952       cc[0] = 0;
953       for (i = 0; i < n_comp; i++) {
954          vec[0][i] = input[minColL][i];
955          vec[1][i] = input[maxColL][i];
956       }
957       if (minColL != maxColL) {
958          /* compute interpolation vector */
959          MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
960 
961          /* add in texels */
962          lolo = 0;
963          for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
964             GLint texel = n_vect + 1; /* transparent black */
965             if (!ISTBLACK(input[k])) {
966                /* interpolate color */
967                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
968             }
969             /* add in texel */
970             lolo <<= 2;
971             lolo |= texel;
972          }
973          cc[0] = lolo;
974       }
975    }
976 
977    /* right microtile */
978    if (maxColR == -1) {
979       /* all transparent black */
980       cc[1] = ~0u;
981       for (i = 0; i < n_comp; i++) {
982          vec[2][i] = 0;
983          vec[3][i] = 0;
984       }
985    } else {
986       cc[1] = 0;
987       for (i = 0; i < n_comp; i++) {
988          vec[2][i] = input[minColR][i];
989          vec[3][i] = input[maxColR][i];
990       }
991       if (minColR != maxColR) {
992          /* compute interpolation vector */
993          MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
994 
995          /* add in texels */
996          lohi = 0;
997          for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
998             GLint texel = n_vect + 1; /* transparent black */
999             if (!ISTBLACK(input[k])) {
1000                /* interpolate color */
1001                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1002             }
1003             /* add in texel */
1004             lohi <<= 2;
1005             lohi |= texel;
1006          }
1007          cc[1] = lohi;
1008       }
1009    }
1010 
1011    FX64_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1012    for (j = 2 * 2 - 1; j >= 0; j--) {
1013       for (i = 0; i < n_comp; i++) {
1014          /* add in colors */
1015          FX64_SHL(hi, 5);
1016          FX64_OR32(hi, vec[j][i] >> 3);
1017       }
1018    }
1019    ((Fx64 *)cc)[1] = hi;
1020 }
1021 
1022 
1023 static void
fxt1_quantize_MIXED0(GLuint * cc,GLubyte input[N_TEXELS][MAX_COMP])1024 fxt1_quantize_MIXED0 (GLuint *cc,
1025                       GLubyte input[N_TEXELS][MAX_COMP])
1026 {
1027    const GLint n_vect = 3; /* highest vector number in each microtile */
1028    const GLint n_comp = 3; /* 3 components: R, G, B */
1029    GLubyte vec[2 * 2][MAX_COMP]; /* 2 extrema for each sub-block */
1030    GLfloat b, iv[MAX_COMP]; /* interpolation vector */
1031    GLint i, j, k;
1032    Fx64 hi; /* high quadword */
1033    GLuint lohi, lolo; /* low quadword: hi dword, lo dword */
1034 
1035    GLint minColL = 0, maxColL = 0;
1036    GLint minColR = 0, maxColR = 0;
1037 #if 0
1038    GLint minSum;
1039    GLint maxSum;
1040 
1041    /* Our solution here is to find the darkest and brightest colors in
1042     * the 4x4 tile and use those as the two representative colors.
1043     * There are probably better algorithms to use (histogram-based).
1044     */
1045    minSum = 2000; /* big enough */
1046    maxSum = -1; /* small enough */
1047    for (k = 0; k < N_TEXELS / 2; k++) {
1048       GLint sum = 0;
1049       for (i = 0; i < n_comp; i++) {
1050          sum += input[k][i];
1051       }
1052       if (minSum > sum) {
1053          minSum = sum;
1054          minColL = k;
1055       }
1056       if (maxSum < sum) {
1057          maxSum = sum;
1058          maxColL = k;
1059       }
1060    }
1061    minSum = 2000; /* big enough */
1062    maxSum = -1; /* small enough */
1063    for (; k < N_TEXELS; k++) {
1064       GLint sum = 0;
1065       for (i = 0; i < n_comp; i++) {
1066          sum += input[k][i];
1067       }
1068       if (minSum > sum) {
1069          minSum = sum;
1070          minColR = k;
1071       }
1072       if (maxSum < sum) {
1073          maxSum = sum;
1074          maxColR = k;
1075       }
1076    }
1077 #else
1078    GLint minVal;
1079    GLint maxVal;
1080    GLint maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2);
1081    GLint maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2);
1082 
1083    /* Scan the channel with max variance for lo & hi
1084     * and use those as the two representative colors.
1085     */
1086    minVal = 2000; /* big enough */
1087    maxVal = -1; /* small enough */
1088    for (k = 0; k < N_TEXELS / 2; k++) {
1089       GLint t = input[k][maxVarL];
1090       if (minVal > t) {
1091          minVal = t;
1092          minColL = k;
1093       }
1094       if (maxVal < t) {
1095          maxVal = t;
1096          maxColL = k;
1097       }
1098    }
1099    minVal = 2000; /* big enough */
1100    maxVal = -1; /* small enough */
1101    for (; k < N_TEXELS; k++) {
1102       GLint t = input[k][maxVarR];
1103       if (minVal > t) {
1104          minVal = t;
1105          minColR = k;
1106       }
1107       if (maxVal < t) {
1108          maxVal = t;
1109          maxColR = k;
1110       }
1111    }
1112 #endif
1113 
1114    /* left microtile */
1115    cc[0] = 0;
1116    for (i = 0; i < n_comp; i++) {
1117       vec[0][i] = input[minColL][i];
1118       vec[1][i] = input[maxColL][i];
1119    }
1120    if (minColL != maxColL) {
1121       /* compute interpolation vector */
1122       MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]);
1123 
1124       /* add in texels */
1125       lolo = 0;
1126       for (k = N_TEXELS / 2 - 1; k >= 0; k--) {
1127          GLint texel;
1128          /* interpolate color */
1129          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1130          /* add in texel */
1131          lolo <<= 2;
1132          lolo |= texel;
1133       }
1134 
1135       /* funky encoding for LSB of green */
1136       if ((GLint)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) {
1137          for (i = 0; i < n_comp; i++) {
1138             vec[1][i] = input[minColL][i];
1139             vec[0][i] = input[maxColL][i];
1140          }
1141          lolo = ~lolo;
1142       }
1143 
1144       cc[0] = lolo;
1145    }
1146 
1147    /* right microtile */
1148    cc[1] = 0;
1149    for (i = 0; i < n_comp; i++) {
1150       vec[2][i] = input[minColR][i];
1151       vec[3][i] = input[maxColR][i];
1152    }
1153    if (minColR != maxColR) {
1154       /* compute interpolation vector */
1155       MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]);
1156 
1157       /* add in texels */
1158       lohi = 0;
1159       for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
1160          GLint texel;
1161          /* interpolate color */
1162          CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
1163          /* add in texel */
1164          lohi <<= 2;
1165          lohi |= texel;
1166       }
1167 
1168       /* funky encoding for LSB of green */
1169       if ((GLint)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) {
1170          for (i = 0; i < n_comp; i++) {
1171             vec[3][i] = input[minColR][i];
1172             vec[2][i] = input[maxColR][i];
1173          }
1174          lohi = ~lohi;
1175       }
1176 
1177       cc[1] = lohi;
1178    }
1179 
1180    FX64_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); /* chroma = "1" */
1181    for (j = 2 * 2 - 1; j >= 0; j--) {
1182       for (i = 0; i < n_comp; i++) {
1183          /* add in colors */
1184          FX64_SHL(hi, 5);
1185          FX64_OR32(hi, vec[j][i] >> 3);
1186       }
1187    }
1188    ((Fx64 *)cc)[1] = hi;
1189 }
1190 
1191 
1192 static void
fxt1_quantize(GLuint * cc,const GLubyte * lines[],GLint comps)1193 fxt1_quantize (GLuint *cc, const GLubyte *lines[], GLint comps)
1194 {
1195    GLint trualpha;
1196    GLubyte reord[N_TEXELS][MAX_COMP];
1197 
1198    GLubyte input[N_TEXELS][MAX_COMP];
1199    GLint i, k, l;
1200 
1201    if (comps == 3) {
1202       /* make the whole block opaque */
1203       memset(input, -1, sizeof(input));
1204    }
1205 
1206    /* 8 texels each line */
1207    for (l = 0; l < 4; l++) {
1208       for (k = 0; k < 4; k++) {
1209          for (i = 0; i < comps; i++) {
1210             input[k + l * 4][i] = *lines[l]++;
1211          }
1212       }
1213       for (; k < 8; k++) {
1214          for (i = 0; i < comps; i++) {
1215             input[k + l * 4 + 12][i] = *lines[l]++;
1216          }
1217       }
1218    }
1219 
1220    /* block layout:
1221     * 00, 01, 02, 03, 08, 09, 0a, 0b
1222     * 10, 11, 12, 13, 18, 19, 1a, 1b
1223     * 04, 05, 06, 07, 0c, 0d, 0e, 0f
1224     * 14, 15, 16, 17, 1c, 1d, 1e, 1f
1225     */
1226 
1227    /* [dBorca]
1228     * stupidity flows forth from this
1229     */
1230    l = N_TEXELS;
1231    trualpha = 0;
1232    if (comps == 4) {
1233       /* skip all transparent black texels */
1234       l = 0;
1235       for (k = 0; k < N_TEXELS; k++) {
1236          /* test all components against 0 */
1237          if (!ISTBLACK(input[k])) {
1238             /* texel is not transparent black */
1239             COPY_4UBV(reord[l], input[k]);
1240             if (reord[l][ACOMP] < (255 - ALPHA_TS)) {
1241                /* non-opaque texel */
1242                trualpha = !0;
1243             }
1244             l++;
1245          }
1246       }
1247    }
1248 
1249 #if 0
1250    if (trualpha) {
1251       fxt1_quantize_ALPHA0(cc, input, reord, l);
1252    } else if (l == 0) {
1253       cc[0] = cc[1] = cc[2] = -1;
1254       cc[3] = 0;
1255    } else if (l < N_TEXELS) {
1256       fxt1_quantize_HI(cc, input, reord, l);
1257    } else {
1258       fxt1_quantize_CHROMA(cc, input);
1259    }
1260    (void)fxt1_quantize_ALPHA1;
1261    (void)fxt1_quantize_MIXED1;
1262    (void)fxt1_quantize_MIXED0;
1263 #else
1264    if (trualpha) {
1265       fxt1_quantize_ALPHA1(cc, input);
1266    } else if (l == 0) {
1267       cc[0] = cc[1] = cc[2] = ~0u;
1268       cc[3] = 0;
1269    } else if (l < N_TEXELS) {
1270       fxt1_quantize_MIXED1(cc, input);
1271    } else {
1272       fxt1_quantize_MIXED0(cc, input);
1273    }
1274    (void)fxt1_quantize_ALPHA0;
1275    (void)fxt1_quantize_HI;
1276    (void)fxt1_quantize_CHROMA;
1277 #endif
1278 }
1279 
1280 
1281 
1282 /**
1283  * Upscale an image by replication, not (typical) stretching.
1284  * We use this when the image width or height is less than a
1285  * certain size (4, 8) and we need to upscale an image.
1286  */
1287 static void
upscale_teximage2d(GLsizei inWidth,GLsizei inHeight,GLsizei outWidth,GLsizei outHeight,GLint comps,const GLubyte * src,GLint srcRowStride,GLubyte * dest)1288 upscale_teximage2d(GLsizei inWidth, GLsizei inHeight,
1289                    GLsizei outWidth, GLsizei outHeight,
1290                    GLint comps, const GLubyte *src, GLint srcRowStride,
1291                    GLubyte *dest )
1292 {
1293    GLint i, j, k;
1294 
1295    ASSERT(outWidth >= inWidth);
1296    ASSERT(outHeight >= inHeight);
1297 #if 0
1298    ASSERT(inWidth == 1 || inWidth == 2 || inHeight == 1 || inHeight == 2);
1299    ASSERT((outWidth & 3) == 0);
1300    ASSERT((outHeight & 3) == 0);
1301 #endif
1302 
1303    for (i = 0; i < outHeight; i++) {
1304       const GLint ii = i % inHeight;
1305       for (j = 0; j < outWidth; j++) {
1306          const GLint jj = j % inWidth;
1307          for (k = 0; k < comps; k++) {
1308             dest[(i * outWidth + j) * comps + k]
1309                = src[ii * srcRowStride + jj * comps + k];
1310          }
1311       }
1312    }
1313 }
1314 
1315 
1316 static void
fxt1_encode(GLuint width,GLuint height,GLint comps,const void * source,GLint srcRowStride,void * dest,GLint destRowStride)1317 fxt1_encode (GLuint width, GLuint height, GLint comps,
1318              const void *source, GLint srcRowStride,
1319              void *dest, GLint destRowStride)
1320 {
1321    GLuint x, y;
1322    const GLubyte *data;
1323    GLuint *encoded = (GLuint *)dest;
1324    void *newSource = NULL;
1325 
1326    assert(comps == 3 || comps == 4);
1327 
1328    /* Replicate image if width is not M8 or height is not M4 */
1329    if ((width & 7) | (height & 3)) {
1330       GLint newWidth = (width + 7) & ~7;
1331       GLint newHeight = (height + 3) & ~3;
1332       newSource = malloc(comps * newWidth * newHeight * sizeof(GLubyte));
1333       if (!newSource) {
1334          GET_CURRENT_CONTEXT(ctx);
1335          _mesa_error(ctx, GL_OUT_OF_MEMORY, "texture compression");
1336          goto cleanUp;
1337       }
1338       upscale_teximage2d(width, height, newWidth, newHeight,
1339                          comps, (const GLubyte *) source,
1340                          srcRowStride, (GLubyte *) newSource);
1341       source = newSource;
1342       width = newWidth;
1343       height = newHeight;
1344       srcRowStride = comps * newWidth;
1345    }
1346 
1347    data = (const GLubyte *) source;
1348    destRowStride = (destRowStride - width * 2) / 4;
1349    for (y = 0; y < height; y += 4) {
1350       GLuint offs = 0 + (y + 0) * srcRowStride;
1351       for (x = 0; x < width; x += 8) {
1352          const GLubyte *lines[4];
1353          lines[0] = &data[offs];
1354          lines[1] = lines[0] + srcRowStride;
1355          lines[2] = lines[1] + srcRowStride;
1356          lines[3] = lines[2] + srcRowStride;
1357          offs += 8 * comps;
1358          fxt1_quantize(encoded, lines, comps);
1359          /* 128 bits per 8x4 block */
1360          encoded += 4;
1361       }
1362       encoded += destRowStride;
1363    }
1364 
1365  cleanUp:
1366    if (newSource != NULL) {
1367       free(newSource);
1368    }
1369 }
1370 
1371 
1372 /***************************************************************************\
1373  * FXT1 decoder
1374  *
1375  * The decoder is based on GL_3DFX_texture_compression_FXT1
1376  * specification and serves as a concept for the encoder.
1377 \***************************************************************************/
1378 
1379 
1380 /* lookup table for scaling 5 bit colors up to 8 bits */
1381 static const GLubyte _rgb_scale_5[] = {
1382    0,   8,   16,  25,  33,  41,  49,  58,
1383    66,  74,  82,  90,  99,  107, 115, 123,
1384    132, 140, 148, 156, 165, 173, 181, 189,
1385    197, 206, 214, 222, 230, 239, 247, 255
1386 };
1387 
1388 /* lookup table for scaling 6 bit colors up to 8 bits */
1389 static const GLubyte _rgb_scale_6[] = {
1390    0,   4,   8,   12,  16,  20,  24,  28,
1391    32,  36,  40,  45,  49,  53,  57,  61,
1392    65,  69,  73,  77,  81,  85,  89,  93,
1393    97,  101, 105, 109, 113, 117, 121, 125,
1394    130, 134, 138, 142, 146, 150, 154, 158,
1395    162, 166, 170, 174, 178, 182, 186, 190,
1396    194, 198, 202, 206, 210, 215, 219, 223,
1397    227, 231, 235, 239, 243, 247, 251, 255
1398 };
1399 
1400 
/* CC_SEL: view cc as an array of 32-bit words, pick the word that holds
 *         bit number `which`, and shift that bit down to bit 0.  Higher
 *         bits are NOT masked off; callers mask as needed.
 * UP5:    expand the low 5 bits of c to 8 bits.
 * UP6:    expand a 6-bit value (low 5 bits of c as the upper bits, bit 0
 *         of b as the LSB) to 8 bits.
 * LERP:   integer blend of c0 and c1 with weight t out of n, rounded to
 *         nearest: t == 0 gives c0, t == n gives c1.
 */
#define CC_SEL(cc, which) (((const GLuint *)(cc))[(which) / 32] >> ((which) & 31))
#define UP5(c) (_rgb_scale_5[(c) & 31])
#define UP6(c, b) (_rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)])
#define LERP(n, t, c0, c1) ((((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n))
1405 
1406 
1407 static void
fxt1_decode_1HI(const GLubyte * code,GLint t,GLubyte * rgba)1408 fxt1_decode_1HI (const GLubyte *code, GLint t, GLubyte *rgba)
1409 {
1410    const GLuint *cc;
1411 
1412    t *= 3;
1413    cc = (const GLuint *)(code + t / 8);
1414    t = (cc[0] >> (t & 7)) & 7;
1415 
1416    if (t == 7) {
1417       rgba[RCOMP] = rgba[GCOMP] = rgba[BCOMP] = rgba[ACOMP] = 0;
1418    } else {
1419       GLubyte r, g, b;
1420       cc = (const GLuint *)(code + 12);
1421       if (t == 0) {
1422          b = UP5(CC_SEL(cc, 0));
1423          g = UP5(CC_SEL(cc, 5));
1424          r = UP5(CC_SEL(cc, 10));
1425       } else if (t == 6) {
1426          b = UP5(CC_SEL(cc, 15));
1427          g = UP5(CC_SEL(cc, 20));
1428          r = UP5(CC_SEL(cc, 25));
1429       } else {
1430          b = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15)));
1431          g = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20)));
1432          r = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25)));
1433       }
1434       rgba[RCOMP] = r;
1435       rgba[GCOMP] = g;
1436       rgba[BCOMP] = b;
1437       rgba[ACOMP] = 255;
1438    }
1439 }
1440 
1441 
1442 static void
fxt1_decode_1CHROMA(const GLubyte * code,GLint t,GLubyte * rgba)1443 fxt1_decode_1CHROMA (const GLubyte *code, GLint t, GLubyte *rgba)
1444 {
1445    const GLuint *cc;
1446    GLuint kk;
1447 
1448    cc = (const GLuint *)code;
1449    if (t & 16) {
1450       cc++;
1451       t &= 15;
1452    }
1453    t = (cc[0] >> (t * 2)) & 3;
1454 
1455    t *= 15;
1456    cc = (const GLuint *)(code + 8 + t / 8);
1457    kk = cc[0] >> (t & 7);
1458    rgba[BCOMP] = UP5(kk);
1459    rgba[GCOMP] = UP5(kk >> 5);
1460    rgba[RCOMP] = UP5(kk >> 10);
1461    rgba[ACOMP] = 255;
1462 }
1463 
1464 
1465 static void
fxt1_decode_1MIXED(const GLubyte * code,GLint t,GLubyte * rgba)1466 fxt1_decode_1MIXED (const GLubyte *code, GLint t, GLubyte *rgba)
1467 {
1468    const GLuint *cc;
1469    GLuint col[2][3];
1470    GLint glsb, selb;
1471 
1472    cc = (const GLuint *)code;
1473    if (t & 16) {
1474       t &= 15;
1475       t = (cc[1] >> (t * 2)) & 3;
1476       /* col 2 */
1477       col[0][BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1478       col[0][GCOMP] = CC_SEL(cc, 99);
1479       col[0][RCOMP] = CC_SEL(cc, 104);
1480       /* col 3 */
1481       col[1][BCOMP] = CC_SEL(cc, 109);
1482       col[1][GCOMP] = CC_SEL(cc, 114);
1483       col[1][RCOMP] = CC_SEL(cc, 119);
1484       glsb = CC_SEL(cc, 126);
1485       selb = CC_SEL(cc, 33);
1486    } else {
1487       t = (cc[0] >> (t * 2)) & 3;
1488       /* col 0 */
1489       col[0][BCOMP] = CC_SEL(cc, 64);
1490       col[0][GCOMP] = CC_SEL(cc, 69);
1491       col[0][RCOMP] = CC_SEL(cc, 74);
1492       /* col 1 */
1493       col[1][BCOMP] = CC_SEL(cc, 79);
1494       col[1][GCOMP] = CC_SEL(cc, 84);
1495       col[1][RCOMP] = CC_SEL(cc, 89);
1496       glsb = CC_SEL(cc, 125);
1497       selb = CC_SEL(cc, 1);
1498    }
1499 
1500    if (CC_SEL(cc, 124) & 1) {
1501       /* alpha[0] == 1 */
1502 
1503       if (t == 3) {
1504          /* zero */
1505          rgba[RCOMP] = rgba[BCOMP] = rgba[GCOMP] = rgba[ACOMP] = 0;
1506       } else {
1507          GLubyte r, g, b;
1508          if (t == 0) {
1509             b = UP5(col[0][BCOMP]);
1510             g = UP5(col[0][GCOMP]);
1511             r = UP5(col[0][RCOMP]);
1512          } else if (t == 2) {
1513             b = UP5(col[1][BCOMP]);
1514             g = UP6(col[1][GCOMP], glsb);
1515             r = UP5(col[1][RCOMP]);
1516          } else {
1517             b = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2;
1518             g = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2;
1519             r = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2;
1520          }
1521          rgba[RCOMP] = r;
1522          rgba[GCOMP] = g;
1523          rgba[BCOMP] = b;
1524          rgba[ACOMP] = 255;
1525       }
1526    } else {
1527       /* alpha[0] == 0 */
1528       GLubyte r, g, b;
1529       if (t == 0) {
1530          b = UP5(col[0][BCOMP]);
1531          g = UP6(col[0][GCOMP], glsb ^ selb);
1532          r = UP5(col[0][RCOMP]);
1533       } else if (t == 3) {
1534          b = UP5(col[1][BCOMP]);
1535          g = UP6(col[1][GCOMP], glsb);
1536          r = UP5(col[1][RCOMP]);
1537       } else {
1538          b = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP]));
1539          g = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb),
1540                         UP6(col[1][GCOMP], glsb));
1541          r = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP]));
1542       }
1543       rgba[RCOMP] = r;
1544       rgba[GCOMP] = g;
1545       rgba[BCOMP] = b;
1546       rgba[ACOMP] = 255;
1547    }
1548 }
1549 
1550 
1551 static void
fxt1_decode_1ALPHA(const GLubyte * code,GLint t,GLubyte * rgba)1552 fxt1_decode_1ALPHA (const GLubyte *code, GLint t, GLubyte *rgba)
1553 {
1554    const GLuint *cc;
1555    GLubyte r, g, b, a;
1556 
1557    cc = (const GLuint *)code;
1558    if (CC_SEL(cc, 124) & 1) {
1559       /* lerp == 1 */
1560       GLuint col0[4];
1561 
1562       if (t & 16) {
1563          t &= 15;
1564          t = (cc[1] >> (t * 2)) & 3;
1565          /* col 2 */
1566          col0[BCOMP] = (*(const GLuint *)(code + 11)) >> 6;
1567          col0[GCOMP] = CC_SEL(cc, 99);
1568          col0[RCOMP] = CC_SEL(cc, 104);
1569          col0[ACOMP] = CC_SEL(cc, 119);
1570       } else {
1571          t = (cc[0] >> (t * 2)) & 3;
1572          /* col 0 */
1573          col0[BCOMP] = CC_SEL(cc, 64);
1574          col0[GCOMP] = CC_SEL(cc, 69);
1575          col0[RCOMP] = CC_SEL(cc, 74);
1576          col0[ACOMP] = CC_SEL(cc, 109);
1577       }
1578 
1579       if (t == 0) {
1580          b = UP5(col0[BCOMP]);
1581          g = UP5(col0[GCOMP]);
1582          r = UP5(col0[RCOMP]);
1583          a = UP5(col0[ACOMP]);
1584       } else if (t == 3) {
1585          b = UP5(CC_SEL(cc, 79));
1586          g = UP5(CC_SEL(cc, 84));
1587          r = UP5(CC_SEL(cc, 89));
1588          a = UP5(CC_SEL(cc, 114));
1589       } else {
1590          b = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79)));
1591          g = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84)));
1592          r = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89)));
1593          a = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114)));
1594       }
1595    } else {
1596       /* lerp == 0 */
1597 
1598       if (t & 16) {
1599          cc++;
1600          t &= 15;
1601       }
1602       t = (cc[0] >> (t * 2)) & 3;
1603 
1604       if (t == 3) {
1605          /* zero */
1606          r = g = b = a = 0;
1607       } else {
1608          GLuint kk;
1609          cc = (const GLuint *)code;
1610          a = UP5(cc[3] >> (t * 5 + 13));
1611          t *= 15;
1612          cc = (const GLuint *)(code + 8 + t / 8);
1613          kk = cc[0] >> (t & 7);
1614          b = UP5(kk);
1615          g = UP5(kk >> 5);
1616          r = UP5(kk >> 10);
1617       }
1618    }
1619    rgba[RCOMP] = r;
1620    rgba[GCOMP] = g;
1621    rgba[BCOMP] = b;
1622    rgba[ACOMP] = a;
1623 }
1624 
1625 
1626 void
fxt1_decode_1(const void * texture,GLint stride,GLint i,GLint j,GLubyte * rgba)1627 fxt1_decode_1 (const void *texture, GLint stride, /* in pixels */
1628                GLint i, GLint j, GLubyte *rgba)
1629 {
1630    static void (*decode_1[]) (const GLubyte *, GLint, GLubyte *) = {
1631       fxt1_decode_1HI,     /* cc-high   = "00?" */
1632       fxt1_decode_1HI,     /* cc-high   = "00?" */
1633       fxt1_decode_1CHROMA, /* cc-chroma = "010" */
1634       fxt1_decode_1ALPHA,  /* alpha     = "011" */
1635       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1636       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1637       fxt1_decode_1MIXED,  /* mixed     = "1??" */
1638       fxt1_decode_1MIXED   /* mixed     = "1??" */
1639    };
1640 
1641    const GLubyte *code = (const GLubyte *)texture +
1642                          ((j / 4) * (stride / 8) + (i / 8)) * 16;
1643    GLint mode = CC_SEL(code, 125);
1644    GLint t = i & 7;
1645 
1646    if (t & 4) {
1647       t += 12;
1648    }
1649    t += (j & 3) * 4;
1650 
1651    decode_1[mode](code, t, rgba);
1652 }
1653 
1654 
1655 #endif /* FEATURE_texture_fxt1 */
1656