• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * libtxc_dxtn
3  * Version:  1.0
4  *
5  * Copyright (C) 2004  Roland Scheidegger   All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included
15  * in all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 #ifndef TEXCOMPRESS_S3TC_TMP_H
26 #define TEXCOMPRESS_S3TC_TMP_H
27 
28 #ifdef __APPLE__
29 #include <OpenGL/gl.h>
30 #else
31 #include <GL/gl.h>
32 #endif
33 
34 typedef GLubyte GLchan;
35 #define UBYTE_TO_CHAN(b)  (b)
36 #define CHAN_MAX 255
37 #define RCOMP 0
38 #define GCOMP 1
39 #define BCOMP 2
40 #define ACOMP 3
41 
/* Expand the red field (bits 11..15) of a packed RGB565 value to 8 bits;
   the top three bits of the field are replicated into the low three bits. */
#define EXP5TO8R(packedcol)                                     \
   ((((packedcol) & 0xf800) >> 8) | (((packedcol) & 0xe000) >> 13))

/* Expand the green field (bits 5..10) of a packed RGB565 value to 8 bits;
   the top two bits of the field are replicated into the low two bits. */
#define EXP6TO8G(packedcol)                                     \
   ((((packedcol) & 0x07e0) >> 3) | (((packedcol) & 0x0600) >> 9))

/* Expand the blue field (bits 0..4) of a packed RGB565 value to 8 bits;
   the top three bits of the field are replicated into the low three bits. */
#define EXP5TO8B(packedcol)                                     \
   ((((packedcol) & 0x001f) << 3) | (((packedcol) & 0x001c) >> 2))

/* Expand a 4-bit value to 8 bits by copying it into both nibbles. */
#define EXP4TO8(col)                                            \
   (((col) << 4) | (col))
53 
54 /* inefficient. To be efficient, it would be necessary to decode 16 pixels at once */
55 
/*
 * Decode one texel from an 8-byte DXT colour block.
 *
 * img_block_src  the colour block: two little-endian 16-bit colours followed
 *                by four bytes holding one 2-bit selector per texel
 * i, j           column / row of the texel inside the block (the fetch
 *                functions in this file pass values masked to 0..3)
 * dxt_type       0 and 1 allow the three-colour mode when color0 <= color1;
 *                any larger value always uses four-colour interpolation.
 *                Only type 1 turns selector 3 of the three-colour mode into
 *                a transparent texel.
 * texel          receives four GLchan values indexed by RCOMP..ACOMP
 */
static void dxt135_decode_imageblock ( const GLubyte *img_block_src,
                         GLint i, GLint j, GLuint dxt_type, GLvoid *texel ) {
   GLchan *out = (GLchan *) texel;
   /* everything is assembled byte by byte, so the host byte order is irrelevant */
   const GLushort c0 = img_block_src[0] | (img_block_src[1] << 8);
   const GLushort c1 = img_block_src[2] | (img_block_src[3] << 8);
   const GLuint selectors = img_block_src[4] | (img_block_src[5] << 8) |
      (img_block_src[6] << 16) | ((GLuint)img_block_src[7] << 24);
   const GLubyte shift = 2 * (j * 4 + i);
   const GLubyte sel = (GLubyte) ((selectors >> shift) & 3);
   const GLint fourcolor = (dxt_type > 1) || (c0 > c1);
   /* both endpoints expanded to 8 bits per channel */
   const GLint r0 = EXP5TO8R(c0), g0 = EXP6TO8G(c0), b0 = EXP5TO8B(c0);
   const GLint r1 = EXP5TO8R(c1), g1 = EXP6TO8G(c1), b1 = EXP5TO8B(c1);

   out[ACOMP] = CHAN_MAX;

   if (sel == 0) {
      out[RCOMP] = UBYTE_TO_CHAN( r0 );
      out[GCOMP] = UBYTE_TO_CHAN( g0 );
      out[BCOMP] = UBYTE_TO_CHAN( b0 );
   }
   else if (sel == 1) {
      out[RCOMP] = UBYTE_TO_CHAN( r1 );
      out[GCOMP] = UBYTE_TO_CHAN( g1 );
      out[BCOMP] = UBYTE_TO_CHAN( b1 );
   }
   else if (fourcolor) {
      /* selector 2 is 2/3 color0 + 1/3 color1, selector 3 the other way round */
      const GLint w0 = (sel == 2) ? 2 : 1;
      const GLint w1 = 3 - w0;
      out[RCOMP] = UBYTE_TO_CHAN( (r0 * w0 + r1 * w1) / 3 );
      out[GCOMP] = UBYTE_TO_CHAN( (g0 * w0 + g1 * w1) / 3 );
      out[BCOMP] = UBYTE_TO_CHAN( (b0 * w0 + b1 * w1) / 3 );
   }
   else if (sel == 2) {
      /* three-colour mode: midpoint of the two endpoints */
      out[RCOMP] = UBYTE_TO_CHAN( (r0 + r1) / 2 );
      out[GCOMP] = UBYTE_TO_CHAN( (g0 + g1) / 2 );
      out[BCOMP] = UBYTE_TO_CHAN( (b0 + b1) / 2 );
   }
   else {
      /* three-colour mode, selector 3: black, and transparent for type 1 */
      out[RCOMP] = 0;
      out[GCOMP] = 0;
      out[BCOMP] = 0;
      if (dxt_type == 1) out[ACOMP] = UBYTE_TO_CHAN(0);
   }
}
109 
110 
/*
 * Fetch texel (i, j) from DXT1 data, decoding with dxt_type 0, and store it
 * in texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
 *
 * srcRowStride is used as the row length in texels: it is rounded up to a
 * whole number of 4-texel blocks to find the start of each block row.
 */
static void fetch_2d_texel_rgb_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel)
{
   const GLint blocksperrow = (srcRowStride + 3) / 4;
   const GLint blockindex = blocksperrow * (j / 4) + (i / 4);
   /* every DXT1 block occupies 8 bytes */
   const GLubyte *block = pixdata + blockindex * 8;

   dxt135_decode_imageblock(block, (i & 3), (j & 3), 0, texel);
}
121 
122 
/*
 * Fetch texel (i, j) from DXT1 data, decoding with dxt_type 1 (selector 3 of
 * a three-colour block becomes a transparent texel), and store it in
 * texel[RCOMP], texel[GCOMP], texel[BCOMP], texel[ACOMP].
 *
 * srcRowStride is used as the row length in texels: it is rounded up to a
 * whole number of 4-texel blocks to find the start of each block row.
 */
static void fetch_2d_texel_rgba_dxt1(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel)
{
   const GLint blocksperrow = (srcRowStride + 3) / 4;
   const GLint blockindex = blocksperrow * (j / 4) + (i / 4);
   /* every DXT1 block occupies 8 bytes */
   const GLubyte *block = pixdata + blockindex * 8;

   dxt135_decode_imageblock(block, (i & 3), (j & 3), 1, texel);
}
133 
/*
 * Fetch texel (i, j) from DXT3 data and store it in texel[RCOMP],
 * texel[GCOMP], texel[BCOMP], texel[ACOMP].
 *
 * Each 16-byte block starts with 8 bytes of explicit 4-bit alpha (two texels
 * per byte, even column in the low nibble) followed by the 8-byte colour
 * block.
 */
static void fetch_2d_texel_rgba_dxt3(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel) {
   GLchan *out = (GLchan *) texel;
   const GLint blocksperrow = (srcRowStride + 3) / 4;
   const GLubyte *block = pixdata + (blocksperrow * (j / 4) + (i / 4)) * 16;
   const GLint texelindex = (j & 3) * 4 + (i & 3);
   const GLubyte alphapair = block[texelindex / 2];
   const GLubyte anibble = (i & 1) ? (alphapair >> 4) : (alphapair & 0xf);

   /* the colour decode sets alpha to CHAN_MAX; overwrite it afterwards */
   dxt135_decode_imageblock(block + 8, (i & 3), (j & 3), 2, texel);
   out[ACOMP] = UBYTE_TO_CHAN( (GLubyte)(EXP4TO8(anibble)) );
}
147 
/*
 * Fetch texel (i, j) from DXT5 data and store it in texel[RCOMP],
 * texel[GCOMP], texel[BCOMP], texel[ACOMP].
 *
 * Each 16-byte block starts with two alpha endpoints and 16 packed 3-bit
 * alpha codes (bytes 2..7), followed by the 8-byte colour block.
 */
static void fetch_2d_texel_rgba_dxt5(GLint srcRowStride, const GLubyte *pixdata,
                         GLint i, GLint j, GLvoid *texel) {
   GLchan *out = (GLchan *) texel;
   const GLint blocksperrow = (srcRowStride + 3) / 4;
   const GLubyte *block = pixdata + (blocksperrow * (j / 4) + (i / 4)) * 16;
   const GLubyte a0 = block[0];
   const GLubyte a1 = block[1];
   /* position of this texel's 3-bit code in the code bit stream */
   const GLubyte bitpos = ((j & 3) * 4 + (i & 3)) * 3;
   /* A code may straddle two bytes, so look at a 16-bit window.  For the
      last two texels the upper byte is block[8] (first colour byte); none
      of its bits survive the shift and mask. */
   const GLubyte lo = block[2 + bitpos / 8];
   const GLubyte hi = block[3 + bitpos / 8];
   const GLubyte code = ((lo | (hi << 8)) >> (bitpos & 0x7)) & 0x7;
   GLint alpha;

   dxt135_decode_imageblock(block + 8, (i & 3), (j & 3), 2, texel);

   switch (code) {
   case 0:
      alpha = a0;
      break;
   case 1:
      alpha = a1;
      break;
   default:
      if (a0 > a1) {
         /* eight-value mode: six interpolated steps between the endpoints */
         alpha = (a0 * (8 - code) + (a1 * (code - 1))) / 7;
      }
      else if (code < 6) {
         /* six-value mode: four interpolated steps ... */
         alpha = (a0 * (6 - code) + (a1 * (code - 1))) / 5;
      }
      else {
         /* ... plus the two constants 0 (code 6) and CHAN_MAX (code 7) */
         alpha = (code == 6) ? 0 : CHAN_MAX;
      }
      break;
   }
   out[ACOMP] = UBYTE_TO_CHAN( alpha );
}
178 
179 
/* Per-channel weights applied to the squared colour distance in the error
   functions below.  They are the squares of 2/4/1 (red/green/blue), loosely
   following an rgb->luminance conversion; it is not certain that this
   really reflects visual perception. */
#define REDWEIGHT 4
#define GREENWEIGHT 16
#define BLUEWEIGHT 1

/* Source texels whose alpha is at or below this value are treated as
   transparent when encoding RGBA DXT1. */
#define ALPHACUT 127
187 
/*
 * Try to improve the two base colours of a block.
 *
 * Every source pixel is matched to the closest of the four colours derived
 * from the current base colours (same luminance-weighted distance metric as
 * used for the final encoding).  The signed per-channel error of that match
 * is attributed to the two base colours in thirds, according to how much
 * each base colour contributes to the matched palette entry, and both base
 * colours are then moved by their average error.  If the results are so
 * close that they might quantise to the same 16-bit value they are pushed
 * apart.
 *
 * blkaddr, type, haveAlpha  unused
 * srccolors                 source pixels, indexed [row][column][channel]
 * bestcolor                 in: the two starting base colours (r, g, b);
 *                           out: the refined colours, written through the
 *                           same pointers, with bestcolor[0] holding the one
 *                           whose packed 565 value is not larger
 * numxpixels, numypixels    number of valid columns / rows in srccolors
 */
static void fancybasecolorsearch( UNUSED GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
                           GLint numxpixels, GLint numypixels, UNUSED GLint type, UNUSED GLboolean haveAlpha)
{
   /* TODO could also try to find a better encoding for the 3-color-encoding type, this really should be done
      if it's rgba_dxt1 and we have alpha in the block, currently even values which will be mapped to black
      due to their alpha value will influence the result */

   /* thirds of base colour 0 / base colour 1 that make up palette entry enc */
   static const GLubyte encweight[4][2] = { {3, 0}, {0, 3}, {2, 1}, {1, 2} };
   GLint i, j, colors, z;
   GLint dr, dg, db;
   GLint packed[2];
   GLuint pixerror, pixerrorbest;
   GLint blockerrlin[2][3] = { {0, 0, 0}, {0, 0, 0} };
   GLubyte nrcolor[2] = { 0, 0 };
   GLint pixerrorcolorbest[3] = { 0, 0, 0 };
   GLubyte enc = 0;
   GLubyte first;
   GLubyte cv[4][3];
   GLubyte testcolor[2][3];

   /* start with the colour that has the smaller packed 565 value in slot 0 */
   for (j = 0; j < 2; j++) {
      packed[j] = ((bestcolor[j][0] & 0xf8) << 8) |
                  ((bestcolor[j][1] & 0xfc) << 3) |
                  (bestcolor[j][2] >> 3);
   }
   first = (packed[0] < packed[1]) ? 0 : 1;
   for (z = 0; z < 3; z++) {
      testcolor[0][z] = bestcolor[first][z];
      testcolor[1][z] = bestcolor[first ^ 1][z];
   }

   /* the four-colour palette derived from the two base colours */
   for (i = 0; i < 3; i ++) {
      cv[0][i] = testcolor[0][i];
      cv[1][i] = testcolor[1][i];
      cv[2][i] = (testcolor[0][i] * 2 + testcolor[1][i]) / 3;
      cv[3][i] = (testcolor[0][i] + testcolor[1][i] * 2) / 3;
   }

   for (j = 0; j < numypixels; j++) {
      for (i = 0; i < numxpixels; i++) {
         pixerrorbest = 0xffffffff;
         for (colors = 0; colors < 4; colors++) {
            dr = srccolors[j][i][0] - cv[colors][0];
            dg = srccolors[j][i][1] - cv[colors][1];
            db = srccolors[j][i][2] - cv[colors][2];
            pixerror = dr * dr * REDWEIGHT + dg * dg * GREENWEIGHT + db * db * BLUEWEIGHT;
            if (pixerror < pixerrorbest) {
               enc = colors;
               pixerrorbest = pixerror;
               /* keep the signed distances, they give the direction to move in */
               pixerrorcolorbest[0] = dr;
               pixerrorcolorbest[1] = dg;
               pixerrorcolorbest[2] = db;
            }
         }
         /* distribute the error of the best match over the two base colours */
         for (z = 0; z < 3; z++) {
            blockerrlin[0][z] += encweight[enc][0] * pixerrorcolorbest[z];
            blockerrlin[1][z] += encweight[enc][1] * pixerrorcolorbest[z];
         }
         nrcolor[0] += encweight[enc][0];
         nrcolor[1] += encweight[enc][1];
      }
   }

   /* avoid dividing by zero when a base colour did not contribute at all */
   if (nrcolor[0] == 0) nrcolor[0] = 1;
   if (nrcolor[1] == 0) nrcolor[1] = 1;
   for (j = 0; j < 2; j++) {
      for (i = 0; i < 3; i++) {
         GLint newvalue = testcolor[j][i] + blockerrlin[j][i] / nrcolor[j];
         if (newvalue <= 0)
            testcolor[j][i] = 0;
         else if (newvalue >= 255)
            testcolor[j][i] = 255;
         else testcolor[j][i] = newvalue;
      }
   }

   if ((abs(testcolor[0][0] - testcolor[1][0]) < 8) &&
       (abs(testcolor[0][1] - testcolor[1][1]) < 4) &&
       (abs(testcolor[0][2] - testcolor[1][2]) < 8)) {
      /* both colors are so close they might get encoded as the same 16bit values */
      GLubyte coldiffred, coldiffgreen, coldiffblue, coldiffmax, factor, ind0, ind1;

      coldiffred = abs(testcolor[0][0] - testcolor[1][0]);
      coldiffgreen = 2 * abs(testcolor[0][1] - testcolor[1][1]);
      coldiffblue = abs(testcolor[0][2] - testcolor[1][2]);
      coldiffmax = coldiffred;
      if (coldiffmax < coldiffgreen) coldiffmax = coldiffgreen;
      if (coldiffmax < coldiffblue) coldiffmax = coldiffblue;
      if (coldiffmax > 0) {
         if (coldiffmax > 4) factor = 2;
         else if (coldiffmax > 2) factor = 3;
         else factor = 4;
         /* Won't do much if the color value is near 255... */
         /* ind1 is the colour with the larger green value */
         if (testcolor[1][1] >= testcolor[0][1]) {
            ind1 = 1; ind0 = 0;
         }
         else {
            ind1 = 0; ind0 = 1;
         }
         /* green: raise the larger value, saturating at 255 */
         if ((testcolor[ind1][1] + factor * coldiffgreen) <= 255)
            testcolor[ind1][1] += factor * coldiffgreen;
         else testcolor[ind1][1] = 255;
         /* red and blue: raise whichever colour already has the larger value
            in that channel (red must be compared with red here, not with
            the other colour's green), saturating at 255 */
         for (z = 0; z < 3; z += 2) {
            const GLubyte coldiff = (z == 0) ? coldiffred : coldiffblue;
            const GLubyte target = (testcolor[ind1][z] > testcolor[ind0][z]) ? ind1 : ind0;
            if ((testcolor[target][z] + factor * coldiff) <= 255)
               testcolor[target][z] += factor * coldiff;
            else testcolor[target][z] = 255;
         }
      }
   }

   /* hand the result back with the smaller packed 565 value in slot 0;
      only the blue channel is shifted down, exactly as in the packing above */
   for (j = 0; j < 2; j++) {
      packed[j] = ((testcolor[j][0] & 0xf8) << 8) |
                  ((testcolor[j][1] & 0xfc) << 3) |
                  (testcolor[j][2] >> 3);
   }
   first = (packed[0] < packed[1]) ? 0 : 1;
   for (i = 0; i < 3; i++) {
      bestcolor[0][i] = testcolor[first][i];
      bestcolor[1][i] = testcolor[first ^ 1][i];
   }
}
377 
378 
379 
/*
 * Pick the selectors for the given base colours and write the finished
 * 8-byte colour block to blkaddr.
 *
 * The base colours are first truncated to 5/6/5 bits (in place, through
 * bestcolor) and ordered so that color0 >= color1.  The block is then encoded
 * with the four-colour palette and, for the two DXT1 formats only, also with
 * the three-colour palette; the three-colour result is stored when its error
 * is smaller or when haveAlpha is set.  The same luminance-weighted distance
 * metric is used as for finding the base colours.
 *
 * blkaddr                 destination, 8 bytes are written
 * srccolors               source pixels, indexed [row][column][channel]
 * bestcolor               the two base colours; modified as described above
 * numxpixels, numypixels  number of valid columns / rows in srccolors
 * type                    compressed format enum
 * haveAlpha               forces the three-colour result to be stored
 */
static void storedxtencodedblock( GLubyte *blkaddr, GLubyte srccolors[4][4][4], GLubyte *bestcolor[2],
                           GLint numxpixels, GLint numypixels, GLuint type, GLboolean haveAlpha)
{
   GLint x, y, c, k;
   GLuint err4, err3, err, besterr;
   GLint d;
   GLushort color0, color1, first, second;
   GLuint bits4 = 0, bits3 = 0, outbits;
   GLubyte enc = 0;
   GLubyte cv[4][4];

   /* drop the bits that do not survive the 565 packing */
   for (k = 0; k < 2; k++) {
      bestcolor[k][0] &= 0xf8;
      bestcolor[k][1] &= 0xfc;
      bestcolor[k][2] &= 0xf8;
   }

   color0 = bestcolor[0][0] << 8 | bestcolor[0][1] << 3 | bestcolor[0][2] >> 3;
   color1 = bestcolor[1][0] << 8 | bestcolor[1][1] << 3 | bestcolor[1][2] >> 3;
   if (color0 < color1) {
      const GLushort swapcolor = color0;
      GLubyte *swapptr = bestcolor[0];
      color0 = color1;
      color1 = swapcolor;
      bestcolor[0] = bestcolor[1];
      bestcolor[1] = swapptr;
   }

   /* four-colour palette */
   for (c = 0; c < 3; c++) {
      cv[0][c] = bestcolor[0][c];
      cv[1][c] = bestcolor[1][c];
      cv[2][c] = (bestcolor[0][c] * 2 + bestcolor[1][c]) / 3;
      cv[3][c] = (bestcolor[0][c] + bestcolor[1][c] * 2) / 3;
   }

   err4 = 0;
   for (y = 0; y < numypixels; y++) {
      for (x = 0; x < numxpixels; x++) {
         const GLubyte *px = srccolors[y][x];
         besterr = 0xffffffff;
         for (c = 0; c < 4; c++) {
            d = px[0] - cv[c][0];
            err = d * d * REDWEIGHT;
            d = px[1] - cv[c][1];
            err += d * d * GREENWEIGHT;
            d = px[2] - cv[c][2];
            err += d * d * BLUEWEIGHT;
            if (err < besterr) {
               besterr = err;
               enc = c;
            }
         }
         err4 += besterr;
         bits4 |= (uint32_t)enc << (2 * (y * 4 + x));
      }
   }

   /* some hw might disagree but actually decoding should always use 4-color encoding
      for non-dxt1 formats */
   if (type == GL_COMPRESSED_RGB_S3TC_DXT1_EXT || type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) {
      for (c = 0; c < 3; c++) {
         cv[2][c] = (bestcolor[0][c] + bestcolor[1][c]) / 2;
         /* this isn't used. Looks like the black color constant can only be used
            with RGB_DXT1 if I read the spec correctly (note though that the radeon gpu disagrees,
            it will decode 3 to black even with DXT3/5), and due to how the color searching works
            it won't get used even then */
         cv[3][c] = 0;
      }
      err3 = 0;
      for (y = 0; y < numypixels; y++) {
         for (x = 0; x < numxpixels; x++) {
            const GLubyte *px = srccolors[y][x];
            besterr = 0xffffffff;
            if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (px[3] <= ALPHACUT)) {
               /* transparent texel: selector 3, contributes no error */
               enc = 3;
               besterr = 0;
            }
            else {
               /* entries 0 and 1 repeat the work of the four-colour pass */
               for (c = 0; c < 3; c++) {
                  d = px[0] - cv[c][0];
                  err = d * d * REDWEIGHT;
                  d = px[1] - cv[c][1];
                  err += d * d * GREENWEIGHT;
                  d = px[2] - cv[c][2];
                  err += d * d * BLUEWEIGHT;
                  if (err < besterr) {
                     besterr = err;
                     /* the two colours are stored swapped below, so
                        selectors 0 and 1 trade places */
                     enc = (c > 1) ? c : (c ^ 1);
                  }
               }
            }
            err3 += besterr;
            bits3 |= (uint32_t)enc << (2 * (y * 4 + x));
         }
      }
   } else {
      err3 = 0xffffffff;
   }

   /* finally we're finished, write back colors and bits */
   if ((err4 > err3) || (haveAlpha)) {
      /* three-colour block: smaller colour value first */
      first = color1;
      second = color0;
      outbits = bits3;
   }
   else {
      first = color0;
      second = color1;
      outbits = bits4;
   }
   blkaddr[0] = first & 0xff;
   blkaddr[1] = first >> 8;
   blkaddr[2] = second & 0xff;
   blkaddr[3] = second >> 8;
   blkaddr[4] = outbits & 0xff;
   blkaddr[5] = ( outbits >> 8) & 0xff;
   blkaddr[6] = ( outbits >> 16) & 0xff;
   blkaddr[7] = outbits >> 24;
}
502 
/*
 * Encode the colour part of one block.
 *
 * Simplistic approach: the "lowest" and "highest" colours present in the
 * block are taken as starting base colours, where lowest/highest means the
 * shortest/longest vector to 0/0/0 with the channels weighted similar to
 * their importance in an rgb->luminance conversion.  Doesn't work too well
 * though; this seems to be a rather difficult problem.  The starting colours
 * are then refined by fancybasecolorsearch() and the block is written by
 * storedxtencodedblock().
 *
 * blkaddr                 destination of the encoded colour block
 * srccolors               source pixels, indexed [row][column][channel]
 * numxpixels, numypixels  number of valid columns / rows in srccolors
 * type                    compressed format enum
 */
static void encodedxtcolorblockfaster( GLubyte *blkaddr, GLubyte srccolors[4][4][4],
                         GLint numxpixels, GLint numypixels, GLuint type )
{
   GLubyte *bestcolor[2];
   GLubyte basecolors[2][3];
   GLubyte col, row;
   GLuint lowcv, highcv, testcv;
   GLboolean haveAlpha = GL_FALSE;
   GLubyte *px = srccolors[0][0];

   /* the first pixel seeds both extremes */
   lowcv = highcv = px[0] * px[0] * REDWEIGHT +
                    px[1] * px[1] * GREENWEIGHT +
                    px[2] * px[2] * BLUEWEIGHT;
   bestcolor[0] = bestcolor[1] = px;

   for (row = 0; row < numypixels; row++) {
      for (col = 0; col < numxpixels; col++) {
         px = srccolors[row][col];
         /* don't use this as a base color if the pixel will get black/transparent anyway */
         if ((type == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) && (px[3] <= ALPHACUT)) {
            haveAlpha = GL_TRUE;
            continue;
         }
         testcv = px[0] * px[0] * REDWEIGHT +
                  px[1] * px[1] * GREENWEIGHT +
                  px[2] * px[2] * BLUEWEIGHT;
         if (testcv > highcv) {
            highcv = testcv;
            bestcolor[1] = px;
         }
         else if (testcv < lowcv) {
            lowcv = testcv;
            bestcolor[0] = px;
         }
      }
   }

   /* work on copies so the original color values won't get touched */
   for (row = 0; row < 2; row++) {
      for (col = 0; col < 3; col++) {
         basecolors[row][col] = bestcolor[row][col];
      }
   }
   bestcolor[0] = basecolors[0];
   bestcolor[1] = basecolors[1];

   /* try to find better base colors */
   fancybasecolorsearch(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
   /* find the best encoding for these colors, and store the result */
   storedxtencodedblock(blkaddr, srccolors, bestcolor, numxpixels, numypixels, type, haveAlpha);
}
557 
/*
 * Write an 8-byte DXT5 alpha block: the two alpha base values followed by
 * the sixteen alpha codes packed three bits each, lowest texel index in the
 * lowest bits.
 *
 * blkaddr     destination, 8 bytes are written
 * alphabase1  first alpha base value (byte 0)
 * alphabase2  second alpha base value (byte 1)
 * alphaenc    one code per texel; the packing assumes each is in 0..7
 */
static void writedxt5encodedalphablock( GLubyte *blkaddr, GLubyte alphabase1, GLubyte alphabase2,
                         GLubyte alphaenc[16])
{
   int half;

   blkaddr[0] = alphabase1;
   blkaddr[1] = alphabase2;

   /* eight codes fill exactly three bytes, so the block holds two such groups */
   for (half = 0; half < 2; half++) {
      const GLubyte *code = alphaenc + 8 * half;
      GLubyte *dst = blkaddr + 2 + 3 * half;

      dst[0] = code[0] | (code[1] << 3) | ((code[2] & 3) << 6);
      dst[1] = (code[2] >> 2) | (code[3] << 1) | (code[4] << 4) | ((code[5] & 1) << 7);
      dst[2] = (code[5] >> 1) | (code[6] << 2) | (code[7] << 5);
   }
}
570 
encodedxt5alpha(GLubyte * blkaddr,GLubyte srccolors[4][4][4],GLint numxpixels,GLint numypixels)571 static void encodedxt5alpha(GLubyte *blkaddr, GLubyte srccolors[4][4][4],
572                             GLint numxpixels, GLint numypixels)
573 {
574    GLubyte alphabase[2], alphause[2];
575    GLshort alphatest[2];
576    GLuint alphablockerror1, alphablockerror2, alphablockerror3;
577    GLubyte i, j, aindex, acutValues[7];
578    GLubyte alphaenc1[16], alphaenc2[16], alphaenc3[16];
579    GLboolean alphaabsmin = GL_FALSE;
580    GLboolean alphaabsmax = GL_FALSE;
581    GLshort alphadist;
582 
583    /* find lowest and highest alpha value in block, alphabase[0] lowest, alphabase[1] highest */
584    alphabase[0] = 0xff; alphabase[1] = 0x0;
585    for (j = 0; j < numypixels; j++) {
586       for (i = 0; i < numxpixels; i++) {
587          if (srccolors[j][i][3] == 0)
588             alphaabsmin = GL_TRUE;
589          else if (srccolors[j][i][3] == 255)
590             alphaabsmax = GL_TRUE;
591          else {
592             if (srccolors[j][i][3] > alphabase[1])
593                alphabase[1] = srccolors[j][i][3];
594             if (srccolors[j][i][3] < alphabase[0])
595                alphabase[0] = srccolors[j][i][3];
596          }
597       }
598    }
599 
600 
601    if ((alphabase[0] > alphabase[1]) && !(alphaabsmin && alphaabsmax)) { /* one color, either max or min */
602       /* shortcut here since it is a very common case (and also avoids later problems) */
603       /* || (alphabase[0] == alphabase[1] && !alphaabsmin && !alphaabsmax) */
604       /* could also thest for alpha0 == alpha1 (and not min/max), but probably not common, so don't bother */
605 
606       *blkaddr++ = srccolors[0][0][3];
607       blkaddr++;
608       *blkaddr++ = 0;
609       *blkaddr++ = 0;
610       *blkaddr++ = 0;
611       *blkaddr++ = 0;
612       *blkaddr++ = 0;
613       *blkaddr++ = 0;
614 /*      fprintf(stderr, "enc0 used\n");*/
615       return;
616    }
617 
618    /* find best encoding for alpha0 > alpha1 */
619    /* it's possible this encoding is better even if both alphaabsmin and alphaabsmax are true */
620    alphablockerror1 = 0x0;
621    alphablockerror2 = 0xffffffff;
622    alphablockerror3 = 0xffffffff;
623    if (alphaabsmin) alphause[0] = 0;
624    else alphause[0] = alphabase[0];
625    if (alphaabsmax) alphause[1] = 255;
626    else alphause[1] = alphabase[1];
627    /* calculate the 7 cut values, just the middle between 2 of the computed alpha values */
628    for (aindex = 0; aindex < 7; aindex++) {
629       /* don't forget here is always rounded down */
630       acutValues[aindex] = (alphause[0] * (2*aindex + 1) + alphause[1] * (14 - (2*aindex + 1))) / 14;
631    }
632 
633    for (j = 0; j < numypixels; j++) {
634       for (i = 0; i < numxpixels; i++) {
635          /* maybe it's overkill to have the most complicated calculation just for the error
636             calculation which we only need to figure out if encoding1 or encoding2 is better... */
637          if (srccolors[j][i][3] > acutValues[0]) {
638             alphaenc1[4*j + i] = 0;
639             alphadist = srccolors[j][i][3] - alphause[1];
640          }
641          else if (srccolors[j][i][3] > acutValues[1]) {
642             alphaenc1[4*j + i] = 2;
643             alphadist = srccolors[j][i][3] - (alphause[1] * 6 + alphause[0] * 1) / 7;
644          }
645          else if (srccolors[j][i][3] > acutValues[2]) {
646             alphaenc1[4*j + i] = 3;
647             alphadist = srccolors[j][i][3] - (alphause[1] * 5 + alphause[0] * 2) / 7;
648          }
649          else if (srccolors[j][i][3] > acutValues[3]) {
650             alphaenc1[4*j + i] = 4;
651             alphadist = srccolors[j][i][3] - (alphause[1] * 4 + alphause[0] * 3) / 7;
652          }
653          else if (srccolors[j][i][3] > acutValues[4]) {
654             alphaenc1[4*j + i] = 5;
655             alphadist = srccolors[j][i][3] - (alphause[1] * 3 + alphause[0] * 4) / 7;
656          }
657          else if (srccolors[j][i][3] > acutValues[5]) {
658             alphaenc1[4*j + i] = 6;
659             alphadist = srccolors[j][i][3] - (alphause[1] * 2 + alphause[0] * 5) / 7;
660          }
661          else if (srccolors[j][i][3] > acutValues[6]) {
662             alphaenc1[4*j + i] = 7;
663             alphadist = srccolors[j][i][3] - (alphause[1] * 1 + alphause[0] * 6) / 7;
664          }
665          else {
666             alphaenc1[4*j + i] = 1;
667             alphadist = srccolors[j][i][3] - alphause[0];
668          }
669          alphablockerror1 += alphadist * alphadist;
670       }
671    }
672 /*      for (i = 0; i < 16; i++) {
673          fprintf(stderr, "%d ", alphaenc1[i]);
674       }
675       fprintf(stderr, "cutVals ");
676       for (i = 0; i < 8; i++) {
677          fprintf(stderr, "%d ", acutValues[i]);
678       }
679       fprintf(stderr, "srcVals ");
680       for (j = 0; j < numypixels; j++)
681          for (i = 0; i < numxpixels; i++) {
682             fprintf(stderr, "%d ", srccolors[j][i][3]);
683          }
684 
685       fprintf(stderr, "\n");
686    }*/
687    /* it's not very likely this encoding is better if both alphaabsmin and alphaabsmax
688       are false but try it anyway */
689    if (alphablockerror1 >= 32) {
690 
691       /* don't bother if encoding is already very good, this condition should also imply
692       we have valid alphabase colors which we absolutely need (alphabase[0] <= alphabase[1]) */
693       alphablockerror2 = 0;
694       for (aindex = 0; aindex < 5; aindex++) {
695          /* don't forget here is always rounded down */
696          acutValues[aindex] = (alphabase[0] * (10 - (2*aindex + 1)) + alphabase[1] * (2*aindex + 1)) / 10;
697       }
698       for (j = 0; j < numypixels; j++) {
699          for (i = 0; i < numxpixels; i++) {
700              /* maybe it's overkill to have the most complicated calculation just for the error
701                calculation which we only need to figure out if encoding1 or encoding2 is better... */
702             if (srccolors[j][i][3] == 0) {
703                alphaenc2[4*j + i] = 6;
704                alphadist = 0;
705             }
706             else if (srccolors[j][i][3] == 255) {
707                alphaenc2[4*j + i] = 7;
708                alphadist = 0;
709             }
710             else if (srccolors[j][i][3] <= acutValues[0]) {
711                alphaenc2[4*j + i] = 0;
712                alphadist = srccolors[j][i][3] - alphabase[0];
713             }
714             else if (srccolors[j][i][3] <= acutValues[1]) {
715                alphaenc2[4*j + i] = 2;
716                alphadist = srccolors[j][i][3] - (alphabase[0] * 4 + alphabase[1] * 1) / 5;
717             }
718             else if (srccolors[j][i][3] <= acutValues[2]) {
719                alphaenc2[4*j + i] = 3;
720                alphadist = srccolors[j][i][3] - (alphabase[0] * 3 + alphabase[1] * 2) / 5;
721             }
722             else if (srccolors[j][i][3] <= acutValues[3]) {
723                alphaenc2[4*j + i] = 4;
724                alphadist = srccolors[j][i][3] - (alphabase[0] * 2 + alphabase[1] * 3) / 5;
725             }
726             else if (srccolors[j][i][3] <= acutValues[4]) {
727                alphaenc2[4*j + i] = 5;
728                alphadist = srccolors[j][i][3] - (alphabase[0] * 1 + alphabase[1] * 4) / 5;
729             }
730             else {
731                alphaenc2[4*j + i] = 1;
732                alphadist = srccolors[j][i][3] - alphabase[1];
733             }
734             alphablockerror2 += alphadist * alphadist;
735          }
736       }
737 
738 
739       /* skip this if the error is already very small
740          this encoding is MUCH better on average than #2 though, but expensive! */
741       if ((alphablockerror2 > 96) && (alphablockerror1 > 96)) {
742          GLshort blockerrlin1 = 0;
743          GLshort blockerrlin2 = 0;
744          GLubyte nralphainrangelow = 0;
745          GLubyte nralphainrangehigh = 0;
746          alphatest[0] = 0xff;
747          alphatest[1] = 0x0;
748          /* if we have large range it's likely there are values close to 0/255, try to map them to 0/255 */
749          for (j = 0; j < numypixels; j++) {
750             for (i = 0; i < numxpixels; i++) {
751                if ((srccolors[j][i][3] > alphatest[1]) && (srccolors[j][i][3] < (255 -(alphabase[1] - alphabase[0]) / 28)))
752                   alphatest[1] = srccolors[j][i][3];
753                if ((srccolors[j][i][3] < alphatest[0]) && (srccolors[j][i][3] > (alphabase[1] - alphabase[0]) / 28))
754                   alphatest[0] = srccolors[j][i][3];
755             }
756          }
757           /* shouldn't happen too often, don't really care about those degenerated cases */
758           if (alphatest[1] <= alphatest[0]) {
759              alphatest[0] = 1;
760              alphatest[1] = 254;
761 /*             fprintf(stderr, "only 1 or 0 colors for encoding!\n");*/
762          }
763          for (aindex = 0; aindex < 5; aindex++) {
764          /* don't forget here is always rounded down */
765             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
766          }
767 
768          /* find the "average" difference between the alpha values and the next encoded value.
769             This is then used to calculate new base values.
770             Should there be some weighting, i.e. those values closer to alphatest[x] have more weight,
771             since they will see more improvement, and also because the values in the middle are somewhat
772             likely to get no improvement at all (because the base values might move in different directions)?
773             OTOH it would mean the values in the middle are even less likely to get an improvement
774          */
775          for (j = 0; j < numypixels; j++) {
776             for (i = 0; i < numxpixels; i++) {
777                if (srccolors[j][i][3] <= alphatest[0] / 2) {
778                }
779                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
780                }
781                else if (srccolors[j][i][3] <= acutValues[0]) {
782                   blockerrlin1 += (srccolors[j][i][3] - alphatest[0]);
783                   nralphainrangelow += 1;
784                }
785                else if (srccolors[j][i][3] <= acutValues[1]) {
786                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
787                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5);
788                   nralphainrangelow += 1;
789                   nralphainrangehigh += 1;
790                }
791                else if (srccolors[j][i][3] <= acutValues[2]) {
792                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
793                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5);
794                   nralphainrangelow += 1;
795                   nralphainrangehigh += 1;
796                }
797                else if (srccolors[j][i][3] <= acutValues[3]) {
798                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
799                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5);
800                   nralphainrangelow += 1;
801                   nralphainrangehigh += 1;
802                }
803                else if (srccolors[j][i][3] <= acutValues[4]) {
804                   blockerrlin1 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
805                   blockerrlin2 += (srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5);
806                   nralphainrangelow += 1;
807                   nralphainrangehigh += 1;
808                   }
809                else {
810                   blockerrlin2 += (srccolors[j][i][3] - alphatest[1]);
811                   nralphainrangehigh += 1;
812                }
813             }
814          }
815          /* shouldn't happen often, needed to avoid div by zero */
816          if (nralphainrangelow == 0) nralphainrangelow = 1;
817          if (nralphainrangehigh == 0) nralphainrangehigh = 1;
818          alphatest[0] = alphatest[0] + (blockerrlin1 / nralphainrangelow);
819 /*         fprintf(stderr, "block err lin low %d, nr %d\n", blockerrlin1, nralphainrangelow);
820          fprintf(stderr, "block err lin high %d, nr %d\n", blockerrlin2, nralphainrangehigh);*/
821          /* again shouldn't really happen often... */
822          if (alphatest[0] < 0) {
823             alphatest[0] = 0;
824 /*            fprintf(stderr, "adj alpha base val to 0\n");*/
825          }
826          alphatest[1] = alphatest[1] + (blockerrlin2 / nralphainrangehigh);
827          if (alphatest[1] > 255) {
828             alphatest[1] = 255;
829 /*            fprintf(stderr, "adj alpha base val to 255\n");*/
830          }
831 
832          alphablockerror3 = 0;
833          for (aindex = 0; aindex < 5; aindex++) {
834          /* don't forget here is always rounded down */
835             acutValues[aindex] = (alphatest[0] * (10 - (2*aindex + 1)) + alphatest[1] * (2*aindex + 1)) / 10;
836          }
837          for (j = 0; j < numypixels; j++) {
838             for (i = 0; i < numxpixels; i++) {
839                 /* maybe it's overkill to have the most complicated calculation just for the error
840                   calculation which we only need to figure out if encoding1 or encoding2 is better... */
841                if (srccolors[j][i][3] <= alphatest[0] / 2) {
842                   alphaenc3[4*j + i] = 6;
843                   alphadist = srccolors[j][i][3];
844                }
845                else if (srccolors[j][i][3] > ((255 + alphatest[1]) / 2)) {
846                   alphaenc3[4*j + i] = 7;
847                   alphadist = 255 - srccolors[j][i][3];
848                }
849                else if (srccolors[j][i][3] <= acutValues[0]) {
850                   alphaenc3[4*j + i] = 0;
851                   alphadist = srccolors[j][i][3] - alphatest[0];
852                }
853                else if (srccolors[j][i][3] <= acutValues[1]) {
854                  alphaenc3[4*j + i] = 2;
855                  alphadist = srccolors[j][i][3] - (alphatest[0] * 4 + alphatest[1] * 1) / 5;
856                }
857                else if (srccolors[j][i][3] <= acutValues[2]) {
858                   alphaenc3[4*j + i] = 3;
859                   alphadist = srccolors[j][i][3] - (alphatest[0] * 3 + alphatest[1] * 2) / 5;
860                }
861                else if (srccolors[j][i][3] <= acutValues[3]) {
862                   alphaenc3[4*j + i] = 4;
863                   alphadist = srccolors[j][i][3] - (alphatest[0] * 2 + alphatest[1] * 3) / 5;
864                }
865                else if (srccolors[j][i][3] <= acutValues[4]) {
866                   alphaenc3[4*j + i] = 5;
867                   alphadist = srccolors[j][i][3] - (alphatest[0] * 1 + alphatest[1] * 4) / 5;
868                }
869                else {
870                   alphaenc3[4*j + i] = 1;
871                   alphadist = srccolors[j][i][3] - alphatest[1];
872                }
873                alphablockerror3 += alphadist * alphadist;
874             }
875          }
876       }
877    }
878   /* write the alpha values and encoding back. */
879    if ((alphablockerror1 <= alphablockerror2) && (alphablockerror1 <= alphablockerror3)) {
880 /*      if (alphablockerror1 > 96) fprintf(stderr, "enc1 used, error %d\n", alphablockerror1);*/
881       writedxt5encodedalphablock( blkaddr, alphause[1], alphause[0], alphaenc1 );
882    }
883    else if (alphablockerror2 <= alphablockerror3) {
884 /*      if (alphablockerror2 > 96) fprintf(stderr, "enc2 used, error %d\n", alphablockerror2);*/
885       writedxt5encodedalphablock( blkaddr, alphabase[0], alphabase[1], alphaenc2 );
886    }
887    else {
888 /*      fprintf(stderr, "enc3 used, error %d\n", alphablockerror3);*/
889       writedxt5encodedalphablock( blkaddr, (GLubyte)alphatest[0], (GLubyte)alphatest[1], alphaenc3 );
890    }
891 }
892 
/*
 * Copy a block of up to 4x4 texels from a source image into the fixed-size
 * working array srcpixels[row][column][component].
 *
 * srcaddr       points at the first component of the block's top-left texel.
 * srcRowStride  distance between successive image rows, counted in texels
 *               (it is multiplied by comps to get an element offset).
 * numxpixels    number of texel columns to copy.
 * numypixels    number of texel rows to copy.
 * comps         number of components stored per source texel.
 *
 * Only the first numypixels rows, numxpixels columns and comps components
 * are written; every other entry of srcpixels is left as the caller had it.
 * Each value is divided by (CHAN_MAX / 255) on the way in.
 */
static void extractsrccolors( GLubyte srcpixels[4][4][4], const GLchan *srcaddr,
                         GLint srcRowStride, GLint numxpixels, GLint numypixels, GLint comps)
{
   GLubyte row;

   for (row = 0; row < numypixels; row++) {
      /* restart at the beginning of this block row in the source image */
      const GLchan *texel = srcaddr + row * srcRowStride * comps;
      GLubyte col;

      for (col = 0; col < numxpixels; col++) {
         GLubyte chan;

         /* components of neighbouring texels are stored back to back */
         for (chan = 0; chan < comps; chan++) {
            srcpixels[row][col][chan] = *texel++ / (CHAN_MAX / 255);
         }
      }
   }
}
907 
908 
909 static void
tx_compress_dxt1(int srccomps,int width,int height,const GLubyte * srcPixData,GLubyte * dest,int dstRowStride,unsigned dstComps)910 tx_compress_dxt1(int srccomps, int width, int height,
911                  const GLubyte *srcPixData, GLubyte *dest, int dstRowStride,
912                  unsigned dstComps)
913 {
914    GLenum destFormat = dstComps == 3 ? GL_COMPRESSED_RGB_S3TC_DXT1_EXT
915                                      : GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
916    GLubyte *blkaddr = dest;
917    GLubyte srcpixels[4][4][4];
918    const GLchan *srcaddr = srcPixData;
919    int numxpixels, numypixels;
920 
921    /* hmm we used to get called without dstRowStride... */
922    int dstRowDiff = dstRowStride >= (width * 2) ?
923                     dstRowStride - (((width + 3) & ~3) * 2) : 0;
924    /* fprintf(stderr, "dxt1 tex width %d tex height %d dstRowStride %d\n",
925               width, height, dstRowStride); */
926    for (int j = 0; j < height; j += 4) {
927       if (height > j + 3) numypixels = 4;
928       else numypixels = height - j;
929       srcaddr = srcPixData + j * width * srccomps;
930       for (int i = 0; i < width; i += 4) {
931          if (width > i + 3) numxpixels = 4;
932          else numxpixels = width - i;
933          extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
934          encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
935          srcaddr += srccomps * numxpixels;
936          blkaddr += 8;
937       }
938       blkaddr += dstRowDiff;
939    }
940 }
941 
942 static void
tx_compress_dxt3(int srccomps,int width,int height,const GLubyte * srcPixData,GLubyte * dest,int dstRowStride)943 tx_compress_dxt3(int srccomps, int width, int height,
944                  const GLubyte *srcPixData, GLubyte *dest, int dstRowStride)
945 {
946    GLenum destFormat = GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
947    GLubyte *blkaddr = dest;
948    GLubyte srcpixels[4][4][4];
949    const GLchan *srcaddr = srcPixData;
950    int numxpixels, numypixels;
951 
952    int dstRowDiff = dstRowStride >= (width * 4) ?
953                     dstRowStride - (((width + 3) & ~3) * 4) : 0;
954    /* fprintf(stderr, "dxt3 tex width %d tex height %d dstRowStride %d\n",
955               width, height, dstRowStride); */
956    for (int j = 0; j < height; j += 4) {
957       if (height > j + 3) numypixels = 4;
958       else numypixels = height - j;
959       srcaddr = srcPixData + j * width * srccomps;
960       for (int i = 0; i < width; i += 4) {
961          if (width > i + 3) numxpixels = 4;
962          else numxpixels = width - i;
963          extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
964          *blkaddr++ = (srcpixels[0][0][3] >> 4) | (srcpixels[0][1][3] & 0xf0);
965          *blkaddr++ = (srcpixels[0][2][3] >> 4) | (srcpixels[0][3][3] & 0xf0);
966          *blkaddr++ = (srcpixels[1][0][3] >> 4) | (srcpixels[1][1][3] & 0xf0);
967          *blkaddr++ = (srcpixels[1][2][3] >> 4) | (srcpixels[1][3][3] & 0xf0);
968          *blkaddr++ = (srcpixels[2][0][3] >> 4) | (srcpixels[2][1][3] & 0xf0);
969          *blkaddr++ = (srcpixels[2][2][3] >> 4) | (srcpixels[2][3][3] & 0xf0);
970          *blkaddr++ = (srcpixels[3][0][3] >> 4) | (srcpixels[3][1][3] & 0xf0);
971          *blkaddr++ = (srcpixels[3][2][3] >> 4) | (srcpixels[3][3][3] & 0xf0);
972          encodedxtcolorblockfaster(blkaddr, srcpixels, numxpixels, numypixels, destFormat);
973          srcaddr += srccomps * numxpixels;
974          blkaddr += 8;
975       }
976       blkaddr += dstRowDiff;
977    }
978 }
979 
980 static void
tx_compress_dxt5(int srccomps,int width,int height,const GLubyte * srcPixData,GLubyte * dest,int dstRowStride)981 tx_compress_dxt5(int srccomps, int width, int height,
982                  const GLubyte *srcPixData, GLubyte *dest, int dstRowStride)
983 {
984    GLenum destFormat = GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
985    GLubyte *blkaddr = dest;
986    GLubyte srcpixels[4][4][4];
987    const GLchan *srcaddr = srcPixData;
988    int numxpixels, numypixels;
989 
990    int dstRowDiff = dstRowStride >= (width * 4) ?
991                     dstRowStride - (((width + 3) & ~3) * 4) : 0;
992    /* fprintf(stderr, "dxt5 tex width %d tex height %d dstRowStride %d\n",
993               width, height, dstRowStride); */
994    for (int j = 0; j < height; j += 4) {
995       if (height > j + 3) numypixels = 4;
996       else numypixels = height - j;
997       srcaddr = srcPixData + j * width * srccomps;
998       for (int i = 0; i < width; i += 4) {
999          if (width > i + 3) numxpixels = 4;
1000          else numxpixels = width - i;
1001          extractsrccolors(srcpixels, srcaddr, width, numxpixels, numypixels, srccomps);
1002          encodedxt5alpha(blkaddr, srcpixels, numxpixels, numypixels);
1003          encodedxtcolorblockfaster(blkaddr + 8, srcpixels, numxpixels, numypixels, destFormat);
1004          srcaddr += srccomps * numxpixels;
1005          blkaddr += 16;
1006       }
1007       blkaddr += dstRowDiff;
1008    }
1009 }
1010 
1011 static void
tx_compress_dxtn(GLint srccomps,GLint width,GLint height,const GLubyte * srcPixData,GLenum destFormat,GLubyte * dest,GLint dstRowStride)1012 tx_compress_dxtn(GLint srccomps, GLint width, GLint height,
1013                  const GLubyte *srcPixData, GLenum destFormat,
1014                  GLubyte *dest, GLint dstRowStride)
1015 {
1016    switch (destFormat) {
1017    case GL_COMPRESSED_RGB_S3TC_DXT1_EXT:
1018       tx_compress_dxt1(srccomps, width, height, srcPixData,
1019                        dest, dstRowStride, 3);
1020       break;
1021    case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT:
1022       tx_compress_dxt1(srccomps, width, height, srcPixData,
1023                        dest, dstRowStride, 4);
1024       break;
1025    case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT:
1026       tx_compress_dxt3(srccomps, width, height, srcPixData,
1027                        dest, dstRowStride);
1028       break;
1029    case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT:
1030       tx_compress_dxt5(srccomps, width, height, srcPixData,
1031                        dest, dstRowStride);
1032       break;
1033    default:
1034       unreachable("unknown DXTn format");
1035    }
1036 }
1037 
1038 #endif
1039