• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     SDL - Simple DirectMedia Layer
3     Copyright (C) 1997-2012 Sam Lantinga
4 
5     This library is free software; you can redistribute it and/or
6     modify it under the terms of the GNU Lesser General Public
7     License as published by the Free Software Foundation; either
8     version 2.1 of the License, or (at your option) any later version.
9 
10     This library is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     Lesser General Public License for more details.
14 
15     You should have received a copy of the GNU Lesser General Public
16     License along with this library; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18 
19     Sam Lantinga
20     slouken@libsdl.org
21 */
22 #include "SDL_config.h"
23 
24 /*
25  * RLE encoding for software colorkey and alpha-channel acceleration
26  *
27  * Original version by Sam Lantinga
28  *
29  * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
30  * decoder. Added per-surface alpha blitter. Added per-pixel alpha
31  * format, encoder and blitter.
32  *
33  * Many thanks to Xark and johns for hints, benchmarks and useful comments
34  * leading to this code.
35  *
36  * Welcome to Macro Mayhem.
37  */
38 
39 /*
40  * The encoding translates the image data to a stream of segments of the form
41  *
42  * <skip> <run> <data>
43  *
44  * where <skip> is the number of transparent pixels to skip,
45  *       <run>  is the number of opaque pixels to blit,
46  * and   <data> are the pixels themselves.
47  *
48  * This basic structure is used both for colorkeyed surfaces, used for simple
49  * binary transparency and for per-surface alpha blending, and for surfaces
50  * with per-pixel alpha. The details differ, however:
51  *
52  * Encoding of colorkeyed surfaces:
53  *
54  *   Encoded pixels always have the same format as the target surface.
55  *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
56  *   where they are 16 bit. This makes the pixel data aligned at all times.
57  *   Segments never wrap around from one scan line to the next.
58  *
59  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
60  *   beginning of a line.
61  *
62  * Encoding of surfaces with per-pixel alpha:
63  *
64  *   The sequence begins with a struct RLEDestFormat describing the target
65  *   pixel format, to provide reliable un-encoding.
66  *
67  *   Each scan line is encoded twice: First all completely opaque pixels,
68  *   encoded in the target format as described above, and then all
69  *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
70  *   in the following 32-bit format:
71  *
72  *   For 32-bit targets, each pixel has the target RGB format but with
73  *   the alpha value occupying the highest 8 bits. The <skip> and <run>
74  *   counts are 16 bit.
75  *
76  *   For 16-bit targets, each pixel has the target RGB format, but with
77  *   the middle component (usually green) shifted 16 steps to the left,
78  *   and the hole filled with the 5 most significant bits of the alpha value.
79  *   i.e. if the target has the format         rrrrrggggggbbbbb,
80  *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
81  *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
82  *   for the translucent lines. Two padding bytes may be inserted
83  *   before each translucent line to keep them 32-bit aligned.
84  *
85  *   The end of the sequence is marked by a zero <skip>,<run> pair at the
86  *   beginning of an opaque line.
87  */
88 
89 #include "SDL_video.h"
90 #include "SDL_sysvideo.h"
91 #include "SDL_blit.h"
92 #include "SDL_RLEaccel_c.h"
93 
94 /* Force MMX to 0; this blows up on almost every major compiler now. --ryan. */
95 #if 0 && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
96 #define MMX_ASMBLIT
97 #endif
98 
99 #ifdef MMX_ASMBLIT
100 #include "mmx.h"
101 #include "SDL_cpuinfo.h"
102 #endif
103 
/*
 * Two-way maximum / minimum, defined only when the platform headers have
 * not already supplied them. Each argument may be evaluated more than
 * once, so do not pass expressions with side effects.
 */
#if !defined(MAX)
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
#if !defined(MIN)
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#endif
110 
/*
 * Copy `len` pixels of `bpp` bytes each from `from` to `to`.
 * 4-byte pixels are handed to SDL_memcpy4 with the pixel count as its
 * length argument; every other depth goes through SDL_memcpy with the
 * length expressed in bytes.
 */
#define PIXEL_COPY(to, from, len, bpp)				\
do {								\
    if(!(bpp == 4)) {						\
	SDL_memcpy(to, from, (size_t)(len) * (bpp));		\
    } else {							\
	SDL_memcpy4(to, from, (size_t)(len));			\
    }								\
} while(0)

/*
 * Various colorkey blit methods, for opaque and per-surface alpha.
 * They all share the argument list (to, from, length, bpp, alpha) so that
 * any of them can be plugged into the same run-walking loop.
 */

/* Opaque run: a straight pixel copy; the alpha argument is ignored. */
#define OPAQUE_BLIT(to, from, length, bpp, alpha)	\
    PIXEL_COPY(to, from, length, bpp)
126 
127 #ifdef MMX_ASMBLIT
128 
/*
 * MMX variant of the 32bpp per-surface alpha blend.
 *
 *   to, from - destination / source pixel runs, accessed as Uint32
 *   length   - number of pixels in the run
 *   bpp      - not used by this macro
 *   alpha    - per-surface blend weight
 *
 * Constants are loaded once: 0x00FF00FF into mm3, 0xFF000000 into mm7,
 * alpha | alpha << 16 into mm4, and mm5 ends up as the complement of mm7.
 * If `length` is odd a single pixel is blended first; the loop that
 * follows consumes two pixels per pass (srcp/dstp advance by 2 and `i` is
 * decremented twice per pass). Every stored value is ANDed with mm5, so
 * the top byte of each written pixel is cleared. emms() is issued at the
 * end.
 *
 * NOTE(review): operand order of the *_r2r/_m2r helpers comes from mmx.h,
 * which is not visible here - confirm before touching the sequence.
 */
129 #define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha)	\
130     do {							\
131 	Uint32 *srcp = (Uint32 *)(from);			\
132 	Uint32 *dstp = (Uint32 *)(to);				\
133         int i = 0x00FF00FF;					\
134         movd_m2r(*(&i), mm3);					\
135         punpckldq_r2r(mm3, mm3);				\
136         i = 0xFF000000;						\
137         movd_m2r(*(&i), mm7);					\
138         punpckldq_r2r(mm7, mm7);				\
139         i = alpha | alpha << 16;				\
140         movd_m2r(*(&i), mm4);					\
141         punpckldq_r2r(mm4, mm4);				\
142 	pcmpeqd_r2r(mm5,mm5); /* set mm5 to all ones */		\
143 	pxor_r2r(mm7, mm5); /* make clear alpha mask */		\
144         i = length;						\
145 	if(i & 1) {						\
146           movd_m2r((*srcp), mm1); /* src -> mm1 */		\
147           punpcklbw_r2r(mm1, mm1);				\
148           pand_r2r(mm3, mm1);					\
149 	  movd_m2r((*dstp), mm2); /* dst -> mm2 */		\
150           punpcklbw_r2r(mm2, mm2);				\
151           pand_r2r(mm3, mm2);					\
152 	  psubw_r2r(mm2, mm1);					\
153 	  pmullw_r2r(mm4, mm1);					\
154 	  psrlw_i2r(8, mm1);					\
155 	  paddw_r2r(mm1, mm2);					\
156 	  pand_r2r(mm3, mm2);					\
157 	  packuswb_r2r(mm2, mm2);				\
158 	  pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */		\
159 	  movd_r2m(mm2, *dstp);					\
160 	  ++srcp;						\
161 	  ++dstp;						\
162 	  i--;							\
163 	}							\
164 	for(; i > 0; --i) {					\
165           movq_m2r((*srcp), mm0);				\
166 	  movq_r2r(mm0, mm1);					\
167           punpcklbw_r2r(mm0, mm0);				\
168 	  movq_m2r((*dstp), mm2);				\
169 	  punpckhbw_r2r(mm1, mm1);				\
170 	  movq_r2r(mm2, mm6);					\
171           pand_r2r(mm3, mm0);					\
172           punpcklbw_r2r(mm2, mm2);				\
173 	  pand_r2r(mm3, mm1);					\
174 	  punpckhbw_r2r(mm6, mm6);				\
175           pand_r2r(mm3, mm2);					\
176 	  psubw_r2r(mm2, mm0);					\
177 	  pmullw_r2r(mm4, mm0);					\
178 	  pand_r2r(mm3, mm6);					\
179 	  psubw_r2r(mm6, mm1);					\
180 	  pmullw_r2r(mm4, mm1);					\
181 	  psrlw_i2r(8, mm0);					\
182 	  paddw_r2r(mm0, mm2);					\
183 	  psrlw_i2r(8, mm1);					\
184 	  paddw_r2r(mm1, mm6);					\
185 	  pand_r2r(mm3, mm2);					\
186 	  pand_r2r(mm3, mm6);					\
187 	  packuswb_r2r(mm2, mm2);				\
188 	  packuswb_r2r(mm6, mm6);				\
189 	  psrlq_i2r(32, mm2);					\
190 	  psllq_i2r(32, mm6);					\
191 	  por_r2r(mm6, mm2);					\
192 	  pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */		\
193          movq_r2m(mm2, *dstp);					\
194 	  srcp += 2;						\
195 	  dstp += 2;						\
196 	  i--;							\
197 	}							\
198 	emms();							\
199     } while(0)
200 
/*
 * MMX variant of the 16bpp per-surface alpha blend, using the component
 * masks 0xF800 / 0x07E0 / 0x001F (loaded into mm1 / mm4 / mm7).
 *
 *   to, from - destination / source pixel runs, accessed as Uint16
 *   length   - number of pixels in the run
 *   bpp      - not used by this macro
 *   alpha    - per-surface blend weight; NOTE: the macro clears its low
 *              three bits in place (alpha &= ~(1+2+4)), so the argument
 *              must be a modifiable lvalue and the caller's variable is
 *              changed.
 *
 * The first (length & 3) pixels are blended by scalar code with alpha
 * scaled down to 5 bits (same arithmetic as the scalar 565 blitter); the
 * remaining pixels are handled four at a time by the MMX loop (srcp/dstp
 * advance by 4 and `i` drops by 4 per pass). emms() is issued at the end.
 */
201 #define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha)	\
202     do {						\
203         int i, n = 0;					\
204 	Uint16 *srcp = (Uint16 *)(from);		\
205 	Uint16 *dstp = (Uint16 *)(to);			\
206         Uint32 ALPHA = 0xF800;				\
207 	movd_m2r(*(&ALPHA), mm1);			\
208         punpcklwd_r2r(mm1, mm1);			\
209         punpcklwd_r2r(mm1, mm1);			\
210 	ALPHA = 0x07E0;					\
211 	movd_m2r(*(&ALPHA), mm4);			\
212         punpcklwd_r2r(mm4, mm4);			\
213         punpcklwd_r2r(mm4, mm4);			\
214 	ALPHA = 0x001F;					\
215 	movd_m2r(*(&ALPHA), mm7);			\
216         punpcklwd_r2r(mm7, mm7);			\
217         punpcklwd_r2r(mm7, mm7);			\
218 	alpha &= ~(1+2+4);				\
219         i = (Uint32)alpha | (Uint32)alpha << 16;	\
220         movd_m2r(*(&i), mm0);				\
221         punpckldq_r2r(mm0, mm0);			\
222         ALPHA = alpha >> 3;				\
223         i = ((int)(length) & 3);			\
224 	for(; i > 0; --i) {				\
225 	    Uint32 s = *srcp++;				\
226 	    Uint32 d = *dstp;				\
227 	    s = (s | s << 16) & 0x07e0f81f;		\
228 	    d = (d | d << 16) & 0x07e0f81f;		\
229 	    d += (s - d) * ALPHA >> 5;			\
230 	    d &= 0x07e0f81f;				\
231 	    *dstp++ = d | d >> 16;			\
232 	    n++;					\
233 	}						\
234 	i = (int)(length) - n;				\
235 	for(; i > 0; --i) {				\
236 	  movq_m2r((*dstp), mm3);			\
237 	  movq_m2r((*srcp), mm2);			\
238 	  movq_r2r(mm2, mm5);				\
239 	  pand_r2r(mm1 , mm5);				\
240 	  psrlq_i2r(11, mm5);				\
241 	  movq_r2r(mm3, mm6);				\
242 	  pand_r2r(mm1 , mm6);				\
243 	  psrlq_i2r(11, mm6);				\
244 	  psubw_r2r(mm6, mm5);				\
245 	  pmullw_r2r(mm0, mm5);				\
246 	  psrlw_i2r(8, mm5);				\
247 	  paddw_r2r(mm5, mm6);				\
248 	  psllq_i2r(11, mm6);				\
249 	  pand_r2r(mm1, mm6);				\
250 	  movq_r2r(mm4, mm5);				\
251 	  por_r2r(mm7, mm5);				\
252 	  pand_r2r(mm5, mm3);				\
253 	  por_r2r(mm6, mm3);				\
254 	  movq_r2r(mm2, mm5);				\
255 	  pand_r2r(mm4 , mm5);				\
256 	  psrlq_i2r(5, mm5);				\
257 	  movq_r2r(mm3, mm6);				\
258 	  pand_r2r(mm4 , mm6);				\
259 	  psrlq_i2r(5, mm6);				\
260 	  psubw_r2r(mm6, mm5);				\
261 	  pmullw_r2r(mm0, mm5);				\
262 	  psrlw_i2r(8, mm5);				\
263 	  paddw_r2r(mm5, mm6);				\
264 	  psllq_i2r(5, mm6);				\
265 	  pand_r2r(mm4, mm6);				\
266 	  movq_r2r(mm1, mm5);				\
267 	  por_r2r(mm7, mm5);				\
268 	  pand_r2r(mm5, mm3);				\
269 	  por_r2r(mm6, mm3);				\
270 	  movq_r2r(mm2, mm5);				\
271 	  pand_r2r(mm7 , mm5);				\
272           movq_r2r(mm3, mm6);				\
273 	  pand_r2r(mm7 , mm6);				\
274 	  psubw_r2r(mm6, mm5);				\
275 	  pmullw_r2r(mm0, mm5);				\
276 	  psrlw_i2r(8, mm5);				\
277 	  paddw_r2r(mm5, mm6);				\
278 	  pand_r2r(mm7, mm6);				\
279 	  movq_r2r(mm1, mm5);				\
280 	  por_r2r(mm4, mm5);				\
281 	  pand_r2r(mm5, mm3);				\
282 	  por_r2r(mm6, mm3);				\
283 	  movq_r2m(mm3, *dstp);				\
284 	  srcp += 4;					\
285 	  dstp += 4;					\
286 	  i -= 3;					\
287 	}						\
288 	emms();						\
289     } while(0)
290 
/*
 * MMX variant of the 16bpp per-surface alpha blend, using the component
 * masks 0x7C00 / 0x03E0 / 0x001F (loaded into mm1 / mm4 / mm7) and a
 * shift of 10 for the top component.
 *
 *   to, from - destination / source pixel runs, accessed as Uint16
 *   length   - number of pixels in the run
 *   bpp      - not used by this macro
 *   alpha    - per-surface blend weight; NOTE: the macro clears its low
 *              three bits in place (alpha &= ~(1+2+4)), so the argument
 *              must be a modifiable lvalue and the caller's variable is
 *              changed.
 *
 * The first (length & 3) pixels are blended by scalar code with alpha
 * scaled down to 5 bits; the remaining pixels are handled four at a time
 * by the MMX loop (srcp/dstp advance by 4 and `i` drops by 4 per pass).
 * emms() is issued at the end.
 */
291 #define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha)	\
292     do {						\
293         int i, n = 0;					\
294 	Uint16 *srcp = (Uint16 *)(from);		\
295 	Uint16 *dstp = (Uint16 *)(to);			\
296         Uint32 ALPHA = 0x7C00;				\
297 	movd_m2r(*(&ALPHA), mm1);			\
298         punpcklwd_r2r(mm1, mm1);			\
299         punpcklwd_r2r(mm1, mm1);			\
300 	ALPHA = 0x03E0;					\
301         movd_m2r(*(&ALPHA), mm4);			\
302         punpcklwd_r2r(mm4, mm4);			\
303         punpcklwd_r2r(mm4, mm4);			\
304 	ALPHA = 0x001F;					\
305 	movd_m2r(*(&ALPHA), mm7);			\
306         punpcklwd_r2r(mm7, mm7);			\
307         punpcklwd_r2r(mm7, mm7);			\
308 	alpha &= ~(1+2+4);				\
309         i = (Uint32)alpha | (Uint32)alpha << 16;	\
310         movd_m2r(*(&i), mm0);				\
311         punpckldq_r2r(mm0, mm0);			\
312         i = ((int)(length) & 3);				\
313         ALPHA = alpha >> 3;				\
314 	for(; i > 0; --i) {				\
315 	    Uint32 s = *srcp++;				\
316 	    Uint32 d = *dstp;				\
317 	    s = (s | s << 16) & 0x03e07c1f;		\
318 	    d = (d | d << 16) & 0x03e07c1f;		\
319 	    d += (s - d) * ALPHA >> 5;			\
320 	    d &= 0x03e07c1f;				\
321 	    *dstp++ = d | d >> 16;			\
322 	    n++;					\
323 	}						\
324 	i = (int)(length) - n;				\
325 	for(; i > 0; --i) {				\
326 	  movq_m2r((*dstp), mm3);			\
327 	  movq_m2r((*srcp), mm2);			\
328 	  movq_r2r(mm2, mm5);				\
329 	  pand_r2r(mm1 , mm5);				\
330 	  psrlq_i2r(10, mm5);				\
331 	  movq_r2r(mm3, mm6);				\
332 	  pand_r2r(mm1 , mm6);				\
333 	  psrlq_i2r(10, mm6);				\
334 	  psubw_r2r(mm6, mm5);				\
335 	  pmullw_r2r(mm0, mm5);				\
336 	  psrlw_i2r(8, mm5);				\
337 	  paddw_r2r(mm5, mm6);				\
338 	  psllq_i2r(10, mm6);				\
339 	  pand_r2r(mm1, mm6);				\
340 	  movq_r2r(mm4, mm5);				\
341 	  por_r2r(mm7, mm5);				\
342 	  pand_r2r(mm5, mm3);				\
343 	  por_r2r(mm6, mm3);				\
344 	  movq_r2r(mm2, mm5);				\
345 	  pand_r2r(mm4 , mm5);				\
346 	  psrlq_i2r(5, mm5);				\
347 	  movq_r2r(mm3, mm6);				\
348 	  pand_r2r(mm4 , mm6);				\
349 	  psrlq_i2r(5, mm6);				\
350 	  psubw_r2r(mm6, mm5);				\
351 	  pmullw_r2r(mm0, mm5);				\
352 	  psrlw_i2r(8, mm5);				\
353 	  paddw_r2r(mm5, mm6);				\
354 	  psllq_i2r(5, mm6);				\
355 	  pand_r2r(mm4, mm6);				\
356 	  movq_r2r(mm1, mm5);				\
357 	  por_r2r(mm7, mm5);				\
358 	  pand_r2r(mm5, mm3);				\
359 	  por_r2r(mm6, mm3);				\
360 	  movq_r2r(mm2, mm5);				\
361 	  pand_r2r(mm7 , mm5);				\
362           movq_r2r(mm3, mm6);				\
363 	  pand_r2r(mm7 , mm6);				\
364 	  psubw_r2r(mm6, mm5);				\
365 	  pmullw_r2r(mm0, mm5);				\
366 	  psrlw_i2r(8, mm5);				\
367 	  paddw_r2r(mm5, mm6);				\
368 	  pand_r2r(mm7, mm6);				\
369 	  movq_r2r(mm1, mm5);				\
370 	  por_r2r(mm4, mm5);				\
371 	  pand_r2r(mm5, mm3);				\
372 	  por_r2r(mm6, mm3);				\
373 	  movq_r2m(mm3, *dstp);				\
374 	  srcp += 4;					\
375 	  dstp += 4;					\
376 	  i -= 3;					\
377 	}						\
378 	emms();						\
379     } while(0)
380 
381 #endif
382 
383 /*
384  * For 32bpp pixels of the form 0x00rrggbb:
385  * If we treat the middle component separately, we can process the two
386  * remaining in parallel. This is safe to do because of the gap to the left
387  * of each component, so the bits from the multiplication don't collide.
388  * This can be used for any RGB permutation of course.
389  */
/*
 * Per-surface alpha blend of a run of 32-bit pixels.
 *
 *   to, from - destination / source pixel runs, accessed as Uint32
 *   length   - number of pixels in the run
 *   bpp      - not used by this macro
 *   alpha    - blend weight of the source, applied as (x * alpha) >> 8
 *
 * The two outer components (mask 0xff00ff) share one multiply, the middle
 * component (mask 0xff00) gets its own. Bits 24-31 of every written pixel
 * are zero.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha)			\
    do {								\
	const Uint32 *blit_sp = (const Uint32 *)(from);			\
	Uint32 *blit_dp = (Uint32 *)(to);				\
	int blit_left = (int)(length);					\
	for(; blit_left > 0; --blit_left, ++blit_sp, ++blit_dp) {	\
	    const Uint32 blit_s = *blit_sp;				\
	    const Uint32 blit_d = *blit_dp;				\
	    Uint32 blit_rb = blit_d & 0xff00ff;				\
	    Uint32 blit_g = blit_d & 0xff00;				\
	    /* unsigned wrap-around makes "s - d" work when s < d */	\
	    blit_rb += ((blit_s & 0xff00ff) - blit_rb) * alpha >> 8;	\
	    blit_g += ((blit_s & 0xff00) - blit_g) * alpha >> 8;	\
	    *blit_dp = (blit_rb & 0xff00ff) | (blit_g & 0xff00);	\
	}								\
    } while(0)
407 
408 /*
409  * For 16bpp pixels we can go a step further: put the middle component
410  * in the high 16 bits of a 32 bit word, and process all three RGB
411  * components at the same time. Since the smallest gap is here just
412  * 5 bits, we have to scale alpha down to 5 bits as well.
413  */
/*
 * Shared worker for the 16bpp per-surface alpha blitters.
 *
 * Each 16-bit pixel is duplicated into both halves of a 32-bit word and
 * masked with `spread`, which keeps the middle component in the upper
 * half and the two outer components in the lower half. All three are then
 * blended with one multiply, using alpha scaled down to 5 bits, and the
 * two halves are folded back into 16 bits.
 */
#define ALPHA_BLIT16_SPREAD(to, from, length, alpha, spread)		\
    do {								\
	const Uint16 *blit_sp = (const Uint16 *)(from);			\
	Uint16 *blit_dp = (Uint16 *)(to);				\
	const Uint32 blit_a5 = alpha >> 3;				\
	int blit_left = (int)(length);					\
	while(blit_left-- > 0) {					\
	    Uint32 blit_s = *blit_sp++;					\
	    Uint32 blit_d = *blit_dp;					\
	    blit_s = (blit_s | blit_s << 16) & spread;			\
	    blit_d = (blit_d | blit_d << 16) & spread;			\
	    blit_d += (blit_s - blit_d) * blit_a5 >> 5;			\
	    blit_d &= spread;						\
	    *blit_dp++ = (Uint16)(blit_d | blit_d >> 16);		\
	}								\
    } while(0)

/* 16bpp blend with a 6-bit middle component (spread mask 0x07e0f81f);
   `bpp` is unused. */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha)	\
    ALPHA_BLIT16_SPREAD(to, from, length, alpha, 0x07e0f81f)

/* 16bpp blend with three 5-bit components (spread mask 0x03e07c1f);
   `bpp` is unused. */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha)	\
    ALPHA_BLIT16_SPREAD(to, from, length, alpha, 0x03e07c1f)
447 
448 /*
449  * The general slow catch-all function, for remaining depths and formats
450  */
/*
 * Generic per-surface alpha blend for 2, 3 and 4 byte pixels in any
 * layout.
 *
 *   to, from - destination / source pixel runs (byte pointers)
 *   length   - number of pixels in the run
 *   bpp      - bytes per pixel; only 2, 3 and 4 are handled
 *   alpha    - blend weight of the source, applied as (x * alpha) >> 8
 *
 * Relies on a pixel-format pointer named `fmt` being in scope at the
 * point of expansion; it is handed to RGB_FROM_PIXEL / PIXEL_FROM_RGB to
 * split and rebuild the pixel.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)			\
    do {								\
	Uint8 *blit_sp = from;						\
	Uint8 *blit_dp = to;						\
	/* index of the most significant byte of a 3-byte pixel */	\
	const int blit_hi = (SDL_BYTEORDER == SDL_BIG_ENDIAN) ? 0 : 2;	\
	int blit_k;							\
	for(blit_k = 0; blit_k < (int)(length); blit_k++) {		\
	    Uint32 blit_s, blit_d;					\
	    unsigned blit_sr, blit_sg, blit_sb;				\
	    unsigned blit_dr, blit_dg, blit_db;				\
	    /* fetch one source and one destination pixel */		\
	    switch(bpp) {						\
	    case 2:							\
		blit_s = *(Uint16 *)blit_sp;				\
		blit_d = *(Uint16 *)blit_dp;				\
		break;							\
	    case 3:							\
		blit_s = (blit_sp[blit_hi] << 16) | (blit_sp[1] << 8)	\
			 | blit_sp[2 - blit_hi];			\
		blit_d = (blit_dp[blit_hi] << 16) | (blit_dp[1] << 8)	\
			 | blit_dp[2 - blit_hi];			\
		break;							\
	    case 4:							\
		blit_s = *(Uint32 *)blit_sp;				\
		blit_d = *(Uint32 *)blit_dp;				\
		break;							\
	    }								\
	    /* blend component by component */				\
	    RGB_FROM_PIXEL(blit_s, fmt, blit_sr, blit_sg, blit_sb);	\
	    RGB_FROM_PIXEL(blit_d, fmt, blit_dr, blit_dg, blit_db);	\
	    blit_dr += (blit_sr - blit_dr) * alpha >> 8;		\
	    blit_dg += (blit_sg - blit_dg) * alpha >> 8;		\
	    blit_db += (blit_sb - blit_db) * alpha >> 8;		\
	    PIXEL_FROM_RGB(blit_d, fmt, blit_dr, blit_dg, blit_db);	\
	    /* write the blended pixel back */				\
	    switch(bpp) {						\
	    case 2:							\
		*(Uint16 *)blit_dp = (Uint16)blit_d;			\
		break;							\
	    case 3:							\
		blit_dp[blit_hi] = (Uint8)(blit_d >> 16);		\
		blit_dp[1] = (Uint8)(blit_d >> 8);			\
		blit_dp[2 - blit_hi] = (Uint8)(blit_d);			\
		break;							\
	    case 4:							\
		*(Uint32 *)blit_dp = blit_d;				\
		break;							\
	    }								\
	    blit_sp += bpp;						\
	    blit_dp += bpp;						\
	}								\
    } while(0)
507 
508 #ifdef MMX_ASMBLIT
509 
/*
 * MMX variant of the 32bpp 50% blend.
 *
 *   to, from - destination / source pixel runs, accessed as Uint32
 *   length   - number of pixels in the run
 *   bpp, alpha - not used by this macro
 *
 * Per pixel the result is
 *   (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) + (s & d & 0x00010101).
 * If `length` is odd the first pixel is done with that scalar expression;
 * the loop then handles two pixels per pass (srcp/dstp advance by 2 and
 * `i` is decremented twice per pass). emms() is issued at the end.
 */
510 #define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha)		\
511     do {								\
512 	Uint32 *srcp = (Uint32 *)(from);				\
513 	Uint32 *dstp = (Uint32 *)(to);					\
514         int i = 0x00fefefe;						\
515         movd_m2r(*(&i), mm4);						\
516         punpckldq_r2r(mm4, mm4);					\
517         i = 0x00010101;							\
518         movd_m2r(*(&i), mm3);						\
519         punpckldq_r2r(mm3, mm3);					\
520         i = (int)(length);						\
521         if( i & 1 ) {							\
522 	  Uint32 s = *srcp++;						\
523 	  Uint32 d = *dstp;						\
524 	  *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)	\
525 		     + (s & d & 0x00010101);				\
526 	  i--;								\
527 	}								\
528 	for(; i > 0; --i) {						\
529 	    movq_m2r((*dstp), mm2); /* dst -> mm2 */			\
530 	    movq_r2r(mm2, mm6);	/* dst -> mm6 */			\
531 	    movq_m2r((*srcp), mm1); /* src -> mm1 */			\
532 	    movq_r2r(mm1, mm5);	/* src -> mm5 */			\
533 	    pand_r2r(mm4, mm6);	/* dst & 0x00fefefe -> mm6 */		\
534 	    pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */		\
535 	    paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (src & 0x00fefefe) -> mm5 */	\
536 	    psrld_i2r(1, mm5);						\
537 	    pand_r2r(mm1, mm2);	/* s & d -> mm2 */			\
538 	    pand_r2r(mm3, mm2);	/* s & d & 0x00010101 -> mm2 */		\
539 	    paddd_r2r(mm5, mm2);					\
540 	    movq_r2m(mm2, (*dstp));					\
541 	    dstp += 2;							\
542 	    srcp += 2;							\
543 	    i--;							\
544 	}								\
545 	emms();								\
546     } while(0)
547 
548 #endif
549 
550 /*
551  * Special case: 50% alpha (alpha=128)
552  * This is treated specially because it can be optimized very well, and
553  * since it is good for many cases of semi-translucency.
554  * The theory is to do all three components at the same time:
555  * First zero the lowest bit of each component, which gives us room to
556  * add them. Then shift right and add the sum of the lowest bits.
557  */
/*
 * 50% blend of a run of 32-bit pixels, all three components at once.
 *
 *   to, from - destination / source pixel runs, accessed as Uint32
 *   length   - number of pixels in the run
 *   bpp, alpha - not used by this macro
 *
 * The lowest bit of every component is dropped before adding so the sums
 * cannot spill into the neighbouring component; after halving, the bit
 * that both inputs had set is added back. Bits 24-31 of every written
 * pixel are zero.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)		\
    do {								\
	const Uint32 *blit_sp = (const Uint32 *)(from);			\
	Uint32 *blit_dp = (Uint32 *)(to);				\
	int blit_left = (int)(length);					\
	for(; blit_left > 0; --blit_left, ++blit_sp, ++blit_dp) {	\
	    const Uint32 blit_s = *blit_sp;				\
	    const Uint32 blit_d = *blit_dp;				\
	    const Uint32 blit_half = ((blit_s & 0x00fefefe)		\
				      + (blit_d & 0x00fefefe)) >> 1;	\
	    const Uint32 blit_low = blit_s & blit_d & 0x00010101;	\
	    *blit_dp = blit_half + blit_low;				\
	}								\
    } while(0)
570 
571 /*
572  * For 16bpp, we can actually blend two pixels in parallel, if we take
573  * care to shift before we add, not after.
574  */
575 
576 /* helper: blend a single 16 bit pixel at 50% */
/*
 * helper: blend a single 16 bit pixel at 50%.
 *
 *   dst, src - Uint16 pointer variables; both are advanced by one pixel
 *   mask     - 16-bit mask with the lowest bit of every component cleared
 *
 * The masked values are added and halved; the low bits that both pixels
 * had set (selected by the complement of `mask`) are added back.
 */
#define BLEND16_50(dst, src, mask)					\
    do {								\
	Uint32 s = *src++;						\
	Uint32 d = *dst;						\
	*dst++ = (Uint16)((((s & (Uint32)(mask))			\
			    + (d & (Uint32)(mask))) >> 1) +		\
	                  (s & d & (~(Uint32)(mask) & 0xffff)));	\
    } while(0)

/*
 * basic 16bpp blender. mask is the pixels to keep when adding.
 *
 *   to, from - destination / source pixel runs, accessed as Uint16
 *   length   - number of pixels in the run (zero is allowed)
 *   bpp, alpha - not used by this macro
 *   mask     - per-pixel 16-bit mask, see BLEND16_50
 *
 * When source and destination share the same alignment modulo 4, pixels
 * are blended two at a time through 32-bit accesses, with an optional
 * single pixel before (to reach 4-byte alignment) and after (odd count).
 * Otherwise every pixel is blended individually.
 *
 * The doubled mask is built in Uint32: shifting the plain int constant
 * left by 16 would move a set bit into the sign bit, which is undefined.
 * The leading odd pixel is only taken when the run is non-empty, so an
 * empty run never touches memory or wraps the counter.
 */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)		\
    do {								\
	unsigned n = (length);						\
	Uint16 *src = (Uint16 *)(from);					\
	Uint16 *dst = (Uint16 *)(to);					\
	const Uint32 blend_mask2 =					\
	    (Uint32)(mask) | ((Uint32)(mask) << 16);			\
	if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {			\
	    /* source and destination not in phase, blit one by one */	\
	    while(n--) {						\
		BLEND16_50(dst, src, mask);				\
	    }								\
	} else {							\
	    if(n && ((uintptr_t)src & 3)) {				\
		/* first odd pixel */					\
		BLEND16_50(dst, src, mask);				\
		n--;							\
	    }								\
	    for(; n > 1; n -= 2) {					\
		Uint32 s = *(Uint32 *)src;				\
		Uint32 d = *(Uint32 *)dst;				\
		/* shift before adding so the halves cannot carry */	\
		*(Uint32 *)dst = ((s & blend_mask2) >> 1)		\
		               + ((d & blend_mask2) >> 1)		\
		               + (s & d & ~blend_mask2);		\
		src += 2;						\
		dst += 2;						\
	    }								\
	    if(n) {							\
		BLEND16_50(dst, src, mask); /* last odd pixel */	\
	    }								\
	}								\
    } while(0)

/* 50% blend for 16bpp pixels whose component low bits are 0, 5 and 11 */
#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)	\
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)

/* 50% blend for 16bpp pixels whose component low bits are 0, 5 and 10 */
#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)	\
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)
620 
621 #ifdef MMX_ASMBLIT
622 
/*
 * Pick the blit kernel for a pixel format and per-surface alpha, then
 * expand `blitter(bpp, Type, kernel)` with it. MMX-aware variant: where
 * an MMX kernel exists it is chosen when SDL_HasMMX() reports support.
 *
 *   blitter - macro taking (bytes per pixel, run-header integer type,
 *             kernel macro name)
 *   alpha   - per-surface alpha; 255 selects the opaque copy, 128 the
 *             dedicated 50% kernels where available
 *   fmt     - pointer to the pixel format (BytesPerPixel, R/G/B masks)
 *
 * 8bpp surfaces are only handled for alpha == 255; other alpha values
 * expand to nothing for them.
 */
#define CHOOSE_BLIT(blitter, alpha, fmt)				\
    do {								\
	switch(fmt->BytesPerPixel) {					\
	case 1:								\
	    /* No 8bpp alpha blitting */				\
	    if(alpha == 255)						\
		blitter(1, Uint8, OPAQUE_BLIT);				\
	    break;							\
									\
	case 2:								\
	    if(alpha == 255) {						\
		blitter(2, Uint8, OPAQUE_BLIT);				\
	    } else if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0xffff	\
		      && (fmt->Gmask == 0x07e0				\
			  || fmt->Rmask == 0x07e0			\
			  || fmt->Bmask == 0x07e0)) {			\
		if(alpha == 128)					\
		    blitter(2, Uint8, ALPHA_BLIT16_565_50);		\
		else if(SDL_HasMMX())					\
		    blitter(2, Uint8, ALPHA_BLIT16_565MMX);		\
		else							\
		    blitter(2, Uint8, ALPHA_BLIT16_565);		\
	    } else if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x7fff	\
		      && (fmt->Gmask == 0x03e0				\
			  || fmt->Rmask == 0x03e0			\
			  || fmt->Bmask == 0x03e0)) {			\
		if(alpha == 128)					\
		    blitter(2, Uint8, ALPHA_BLIT16_555_50);		\
		else if(SDL_HasMMX())					\
		    blitter(2, Uint8, ALPHA_BLIT16_555MMX);		\
		else							\
		    blitter(2, Uint8, ALPHA_BLIT16_555);		\
	    } else {							\
		blitter(2, Uint8, ALPHA_BLIT_ANY);			\
	    }								\
	    break;							\
									\
	case 3:								\
	    if(alpha == 255)						\
		blitter(3, Uint8, OPAQUE_BLIT);				\
	    else							\
		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
	    break;							\
									\
	case 4:								\
	    if(alpha == 255) {						\
		blitter(4, Uint16, OPAQUE_BLIT);			\
	    } else if((fmt->Rmask | fmt->Gmask | fmt->Bmask)		\
			  == 0x00ffffff					\
		      && (fmt->Gmask == 0xff00				\
			  || fmt->Rmask == 0xff00			\
			  || fmt->Bmask == 0xff00)) {			\
		if(alpha == 128) {					\
		    if(SDL_HasMMX())					\
			blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);	\
		    else						\
			blitter(4, Uint16, ALPHA_BLIT32_888_50);	\
		} else {						\
		    if(SDL_HasMMX())					\
			blitter(4, Uint16, ALPHA_BLIT32_888MMX);	\
		    else						\
			blitter(4, Uint16, ALPHA_BLIT32_888);		\
		}							\
	    } else {							\
		blitter(4, Uint16, ALPHA_BLIT_ANY);			\
	    }								\
	    break;							\
	}								\
    } while(0)
706 
707 #else
708 
/*
 * Pick the blit kernel for a pixel format and per-surface alpha, then
 * expand `blitter(bpp, Type, kernel)` with it. Portable variant without
 * the MMX kernels.
 *
 *   blitter - macro taking (bytes per pixel, run-header integer type,
 *             kernel macro name)
 *   alpha   - per-surface alpha; 255 selects the opaque copy, 128 the
 *             dedicated 50% kernels where available
 *   fmt     - pointer to the pixel format (BytesPerPixel, R/G/B masks)
 *
 * 8bpp surfaces are only handled for alpha == 255; other alpha values
 * expand to nothing for them.
 */
#define CHOOSE_BLIT(blitter, alpha, fmt)				\
    do {								\
	switch(fmt->BytesPerPixel) {					\
	case 1:								\
	    /* No 8bpp alpha blitting */				\
	    if(alpha == 255)						\
		blitter(1, Uint8, OPAQUE_BLIT);				\
	    break;							\
									\
	case 2:								\
	    if(alpha == 255) {						\
		blitter(2, Uint8, OPAQUE_BLIT);				\
	    } else if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0xffff	\
		      && (fmt->Gmask == 0x07e0				\
			  || fmt->Rmask == 0x07e0			\
			  || fmt->Bmask == 0x07e0)) {			\
		if(alpha == 128)					\
		    blitter(2, Uint8, ALPHA_BLIT16_565_50);		\
		else							\
		    blitter(2, Uint8, ALPHA_BLIT16_565);		\
	    } else if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x7fff	\
		      && (fmt->Gmask == 0x03e0				\
			  || fmt->Rmask == 0x03e0			\
			  || fmt->Bmask == 0x03e0)) {			\
		if(alpha == 128)					\
		    blitter(2, Uint8, ALPHA_BLIT16_555_50);		\
		else							\
		    blitter(2, Uint8, ALPHA_BLIT16_555);		\
	    } else {							\
		/* any other 16bpp layout takes the generic path */	\
		blitter(2, Uint8, ALPHA_BLIT_ANY);			\
	    }								\
	    break;							\
									\
	case 3:								\
	    if(alpha == 255)						\
		blitter(3, Uint8, OPAQUE_BLIT);				\
	    else							\
		blitter(3, Uint8, ALPHA_BLIT_ANY);			\
	    break;							\
									\
	case 4:								\
	    if(alpha == 255) {						\
		blitter(4, Uint16, OPAQUE_BLIT);			\
	    } else if((fmt->Rmask | fmt->Gmask | fmt->Bmask)		\
			  == 0x00ffffff					\
		      && (fmt->Gmask == 0xff00				\
			  || fmt->Rmask == 0xff00			\
			  || fmt->Bmask == 0xff00)) {			\
		if(alpha == 128)					\
		    blitter(4, Uint16, ALPHA_BLIT32_888_50);		\
		else							\
		    blitter(4, Uint16, ALPHA_BLIT32_888);		\
	    } else {							\
		blitter(4, Uint16, ALPHA_BLIT_ANY);			\
	    }								\
	    break;							\
	}								\
    } while(0)
776 
777 #endif
778 
779 /*
780  * This takes care of the case when the surface is clipped on the left and/or
781  * right. Top clipping has already been taken care of.
782  */
/*
 * Blit a colorkey-RLE stream while clipping every run against the left
 * and right edges of `srcrect`.
 *
 *   w       - width of the encoded image; an offset equal to w ends a line
 *   srcbuf  - RLE stream, positioned at the first line to draw
 *   dst     - destination surface (supplies pixel format and pitch)
 *   dstbuf  - destination address of the first output line
 *   srcrect - visible part of the source; x/w give the horizontal clip
 *             window, h the number of lines to draw
 *   alpha   - per-surface alpha handed to the blit kernels
 */
static void RLEClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst,
			Uint8 *dstbuf, SDL_Rect *srcrect, unsigned alpha)
{
    SDL_PixelFormat *fmt = dst->format;

#define RLECLIPBLIT(bpp, Type, do_blit)					   \
    do {								   \
	const int left = srcrect->x;					   \
	const int right = left + srcrect->w;				   \
	int linecount = srcrect->h;					   \
	int ofs = 0;							   \
	/* rebase so that source column `left` lands on dstbuf */	   \
	dstbuf -= left * bpp;						   \
	for(;;) {							   \
	    const Type *hdr = (const Type *)srcbuf;			   \
	    const int run = hdr[1];					   \
	    ofs += hdr[0];						   \
	    srcbuf += 2 * sizeof(Type);					   \
	    if(run) {							   \
		/* pixels of this run hidden by the left border */	   \
		const int start = (left - ofs > 0) ? left - ofs : 0;	   \
		const int startcol = ofs + start;			   \
		int len = run - start;					   \
		if(ofs < right && len > 0) {				   \
		    /* trim whatever sticks out past the right border */  \
		    if(len > right - startcol)				   \
			len = right - startcol;				   \
		    do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \
			    len, bpp, alpha);				   \
		}							   \
		srcbuf += run * bpp;					   \
		ofs += run;						   \
	    } else if(!ofs) {						   \
		/* empty segment at the start of a line: end of data */   \
		break;							   \
	    }								   \
	    if(ofs == w) {						   \
		ofs = 0;						   \
		dstbuf += dst->pitch;					   \
		if(!--linecount)					   \
		    break;						   \
	    }								   \
	}								   \
    } while(0)

    CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt);

#undef RLECLIPBLIT

}
837 
838 
839 /* blit a colorkeyed RLE surface */
/*
 * Blit a colorkeyed RLE surface.
 *
 *   src     - RLE-encoded source surface (stream in map->sw_data->aux_data)
 *   srcrect - part of the source to draw
 *   dst     - destination surface; locked for the duration if required
 *   dstrect - x/y give the destination position
 *
 * Returns 0 on success, -1 if the destination could not be locked.
 */
int SDL_RLEBlit(SDL_Surface *src, SDL_Rect *srcrect,
		SDL_Surface *dst, SDL_Rect *dstrect)
{
	const int w = src->w;
	Uint8 *dstbuf;
	Uint8 *srcbuf;
	unsigned alpha;
	int vskip;

	/* Lock the destination if necessary */
	if ( SDL_MUSTLOCK(dst) && SDL_LockSurface(dst) < 0 ) {
		return(-1);
	}

	/* Set up the source and destination pointers */
	dstbuf = (Uint8 *)dst->pixels
	         + dstrect->y * dst->pitch
	         + dstrect->x * src->format->BytesPerPixel;
	srcbuf = (Uint8 *)src->map->sw_data->aux_data;

	/* skip lines at the top if necessary */
	vskip = srcrect->y;
	if(vskip) {
	    int ofs = 0;

/* Walk the stream without drawing until `vskip` complete lines have been
   consumed; jumps to `done` if the stream ends first. */
#define RLESKIP(bpp, Type)					\
	    for(;;) {						\
		const Type *hdr = (const Type *)srcbuf;		\
		const int run = hdr[1];				\
		ofs += hdr[0];					\
		srcbuf += sizeof(Type) * 2;			\
		if(run) {					\
		    srcbuf += run * bpp;			\
		    ofs += run;					\
		} else if(!ofs)					\
		    goto done;					\
		if(ofs == w) {					\
		    ofs = 0;					\
		    if(!--vskip)				\
			break;					\
		}						\
	    }

	    switch(src->format->BytesPerPixel) {
	    case 1: RLESKIP(1, Uint8); break;
	    case 2: RLESKIP(2, Uint8); break;
	    case 3: RLESKIP(3, Uint8); break;
	    case 4: RLESKIP(4, Uint16); break;
	    }

#undef RLESKIP

	}

	/* per-surface alpha only applies when SDL_SRCALPHA is set */
	if((src->flags & SDL_SRCALPHA) == SDL_SRCALPHA) {
	    alpha = src->format->alpha;
	} else {
	    alpha = 255;
	}

	if ( !srcrect->x && srcrect->w == src->w ) {
	    /* full-width blit: every run is drawn as encoded */
	    SDL_PixelFormat *fmt = src->format;

#define RLEBLIT(bpp, Type, do_blit)					      \
	    do {							      \
		int linecount = srcrect->h;				      \
		int ofs = 0;						      \
		for(;;) {						      \
		    const Type *hdr = (const Type *)srcbuf;		      \
		    const unsigned run = hdr[1];			      \
		    ofs += hdr[0];					      \
		    srcbuf += 2 * sizeof(Type);				      \
		    if(run) {						      \
			do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \
			srcbuf += run * bpp;				      \
			ofs += run;					      \
		    } else if(!ofs) {					      \
			/* empty segment at line start: end of data */	      \
			break;						      \
		    }							      \
		    if(ofs == w) {					      \
			ofs = 0;					      \
			dstbuf += dst->pitch;				      \
			if(!--linecount)				      \
			    break;					      \
		    }							      \
		}							      \
	    } while(0)

	    CHOOSE_BLIT(RLEBLIT, alpha, fmt);

#undef RLEBLIT
	} else {
	    /* left or right edge clipping needed, call clip blit */
	    RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha);
	}

done:
	/* Unlock the destination if necessary */
	if ( SDL_MUSTLOCK(dst) ) {
		SDL_UnlockSurface(dst);
	}
	return(0);
}
943 
944 #undef OPAQUE_BLIT
945 
946 /*
947  * Per-pixel blitting macros for translucent pixels:
948  * These use the same techniques as the per-surface blitting macros
949  */
950 
/*
 * Blend one translucent 32bpp pixel `src` onto `dst`.
 * The blend factor is the top 8 bits of `src`. Red and blue (mask 0xff00ff)
 * are interpolated together in a single word, green (mask 0xff00) on its
 * own; the subtraction relies on unsigned wrap-around. The top byte of the
 * value written to `dst` is always zero.
 */
#define BLIT_TRANSL_888(src, dst)					\
    do {								\
	const Uint32 spix = (src);					\
	const Uint32 dpix = (dst);					\
	const unsigned a = spix >> 24;					\
	Uint32 rb = dpix & 0xff00ff;					\
	Uint32 g = dpix & 0xff00;					\
	rb += ((spix & 0xff00ff) - rb) * a >> 8;			\
	g += ((spix & 0xff00) - g) * a >> 8;				\
	(dst) = (rb & 0xff00ff) | (g & 0xff00);				\
    } while(0)
968 
/*
 * Blend one translucent pixel onto a 16bpp 565 destination.
 * `src` is a 32-bit word: the colour is spread out under the mask
 * 0x07e0f81f and a 5-bit blend factor sits in bits 5-9 (mask 0x3e0).
 * The destination pixel is spread out the same way so that all three
 * components are interpolated in one multiplication, then folded back
 * into 16 bits.
 */
#define BLIT_TRANSL_565(src, dst)					\
    do {								\
	const Uint32 spix = (src);					\
	Uint32 dpix = (dst);						\
	const unsigned a = (spix >> 5) & 0x1f;				\
	const Uint32 scol = spix & 0x07e0f81f;				\
	dpix = (dpix | dpix << 16) & 0x07e0f81f;			\
	dpix = (dpix + ((scol - dpix) * a >> 5)) & 0x07e0f81f;		\
	(dst) = (Uint16)(dpix | dpix >> 16);				\
    } while(0)
984 
/*
 * Blend one translucent pixel onto a 16bpp 555 destination.
 * Same scheme as the 565 variant, with the colour spread out under the
 * mask 0x03e07c1f and the 5-bit blend factor in bits 5-9 of `src`.
 */
#define BLIT_TRANSL_555(src, dst)					\
    do {								\
	const Uint32 spix = (src);					\
	Uint32 dpix = (dst);						\
	const unsigned a = (spix >> 5) & 0x1f;				\
	const Uint32 scol = spix & 0x03e07c1f;				\
	dpix = (dpix | dpix << 16) & 0x03e07c1f;			\
	dpix = (dpix + ((scol - dpix) * a >> 5)) & 0x03e07c1f;		\
	(dst) = (Uint16)(dpix | dpix >> 16);				\
    } while(0)
996 
/*
 * Used to save the destination format in the encoding. Designed to be
 * macro-compatible with SDL_PixelFormat but without the unneeded fields:
 * the field names match, so the pixel-format macros can be applied to a
 * pointer to this struct.
 *
 * The alpha encoder writes one of these at the very start of the RLE
 * buffer; the encoded scan lines begin sizeof(RLEDestFormat) bytes in.
 */
typedef struct {
	Uint8  BytesPerPixel;	/* pixel size of the destination format */
	Uint8  Rloss;		/* per-component loss values ... */
	Uint8  Gloss;
	Uint8  Bloss;
	Uint8  Rshift;		/* ... shifts ... */
	Uint8  Gshift;
	Uint8  Bshift;
	Uint8  Ashift;
	Uint32 Rmask;		/* ... and masks, copied from the destination */
	Uint32 Gmask;
	Uint32 Bmask;
	Uint32 Amask;
} RLEDestFormat;
1013 
/*
 * Blit a pixel-alpha RLE surface clipped at the right and/or left edges.
 *
 *   w        full width in pixels of the encoded surface
 *   srcbuf   encoded data, positioned at the first line to draw
 *   dst      destination surface (its format selects the blender and its
 *            pitch is used to step from row to row)
 *   dstbuf   address of the destination pixel that receives column
 *            srcrect->x of the first line
 *   srcrect  srcrect->x and srcrect->w give the visible column range,
 *            srcrect->h the number of lines to draw
 *
 * Only 2 and 4 byte destination pixels are handled; for any other size the
 * function does nothing. It returns early when the end-of-data marker
 * (an opaque segment with zero offset and zero run at the start of a line)
 * is reached.
 */
static void RLEAlphaClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst,
			     Uint8 *dstbuf, SDL_Rect *srcrect)
{
    SDL_PixelFormat *df = dst->format;
    /*
     * clipped blitter: Ptype is the destination pixel type,
     * Ctype the type of the opaque skip/run counts (translucent
     * counts are always Uint16), and do_blend the macro
     * to blend one pixel.
     */
#define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend)			  \
    do {								  \
	int linecount = srcrect->h;					  \
	int left = srcrect->x;						  \
	int right = left + srcrect->w;					  \
	/* move the row origin back by `left` pixels so that the */	  \
	/* encoded x offsets can index the destination directly */	  \
	dstbuf -= left * sizeof(Ptype);					  \
	do {								  \
	    int ofs = 0;						  \
	    /* blit opaque pixels on one line */			  \
	    do {							  \
		unsigned run;						  \
		ofs += ((Ctype *)srcbuf)[0];				  \
		run = ((Ctype *)srcbuf)[1];				  \
		srcbuf += 2 * sizeof(Ctype);				  \
		if(run) {						  \
		    /* clip to left and right borders */		  \
		    int cofs = ofs;					  \
		    int crun = run;					  \
		    if(left - cofs > 0) {				  \
			crun -= left - cofs;				  \
			cofs = left;					  \
		    }							  \
		    if(crun > right - cofs)				  \
			crun = right - cofs;				  \
		    if(crun > 0)					  \
			PIXEL_COPY(dstbuf + cofs * sizeof(Ptype),	  \
				   srcbuf + (cofs - ofs) * sizeof(Ptype), \
				   (unsigned)crun, sizeof(Ptype));	  \
		    /* always consume the whole run, clipped or not */	  \
		    srcbuf += run * sizeof(Ptype);			  \
		    ofs += run;						  \
		} else if(!ofs)						  \
		    return;						  \
	    } while(ofs < w);						  \
	    /* skip padding if necessary */				  \
	    if(sizeof(Ptype) == 2)					  \
		srcbuf += (uintptr_t)srcbuf & 2;			  \
	    /* blit translucent pixels on the same line */		  \
	    ofs = 0;							  \
	    do {							  \
		unsigned run;						  \
		ofs += ((Uint16 *)srcbuf)[0];				  \
		run = ((Uint16 *)srcbuf)[1];				  \
		srcbuf += 4;						  \
		if(run) {						  \
		    /* clip to left and right borders */		  \
		    int cofs = ofs;					  \
		    int crun = run;					  \
		    if(left - cofs > 0) {				  \
			crun -= left - cofs;				  \
			cofs = left;					  \
		    }							  \
		    if(crun > right - cofs)				  \
			crun = right - cofs;				  \
		    if(crun > 0) {					  \
			Ptype *dst = (Ptype *)dstbuf + cofs;		  \
			Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs);	  \
			int i;						  \
			for(i = 0; i < crun; i++)			  \
			    do_blend(src[i], dst[i]);			  \
		    }							  \
		    /* translucent pixels are stored as 32-bit words */  \
		    srcbuf += run * 4;					  \
		    ofs += run;						  \
		}							  \
	    } while(ofs < w);						  \
	    dstbuf += dst->pitch;					  \
	} while(--linecount);						  \
    } while(0)

    switch(df->BytesPerPixel) {
    case 2:
	/* a component mask of 0x07e0 identifies the 565 layout;
	   every other 16bpp destination is blended as 555 */
	if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0
	   || df->Bmask == 0x07e0)
	    RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565);
	else
	    RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555);
	break;
    case 4:
	RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888);
	break;
    }
}
1105 
/*
 * Blit a pixel-alpha RLE surface.
 *
 *   src      RLE-encoded source; the encoded data is read from
 *            src->map->sw_data->aux_data, after the RLEDestFormat header
 *   srcrect  part of the source to draw: srcrect->y lines are skipped,
 *            srcrect->h lines are drawn, and the clipped blitter is used
 *            when srcrect->x is non-zero or srcrect->w differs from src->w
 *   dst      destination surface; locked for the duration of the blit if
 *            SDL_MUSTLOCK() says so
 *   dstrect  dstrect->x / dstrect->y give the destination position
 *
 * Returns -1 if the destination could not be locked, 0 otherwise.
 * Only 2 and 4 byte destination pixels are drawn.
 */
int SDL_RLEAlphaBlit(SDL_Surface *src, SDL_Rect *srcrect,
		     SDL_Surface *dst, SDL_Rect *dstrect)
{
    int x, y;
    int w = src->w;
    Uint8 *srcbuf, *dstbuf;
    SDL_PixelFormat *df = dst->format;

    /* Lock the destination if necessary */
    if ( SDL_MUSTLOCK(dst) ) {
	if ( SDL_LockSurface(dst) < 0 ) {
	    return -1;
	}
    }

    x = dstrect->x;
    y = dstrect->y;
    dstbuf = (Uint8 *)dst->pixels
	     + y * dst->pitch + x * df->BytesPerPixel;
    /* the encoded lines start right after the saved destination format */
    srcbuf = (Uint8 *)src->map->sw_data->aux_data + sizeof(RLEDestFormat);

    {
	/* skip lines at the top if necessary */
	int vskip = srcrect->y;
	if(vskip) {
	    int ofs;
	    if(df->BytesPerPixel == 2) {
		/* the 16/32 interleaved format: 8-bit counts and 16-bit
		   pixels for the opaque part, 16-bit counts and 32-bit
		   pixels for the translucent part */
		do {
		    /* skip opaque line */
		    ofs = 0;
		    do {
			int run;
			ofs += srcbuf[0];
			run = srcbuf[1];
			srcbuf += 2;
			if(run) {
			    srcbuf += 2 * run;
			    ofs += run;
			} else if(!ofs)
			    goto done;	/* end-of-data marker */
		    } while(ofs < w);

		    /* skip padding */
		    srcbuf += (uintptr_t)srcbuf & 2;

		    /* skip translucent line */
		    ofs = 0;
		    do {
			int run;
			ofs += ((Uint16 *)srcbuf)[0];
			run = ((Uint16 *)srcbuf)[1];
			/* 4 bytes of counts plus 4 bytes per pixel */
			srcbuf += 4 * (run + 1);
			ofs += run;
		    } while(ofs < w);
		} while(--vskip);
	    } else {
		/* the 32/32 interleaved format */
		vskip <<= 1;	/* opaque and translucent have same format */
		do {
		    ofs = 0;
		    do {
			int run;
			ofs += ((Uint16 *)srcbuf)[0];
			run = ((Uint16 *)srcbuf)[1];
			srcbuf += 4;
			if(run) {
			    srcbuf += 4 * run;
			    ofs += run;
			} else if(!ofs)
			    goto done;	/* end-of-data marker */
		    } while(ofs < w);
		} while(--vskip);
	    }
	}
    }

    /* if left or right edge clipping needed, call clip blit */
    if(srcrect->x || srcrect->w != src->w) {
	RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect);
    } else {

	/*
	 * non-clipped blitter. Ptype is the destination pixel type,
	 * Ctype the type of the opaque skip/run counts (translucent
	 * counts are always Uint16), and do_blend the
	 * macro to blend one pixel.
	 */
#define RLEALPHABLIT(Ptype, Ctype, do_blend)				 \
	do {								 \
	    int linecount = srcrect->h;					 \
	    do {							 \
		int ofs = 0;						 \
		/* blit opaque pixels on one line */			 \
		do {							 \
		    unsigned run;					 \
		    ofs += ((Ctype *)srcbuf)[0];			 \
		    run = ((Ctype *)srcbuf)[1];				 \
		    srcbuf += 2 * sizeof(Ctype);			 \
		    if(run) {						 \
			PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \
				   run, sizeof(Ptype));			 \
			srcbuf += run * sizeof(Ptype);			 \
			ofs += run;					 \
		    } else if(!ofs)					 \
			goto done;					 \
		} while(ofs < w);					 \
		/* skip padding if necessary */				 \
		if(sizeof(Ptype) == 2)					 \
		    srcbuf += (uintptr_t)srcbuf & 2;		 	 \
		/* blit translucent pixels on the same line */		 \
		ofs = 0;						 \
		do {							 \
		    unsigned run;					 \
		    ofs += ((Uint16 *)srcbuf)[0];			 \
		    run = ((Uint16 *)srcbuf)[1];			 \
		    srcbuf += 4;					 \
		    if(run) {						 \
			Ptype *dst = (Ptype *)dstbuf + ofs;		 \
			unsigned i;					 \
			for(i = 0; i < run; i++) {			 \
			    Uint32 src = *(Uint32 *)srcbuf;		 \
			    do_blend(src, *dst);			 \
			    srcbuf += 4;				 \
			    dst++;					 \
			}						 \
			ofs += run;					 \
		    }							 \
		} while(ofs < w);					 \
		dstbuf += dst->pitch;					 \
	    } while(--linecount);					 \
	} while(0)

	switch(df->BytesPerPixel) {
	case 2:
	    /* a component mask of 0x07e0 identifies the 565 layout;
	       every other 16bpp destination is blended as 555 */
	    if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0
	       || df->Bmask == 0x07e0)
		RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565);
	    else
		RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555);
	    break;
	case 4:
	    RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888);
	    break;
	}
    }

 done:
    /* Unlock the destination if necessary */
    if ( SDL_MUSTLOCK(dst) ) {
	SDL_UnlockSurface(dst);
    }
    return 0;
}
1260 
1261 /*
1262  * Auxiliary functions:
1263  * The encoding functions take 32bpp rgb + a, and
1264  * return the number of bytes copied to the destination.
1265  * The decoding functions copy to 32bpp rgb + a, and
1266  * return the number of bytes copied from the source.
1267  * These are only used in the encoder and un-RLE code and are therefore not
1268  * highly optimised.
1269  */
1270 
1271 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */
copy_opaque_16(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)1272 static int copy_opaque_16(void *dst, Uint32 *src, int n,
1273 			  SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
1274 {
1275     int i;
1276     Uint16 *d = dst;
1277     for(i = 0; i < n; i++) {
1278 	unsigned r, g, b;
1279 	RGB_FROM_PIXEL(*src, sfmt, r, g, b);
1280 	PIXEL_FROM_RGB(*d, dfmt, r, g, b);
1281 	src++;
1282 	d++;
1283     }
1284     return n * 2;
1285 }
1286 
1287 /* decode opaque pixels from 16bpp to 32bpp rgb + a */
uncopy_opaque_16(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)1288 static int uncopy_opaque_16(Uint32 *dst, void *src, int n,
1289 			    RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
1290 {
1291     int i;
1292     Uint16 *s = src;
1293     unsigned alpha = dfmt->Amask ? 255 : 0;
1294     for(i = 0; i < n; i++) {
1295 	unsigned r, g, b;
1296 	RGB_FROM_PIXEL(*s, sfmt, r, g, b);
1297 	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha);
1298 	s++;
1299 	dst++;
1300     }
1301     return n * 2;
1302 }
1303 
1304 
1305 
1306 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */
copy_transl_565(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)1307 static int copy_transl_565(void *dst, Uint32 *src, int n,
1308 			   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
1309 {
1310     int i;
1311     Uint32 *d = dst;
1312     for(i = 0; i < n; i++) {
1313 	unsigned r, g, b, a;
1314 	Uint16 pix;
1315 	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
1316 	PIXEL_FROM_RGB(pix, dfmt, r, g, b);
1317 	*d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0);
1318 	src++;
1319 	d++;
1320     }
1321     return n * 4;
1322 }
1323 
1324 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */
copy_transl_555(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)1325 static int copy_transl_555(void *dst, Uint32 *src, int n,
1326 			   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
1327 {
1328     int i;
1329     Uint32 *d = dst;
1330     for(i = 0; i < n; i++) {
1331 	unsigned r, g, b, a;
1332 	Uint16 pix;
1333 	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
1334 	PIXEL_FROM_RGB(pix, dfmt, r, g, b);
1335 	*d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0);
1336 	src++;
1337 	d++;
1338     }
1339     return n * 4;
1340 }
1341 
1342 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */
uncopy_transl_16(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)1343 static int uncopy_transl_16(Uint32 *dst, void *src, int n,
1344 			    RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
1345 {
1346     int i;
1347     Uint32 *s = src;
1348     for(i = 0; i < n; i++) {
1349 	unsigned r, g, b, a;
1350 	Uint32 pix = *s++;
1351 	a = (pix & 0x3e0) >> 2;
1352 	pix = (pix & ~0x3e0) | pix >> 16;
1353 	RGB_FROM_PIXEL(pix, sfmt, r, g, b);
1354 	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
1355 	dst++;
1356     }
1357     return n * 4;
1358 }
1359 
1360 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
copy_32(void * dst,Uint32 * src,int n,SDL_PixelFormat * sfmt,SDL_PixelFormat * dfmt)1361 static int copy_32(void *dst, Uint32 *src, int n,
1362 		   SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt)
1363 {
1364     int i;
1365     Uint32 *d = dst;
1366     for(i = 0; i < n; i++) {
1367 	unsigned r, g, b, a;
1368 	Uint32 pixel;
1369 	RGBA_FROM_8888(*src, sfmt, r, g, b, a);
1370 	PIXEL_FROM_RGB(pixel, dfmt, r, g, b);
1371 	*d++ = pixel | a << 24;
1372 	src++;
1373     }
1374     return n * 4;
1375 }
1376 
1377 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */
uncopy_32(Uint32 * dst,void * src,int n,RLEDestFormat * sfmt,SDL_PixelFormat * dfmt)1378 static int uncopy_32(Uint32 *dst, void *src, int n,
1379 		     RLEDestFormat *sfmt, SDL_PixelFormat *dfmt)
1380 {
1381     int i;
1382     Uint32 *s = src;
1383     for(i = 0; i < n; i++) {
1384 	unsigned r, g, b, a;
1385 	Uint32 pixel = *s++;
1386 	RGB_FROM_PIXEL(pixel, sfmt, r, g, b);
1387 	a = pixel >> 24;
1388 	PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a);
1389 	dst++;
1390     }
1391     return n * 4;
1392 }
1393 
/*
 * Alpha classification of a pixel, given a pointer `fmt` to a format with
 * Amask and Ashift members.
 *   ISOPAQUE: the alpha component is exactly 255.
 *   ISTRANSL: the alpha component is in the range 1..254; subtracting 1
 *             as unsigned makes an alpha of 0 wrap around and fail the
 *             comparison.
 * Both arguments are parenthesised so that any pointer expression (for
 * example `&format`) can be passed as `fmt`.
 */
#define ISOPAQUE(pixel, fmt)	\
    ((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) == 255)

#define ISTRANSL(pixel, fmt)	\
    ((unsigned)((((pixel) & (fmt)->Amask) >> (fmt)->Ashift) - 1U) < 254U)
1398 
/*
 * Convert surface to be quickly alpha-blittable onto dest, if possible.
 *
 * The source must be 32 bits per pixel and must already be mapped to a
 * destination (surface->map->dst); the destination must be 16bpp 565/555
 * or 32bpp with the colour masks covering exactly 0x00ffffff.
 *
 * On success the encoded data is stored in surface->map->sw_data->aux_data:
 * an RLEDestFormat header followed, for each scan line, by the opaque
 * segments and then the translucent segments of that line. Trailing blank
 * lines are dropped and the stream ends with an opaque (0, 0) count pair.
 * The original pixel array is freed unless the surface is SDL_PREALLOC or
 * SDL_HWSURFACE.
 *
 * Returns 0 on success, -1 if the formats are unsupported or the buffer
 * cannot be allocated.
 */
static int RLEAlphaSurface(SDL_Surface *surface)
{
    SDL_Surface *dest;
    SDL_PixelFormat *df;
    int maxsize = 0;
    int max_opaque_run;
    int max_transl_run = 65535;
    unsigned masksum;
    Uint8 *rlebuf, *dst;
    int (*copy_opaque)(void *, Uint32 *, int,
		       SDL_PixelFormat *, SDL_PixelFormat *);
    int (*copy_transl)(void *, Uint32 *, int,
		       SDL_PixelFormat *, SDL_PixelFormat *);

    dest = surface->map->dst;
    if(!dest)
	return -1;
    df = dest->format;
    if(surface->format->BitsPerPixel != 32)
	return -1;		/* only 32bpp source supported */

    /* find out whether the destination is one we support,
       and determine the max size of the encoded result */
    masksum = df->Rmask | df->Gmask | df->Bmask;
    switch(df->BytesPerPixel) {
    case 2:
	/* 16bpp: only support 565 and 555 formats */
	switch(masksum) {
	case 0xffff:
	    if(df->Gmask == 0x07e0
	       || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) {
		copy_opaque = copy_opaque_16;
		copy_transl = copy_transl_565;
	    } else
		return -1;
	    break;
	case 0x7fff:
	    if(df->Gmask == 0x03e0
	       || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) {
		copy_opaque = copy_opaque_16;
		copy_transl = copy_transl_555;
	    } else
		return -1;
	    break;
	default:
	    return -1;
	}
	max_opaque_run = 255;	/* runs stored as bytes */

	/* worst case is alternating opaque and translucent pixels,
	   with room for alignment padding between lines */
	maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2;
	break;
    case 4:
	if(masksum != 0x00ffffff)
	    return -1;		/* requires unused high byte */
	copy_opaque = copy_32;
	copy_transl = copy_32;
	/* counts are written as 16-bit values for this depth (see
	   ADD_OPAQUE_COUNTS below), but opaque runs are still capped
	   at 255 pixels per segment */
	max_opaque_run = 255;

	/* worst case is alternating opaque and translucent pixels */
	maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4;
	break;
    default:
	return -1;		/* anything else unsupported right now */
    }

    maxsize += sizeof(RLEDestFormat);
    rlebuf = (Uint8 *)SDL_malloc(maxsize);
    if(!rlebuf) {
	SDL_OutOfMemory();
	return -1;
    }
    {
	/* save the destination format so we can undo the encoding later */
	RLEDestFormat *r = (RLEDestFormat *)rlebuf;
	r->BytesPerPixel = df->BytesPerPixel;
	r->Rloss = df->Rloss;
	r->Gloss = df->Gloss;
	r->Bloss = df->Bloss;
	r->Rshift = df->Rshift;
	r->Gshift = df->Gshift;
	r->Bshift = df->Bshift;
	r->Ashift = df->Ashift;
	r->Rmask = df->Rmask;
	r->Gmask = df->Gmask;
	r->Bmask = df->Bmask;
	r->Amask = df->Amask;
    }
    dst = rlebuf + sizeof(RLEDestFormat);

    /* Do the actual encoding */
    {
	int x, y;
	int h = surface->h, w = surface->w;
	SDL_PixelFormat *sf = surface->format;
	Uint32 *src = (Uint32 *)surface->pixels;
	Uint8 *lastline = dst;	/* end of last non-blank line */

	/* opaque counts are 8 or 16 bits, depending on target depth */
#define ADD_OPAQUE_COUNTS(n, m)			\
	if(df->BytesPerPixel == 4) {		\
	    ((Uint16 *)dst)[0] = n;		\
	    ((Uint16 *)dst)[1] = m;		\
	    dst += 4;				\
	} else {				\
	    dst[0] = n;				\
	    dst[1] = m;				\
	    dst += 2;				\
	}

	/* translucent counts are always 16 bit */
#define ADD_TRANSL_COUNTS(n, m)		\
	(((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4)

	for(y = 0; y < h; y++) {
	    int runstart, skipstart;
	    int blankline = 0;
	    /* First encode all opaque pixels of a scan line */
	    x = 0;
	    do {
		int run, skip, len;
		skipstart = x;
		while(x < w && !ISOPAQUE(src[x], sf))
		    x++;
		runstart = x;
		while(x < w && ISOPAQUE(src[x], sf))
		    x++;
		skip = runstart - skipstart;
		/* no opaque pixel anywhere on this line */
		if(skip == w)
		    blankline = 1;
		run = x - runstart;
		/* a skip too long for one count is split into
		   segments with an empty run */
		while(skip > max_opaque_run) {
		    ADD_OPAQUE_COUNTS(max_opaque_run, 0);
		    skip -= max_opaque_run;
		}
		len = MIN(run, max_opaque_run);
		ADD_OPAQUE_COUNTS(skip, len);
		dst += copy_opaque(dst, src + runstart, len, sf, df);
		runstart += len;
		run -= len;
		/* likewise, the rest of a long run is emitted as
		   segments with a zero skip */
		while(run) {
		    len = MIN(run, max_opaque_run);
		    ADD_OPAQUE_COUNTS(0, len);
		    dst += copy_opaque(dst, src + runstart, len, sf, df);
		    runstart += len;
		    run -= len;
		}
	    } while(x < w);

	    /* Make sure the next output address is 32-bit aligned */
	    dst += (uintptr_t)dst & 2;

	    /* Next, encode all translucent pixels of the same scan line */
	    x = 0;
	    do {
		int run, skip, len;
		skipstart = x;
		while(x < w && !ISTRANSL(src[x], sf))
		    x++;
		runstart = x;
		while(x < w && ISTRANSL(src[x], sf))
		    x++;
		skip = runstart - skipstart;
		/* the line stays blank only if it has no translucent
		   pixel either */
		blankline &= (skip == w);
		run = x - runstart;
		while(skip > max_transl_run) {
		    ADD_TRANSL_COUNTS(max_transl_run, 0);
		    skip -= max_transl_run;
		}
		len = MIN(run, max_transl_run);
		ADD_TRANSL_COUNTS(skip, len);
		dst += copy_transl(dst, src + runstart, len, sf, df);
		runstart += len;
		run -= len;
		while(run) {
		    len = MIN(run, max_transl_run);
		    ADD_TRANSL_COUNTS(0, len);
		    dst += copy_transl(dst, src + runstart, len, sf, df);
		    runstart += len;
		    run -= len;
		}
		if(!blankline)
		    lastline = dst;
	    } while(x < w);

	    /* pitch is in bytes, src steps in 32-bit pixels */
	    src += surface->pitch >> 2;
	}
	dst = lastline;		/* back up past trailing blank lines */
	ADD_OPAQUE_COUNTS(0, 0);	/* end-of-data marker */
    }

#undef ADD_OPAQUE_COUNTS
#undef ADD_TRANSL_COUNTS

    /* Now that we have it encoded, release the original pixels */
    if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC
       && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) {
	SDL_free( surface->pixels );
	surface->pixels = NULL;
    }

    /* realloc the buffer to release unused memory */
    {
	/* if the realloc fails, keep using the original, larger block */
	Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
	if(!p)
	    p = rlebuf;
	surface->map->sw_data->aux_data = p;
    }

    return 0;
}
1612 
getpix_8(Uint8 * srcbuf)1613 static Uint32 getpix_8(Uint8 *srcbuf)
1614 {
1615     return *srcbuf;
1616 }
1617 
getpix_16(Uint8 * srcbuf)1618 static Uint32 getpix_16(Uint8 *srcbuf)
1619 {
1620     return *(Uint16 *)srcbuf;
1621 }
1622 
getpix_24(Uint8 * srcbuf)1623 static Uint32 getpix_24(Uint8 *srcbuf)
1624 {
1625 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
1626     return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16);
1627 #else
1628     return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2];
1629 #endif
1630 }
1631 
getpix_32(Uint8 * srcbuf)1632 static Uint32 getpix_32(Uint8 *srcbuf)
1633 {
1634     return *(Uint32 *)srcbuf;
1635 }
1636 
1637 typedef Uint32 (*getpix_func)(Uint8 *);
1638 
1639 static getpix_func getpixes[4] = {
1640     getpix_8, getpix_16, getpix_24, getpix_32
1641 };
1642 
/*
 * Encode a colorkeyed surface as a stream of <skip> <run> <pixels>
 * segments and store the result in surface->map->sw_data->aux_data.
 *
 * Counts are 16 bits wide for 4-byte pixels and 8 bits wide otherwise;
 * pixels are copied verbatim. A pixel is transparent when it equals the
 * colorkey after the alpha mask bits have been removed from both. Trailing
 * lines that contain only transparent pixels are dropped and the stream
 * ends with a (0, 0) count pair. The original pixel array is freed unless
 * the surface is SDL_PREALLOC or SDL_HWSURFACE.
 *
 * Returns 0 on success, -1 if the pixel size is not 1..4 bytes or the
 * encoding buffer cannot be allocated.
 */
static int RLEColorkeySurface(SDL_Surface *surface)
{
	Uint8 *rlebuf, *dst;
	int maxn;
	int y;
	Uint8 *srcbuf, *lastline;
	int maxsize = 0;
	int bpp = surface->format->BytesPerPixel;
	getpix_func getpix;
	Uint32 ckey, rgbmask;
	int w, h;

	/* calculate the worst case size for the compressed surface */
	switch(bpp) {
	case 1:
	    /* worst case is alternating opaque and transparent pixels,
	       starting with an opaque pixel */
	    maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2;
	    break;
	case 2:
	case 3:
	    /* worst case is solid runs, at most 255 pixels wide */
	    maxsize = surface->h * (2 * (surface->w / 255 + 1)
				    + surface->w * bpp) + 2;
	    break;
	case 4:
	    /* worst case is solid runs, at most 65535 pixels wide */
	    maxsize = surface->h * (4 * (surface->w / 65535 + 1)
				    + surface->w * 4) + 4;
	    break;
	default:
	    /* no size estimate and no pixel reader for this depth:
	       refuse instead of encoding into an empty buffer and
	       indexing getpixes[] out of range */
	    return(-1);
	}

	rlebuf = (Uint8 *)SDL_malloc(maxsize);
	if ( rlebuf == NULL ) {
		SDL_OutOfMemory();
		return(-1);
	}

	/* Set up the conversion */
	srcbuf = (Uint8 *)surface->pixels;
	maxn = bpp == 4 ? 65535 : 255;	/* largest value one count can hold */
	dst = rlebuf;
	rgbmask = ~surface->format->Amask;
	ckey = surface->format->colorkey & rgbmask;
	lastline = dst;
	getpix = getpixes[bpp - 1];
	w = surface->w;
	h = surface->h;

	/* append one <skip> <run> count pair in the width used for bpp */
#define ADD_COUNTS(n, m)			\
	do {					\
	    if(bpp == 4) {			\
		((Uint16 *)dst)[0] = (n);	\
		((Uint16 *)dst)[1] = (m);	\
		dst += 4;			\
	    } else {				\
		dst[0] = (n);			\
		dst[1] = (m);			\
		dst += 2;			\
	    }					\
	} while(0)

	for(y = 0; y < h; y++) {
	    int x = 0;
	    int blankline = 0;
	    do {
		int run, skip, len;
		int runstart;
		int skipstart = x;

		/* find run of transparent, then opaque pixels */
		while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey)
		    x++;
		runstart = x;
		while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey)
		    x++;
		skip = runstart - skipstart;
		if(skip == w)
		    blankline = 1;
		run = x - runstart;

		/* encode segment: a skip or run too long for one count
		   is split over several segments */
		while(skip > maxn) {
		    ADD_COUNTS(maxn, 0);
		    skip -= maxn;
		}
		len = MIN(run, maxn);
		ADD_COUNTS(skip, len);
		SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
		dst += len * bpp;
		run -= len;
		runstart += len;
		while(run) {
		    len = MIN(run, maxn);
		    ADD_COUNTS(0, len);
		    SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp);
		    dst += len * bpp;
		    runstart += len;
		    run -= len;
		}
		if(!blankline)
		    lastline = dst;
	    } while(x < w);

	    srcbuf += surface->pitch;
	}
	dst = lastline;		/* back up past trailing blank lines */
	ADD_COUNTS(0, 0);	/* end-of-data marker */

#undef ADD_COUNTS

	/* Now that we have it encoded, release the original pixels */
	if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC
	   && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) {
	    SDL_free( surface->pixels );
	    surface->pixels = NULL;
	}

	/* realloc the buffer to release unused memory */
	{
	    /* If realloc returns NULL, the original block is left intact */
	    Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf);
	    if(!p)
		p = rlebuf;
	    surface->map->sw_data->aux_data = p;
	}

	return(0);
}
1770 
/*
 * RLE-encode a surface for accelerated blitting.
 *
 * Any previous encoding is undone first. Surfaces with SDL_SRCCOLORKEY are
 * encoded with the colorkey encoder; surfaces with SDL_SRCALPHA and an
 * alpha mask are encoded with the per-pixel alpha encoder; everything else
 * is rejected.
 *
 * Returns 0 and sets SDL_RLEACCEL on success. Returns -1 if the surface
 * has fewer than 8 bits per pixel, cannot be locked, has no pixel data to
 * encode, or the encoder fails.
 */
int SDL_RLESurface(SDL_Surface *surface)
{
	int retcode;

	/* Clear any previous RLE conversion */
	if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) {
		SDL_UnRLESurface(surface, 1);
	}

	/* We don't support RLE encoding of bitmaps */
	if ( surface->format->BitsPerPixel < 8 ) {
		return(-1);
	}

	/* Lock the surface if it's in hardware */
	if ( SDL_MUSTLOCK(surface) ) {
		if ( SDL_LockSurface(surface) < 0 ) {
			return(-1);
		}
	}

	/* Encode */
	if ( surface->pixels == NULL ) {
	    /* Nothing to read from, e.g. because undoing the previous
	       encoding above could not re-create the pixel array. Both
	       encoders read surface->pixels directly, so bail out here.
	       The check is made after locking in case locking is what
	       makes the pixels available. */
	    retcode = -1;
	} else if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) {
	    retcode = RLEColorkeySurface(surface);
	} else {
	    if((surface->flags & SDL_SRCALPHA) == SDL_SRCALPHA
	       && surface->format->Amask != 0)
		retcode = RLEAlphaSurface(surface);
	    else
		retcode = -1;	/* no RLE for per-surface alpha sans ckey */
	}

	/* Unlock the surface if it's in hardware */
	if ( SDL_MUSTLOCK(surface) ) {
		SDL_UnlockSurface(surface);
	}

	if(retcode < 0)
	    return -1;

	/* The surface is now accelerated */
	surface->flags |= SDL_RLEACCEL;

	return(0);
}
1816 
1817 /*
1818  * Un-RLE a surface with pixel alpha
1819  * This may not give back exactly the image before RLE-encoding; all
1820  * completely transparent pixels will be lost, and colour and alpha depth
1821  * may have been reduced (when encoding for 16bpp targets).
1822  */
UnRLEAlpha(SDL_Surface * surface)1823 static SDL_bool UnRLEAlpha(SDL_Surface *surface)
1824 {
1825     Uint8 *srcbuf;
1826     Uint32 *dst;
1827     SDL_PixelFormat *sf = surface->format;
1828     RLEDestFormat *df = surface->map->sw_data->aux_data;
1829     int (*uncopy_opaque)(Uint32 *, void *, int,
1830 			 RLEDestFormat *, SDL_PixelFormat *);
1831     int (*uncopy_transl)(Uint32 *, void *, int,
1832 			 RLEDestFormat *, SDL_PixelFormat *);
1833     int w = surface->w;
1834     int bpp = df->BytesPerPixel;
1835 
1836     if(bpp == 2) {
1837 	uncopy_opaque = uncopy_opaque_16;
1838 	uncopy_transl = uncopy_transl_16;
1839     } else {
1840 	uncopy_opaque = uncopy_transl = uncopy_32;
1841     }
1842 
1843     surface->pixels = SDL_malloc(surface->h * surface->pitch);
1844     if ( !surface->pixels ) {
1845         return(SDL_FALSE);
1846     }
1847     /* fill background with transparent pixels */
1848     SDL_memset(surface->pixels, 0, surface->h * surface->pitch);
1849 
1850     dst = surface->pixels;
1851     srcbuf = (Uint8 *)(df + 1);
1852     for(;;) {
1853 	/* copy opaque pixels */
1854 	int ofs = 0;
1855 	do {
1856 	    unsigned run;
1857 	    if(bpp == 2) {
1858 		ofs += srcbuf[0];
1859 		run = srcbuf[1];
1860 		srcbuf += 2;
1861 	    } else {
1862 		ofs += ((Uint16 *)srcbuf)[0];
1863 		run = ((Uint16 *)srcbuf)[1];
1864 		srcbuf += 4;
1865 	    }
1866 	    if(run) {
1867 		srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf);
1868 		ofs += run;
1869 	    } else if(!ofs)
1870 		return(SDL_TRUE);
1871 	} while(ofs < w);
1872 
1873 	/* skip padding if needed */
1874 	if(bpp == 2)
1875 	    srcbuf += (uintptr_t)srcbuf & 2;
1876 
1877 	/* copy translucent pixels */
1878 	ofs = 0;
1879 	do {
1880 	    unsigned run;
1881 	    ofs += ((Uint16 *)srcbuf)[0];
1882 	    run = ((Uint16 *)srcbuf)[1];
1883 	    srcbuf += 4;
1884 	    if(run) {
1885 		srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf);
1886 		ofs += run;
1887 	    }
1888 	} while(ofs < w);
1889 	dst += surface->pitch >> 2;
1890     }
1891     /* Make the compiler happy */
1892     return(SDL_TRUE);
1893 }
1894 
/*
 * Undo the RLE encoding of a surface.
 *
 * Does nothing unless SDL_RLEACCEL is set. When `recode` is non-zero and
 * the surface is neither SDL_PREALLOC nor SDL_HWSURFACE, the pixel array
 * is re-created from the encoded data first; if that fails the surface is
 * left encoded, with SDL_RLEACCEL set again. Otherwise the encoded data is
 * freed.
 */
void SDL_UnRLESurface(SDL_Surface *surface, int recode)
{
    int rebuild;

    if ( (surface->flags & SDL_RLEACCEL) != SDL_RLEACCEL ) {
	return;
    }
    surface->flags &= ~SDL_RLEACCEL;

    rebuild = recode
	      && (surface->flags & SDL_PREALLOC) != SDL_PREALLOC
	      && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE;

    if ( rebuild ) {
	if ( (surface->flags & SDL_SRCCOLORKEY) != SDL_SRCCOLORKEY ) {
	    /* per-pixel alpha encoding */
	    if ( !UnRLEAlpha(surface) ) {
		/* Oh crap... */
		surface->flags |= SDL_RLEACCEL;
		return;
	    }
	} else {
	    SDL_Rect whole;
	    unsigned saved_alpha;

	    /* colorkey encoding: allocate a fresh pixel array ... */
	    surface->pixels = SDL_malloc(surface->h * surface->pitch);
	    if ( surface->pixels == NULL ) {
		/* Oh crap... */
		surface->flags |= SDL_RLEACCEL;
		return;
	    }

	    /* ... paint it with the colorkey ... */
	    SDL_FillRect(surface, NULL, surface->format->colorkey);

	    /* ... and draw the encoded image onto itself, with
	       SDL_SRCALPHA masked out so the blit is opaque */
	    whole.x = 0;
	    whole.y = 0;
	    whole.w = surface->w;
	    whole.h = surface->h;
	    saved_alpha = surface->flags & SDL_SRCALPHA;
	    surface->flags &= ~SDL_SRCALPHA;
	    SDL_RLEBlit(surface, &whole, surface, &whole);
	    surface->flags |= saved_alpha;
	}
    }

    /* the encoded data is no longer needed */
    if ( surface->map && surface->map->sw_data->aux_data ) {
	SDL_free(surface->map->sw_data->aux_data);
	surface->map->sw_data->aux_data = NULL;
    }
}
1940 
1941 
1942