1 /* Copyright (C) 2007-2008 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 ** GNU General Public License for more details.
11 */
12 /* this file contains template code and may be included multiple times */
13
14 #ifndef ARGB_T_DEFINED
15 #define ARGB_T_DEFINED
16
17 #if USE_MMX
18 #include <mmintrin.h>
19
/* an unpacked pixel lives in one 64-bit MMX register */
typedef __m64  mmx_t;
typedef mmx_t  argb_t;

/* Widen a packed 32-bit pixel to four 16-bit lanes by interleaving its
 * bytes with the low bytes of 'zero'.  The macros in this file always pass
 * an all-zero register, which zero-extends every channel.
 */
static inline mmx_t
mmx_load8888( unsigned  value, mmx_t  zero )
{
    mmx_t  packed = _mm_cvtsi32_si64( value );

    return _mm_unpacklo_pi8( packed, zero );
}
28
29 static inline unsigned
mmx_save8888(mmx_t argb,mmx_t zero)30 mmx_save8888( mmx_t argb, mmx_t zero )
31 {
32 return (unsigned) _mm_cvtsi64_si32( _mm_packs_pu16( argb, zero ) );
33 }
34
35 static inline mmx_t
mmx_expand16(int value)36 mmx_expand16( int value )
37 {
38 mmx_t t1 = _mm_cvtsi32_si64( value );
39 return _mm_packs_pi32( t1, t1 );
40 }
41
42 static inline mmx_t
mmx_mulshift(mmx_t argb,int multiplier,int rshift,mmx_t zero)43 mmx_mulshift( mmx_t argb, int multiplier, int rshift, mmx_t zero )
44 {
45 mmx_t ar = _mm_unpackhi_pi16(argb, zero );
46 mmx_t gb = _mm_unpacklo_pi16(argb, zero );
47 mmx_t mult = mmx_expand16(multiplier);
48
49 ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
50 gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );
51
52 return _mm_packs_pi32( gb, ar );
53 }
54
55 static inline mmx_t
mmx_interp255(mmx_t m1,mmx_t m2,mmx_t zero,int alpha)56 mmx_interp255( mmx_t m1, mmx_t m2, mmx_t zero, int alpha )
57 {
58 mmx_t mult, mult2, t1, t2, r1, r2;
59
60 // m1 = [ a1 | r1 | g1 | b1 ]
61 // m2 = [ a2 | r2 | g2 | b2 ]
62 alpha = (alpha << 16) | (alpha ^ 255);
63 mult = _mm_cvtsi32_si64( alpha ); // mult = [ 0 | 0 | a | 1-a ]
64 mult2 = _mm_slli_si64( mult, 32 ); // mult2 = [ a | 1-a | 0 | 0 ]
65 mult = _mm_or_si64( mult, mult2 ); // mults = [ a | 1-a | a | 1-a ]
66
67 t1 = _mm_unpackhi_pi16( m1, m2 ); // t1 = [ a2 | a1 | r2 | r1 ]
68 r1 = _mm_madd_pi16( t1, mult ); // r1 = [ ra | rr ]
69
70 t2 = _mm_unpacklo_pi16( m1, m2 ); // t1 = [ g2 | g1 | b2 | b1 ]
71 r2 = _mm_madd_pi16( t2, mult ); // r2 = [ rg | rb ]
72
73 r1 = _mm_srli_pi32( r1, 8 );
74 r2 = _mm_srli_pi32( r2, 8 );
75
76 return _mm_packs_pi32( r2, r1 );
77 }
78
/*
 * MMX flavour of the ARGB macros.  A pixel variable is a single mmx_t whose
 * four 16-bit lanes hold the channels.  Several macros use a local register
 * named '_zero'; ARGB_DECL_ZERO() must have been expanded in scope first.
 */
#define  ARGB_DECL_ZERO()        mmx_t  _zero = _mm_setzero_si64()
#define  ARGB_DECL(x)            mmx_t  x
#define  ARGB_DECL2(x1,x2)       mmx_t  x1, x2
#define  ARGB_ZERO(x)            x = _zero
#define  ARGB_COPY(x,y)          x = y

/* conversion between a packed 32-bit pixel and the unpacked form */
#define  ARGB_UNPACK(x,v)        x = mmx_load8888((v), _zero)
#define  ARGB_PACK(x)            mmx_save8888(x, _zero)

/* x1 = x2 + x3.
 * NOTE(review): this add is 32 bits wide while the weighted adds below are
 * 16 bits wide; the results only differ if a 16-bit lane overflows.
 */
#define  ARGB_SUM(x1,x2,x3)      x1 = _mm_add_pi32(x2, x3)

/* x1 = (x2 * v) >> s, per channel */
#define  ARGB_MULSHIFT(x1,x2,v,s)    \
    x1 = mmx_mulshift(x2, v, s, _zero)

/* Scale x by (red >> 8) / 256.  Nothing is done when (red >> 8) is 256 or
 * more, i.e. when the 16.16 weight 'red' is at least 1.0.
 */
#define  ARGB_REDUCE(x,red)                     \
    ({                                          \
        int  _w8 = (red) >> 8;                  \
        if (_w8 < 256) {                        \
            ARGB_MULSHIFT(x, x, _w8, 8);        \
        }                                       \
    })

/* x1 = interpolation between x2 (alpha == 0) and x3 */
#define  ARGB_INTERP255(x1,x2,x3,alpha)   \
    x1 = mmx_interp255( x2, x3, _zero, (alpha))

/* weighted sums: the suffix gives the weights of x2 and x3 */
#define  ARGB_ADDW_11(x1,x2,x3)  ARGB_SUM(x1,x2,x3)

#define  ARGB_ADDW_31(x1,x2,x3)                     \
    ({                                              \
        mmx_t  _both  = _mm_add_pi16(x2, x3);       \
        mmx_t  _twice = _mm_slli_pi16(x2, 1);       \
        x1 = _mm_add_pi16(_both, _twice);           \
    })

/* x2 + 3*x3 is the 3:1 sum with the operands swapped */
#define  ARGB_ADDW_13(x1,x2,x3)  ARGB_ADDW_31(x1,x3,x2)

/* x1 = x2 >> s, per 16-bit lane */
#define  ARGB_SHR(x1,x2,s)       x1 = _mm_srli_pi16(x2, s)

/* both bracket a scaler body with _mm_empty() */
#define  ARGB_BEGIN              _mm_empty()
#define  ARGB_DONE               _mm_empty()

/* final normalisation: multiply by scale*scale in ARGB_RESCALE_SHIFT-bit
 * fixed point, then shift back down
 */
#define  ARGB_RESCALE_SHIFT      10
#define  ARGB_DECL_SCALE(s2,s)   int  s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define  ARGB_RESCALE(x,s2)      ARGB_MULSHIFT(x, x, s2, ARGB_RESCALE_SHIFT)
127
128 #else /* !USE_MMX */
129
typedef uint32_t  argb_t;

/*
 * Portable flavour of the ARGB macros.  A pixel variable 'x' is really two
 * 32-bit words: x_ag holds bytes 3 and 1 of the packed pixel, x_rb holds
 * bytes 2 and 0, each byte sitting in its own 16-bit lane (mask 0x00ff00ff).
 * The spare 8 bits above each channel absorb intermediate sums.
 */
#define  ARGB_DECL_ZERO()        /* nothing */
#define  ARGB_DECL(x)            argb_t  x##_ag, x##_rb
#define  ARGB_DECL2(x1,x2)       argb_t  x1##_ag, x1##_rb, x2##_ag, x2##_rb
#define  ARGB_ZERO(x)            (x##_ag = x##_rb = 0)
#define  ARGB_COPY(x,y)          (x##_ag = y##_ag, x##_rb = y##_rb)

/* split a packed 32-bit pixel 'v' into the two lane words of x */
#define  ARGB_UNPACK(x,v)                               \
    ({                                                  \
        argb_t  _src = (argb_t)(v);                     \
        x##_ag = (_src >> 8) & 0x00ff00ff;              \
        x##_rb = (_src)      & 0x00ff00ff;              \
    })

/* inverse of ARGB_UNPACK; expects every lane to be within 0..255 */
#define  ARGB_PACK(x)            (uint32_t)(x##_rb | ((x##_ag) << 8))

/* x1 = x2 + x3, no masking */
#define  ARGB_SUM(x1,x2,x3)                             \
    ({                                                  \
        x1##_ag = x2##_ag + x3##_ag;                    \
        x1##_rb = x2##_rb + x3##_rb;                    \
    })

/* x1 = (x2 * v) >> s, per channel, masked back to 8 bits per lane */
#define  ARGB_MULSHIFT(x1,x2,v,s)                               \
    ({                                                          \
        unsigned  _mul = (v);                                   \
        x1##_ag = ((x2##_ag * _mul) >> (s)) & 0x00ff00ff;       \
        x1##_rb = ((x2##_rb * _mul) >> (s)) & 0x00ff00ff;       \
    })

/* Scale x by (red >> 8) / 256.  Nothing is done when (red >> 8) is 256 or
 * more, i.e. when the 16.16 weight 'red' is at least 1.0.
 */
#define  ARGB_REDUCE(x,red)                             \
    ({                                                  \
        int  _w8 = (red) >> 8;                          \
        if (_w8 < 256) {                                \
            ARGB_MULSHIFT(x, x, _w8, 8);                \
        }                                               \
    })

/* x1 = (x2 * (256 - a) + x3 * a) >> 8, with a = alpha + (alpha >> 8);
 * for alpha in 0..255 the correction term is zero
 */
#define  ARGB_INTERP255(x1,x2,x3,alpha)                                     \
    ({                                                                      \
        int  _wb = (alpha);                                                 \
        int  _wa;                                                           \
        _wb += _wb >> 8;                                                    \
        _wa  = 256 - _wb;                                                   \
        x1##_ag = ((x2##_ag*_wa + x3##_ag*_wb) >> 8) & 0x00ff00ff;          \
        x1##_rb = ((x2##_rb*_wa + x3##_rb*_wb) >> 8) & 0x00ff00ff;          \
    })

/* weighted sums: the suffix gives the weights of x2 and x3 */
#define  ARGB_ADDW_11(x1,x2,x3)  ARGB_SUM(x1,x2,x3)

#define  ARGB_ADDW_31(x1,x2,x3)                         \
    ({                                                  \
        x1##_ag = (3*x2##_ag + x3##_ag);                \
        x1##_rb = (3*x2##_rb + x3##_rb);                \
    })

/* x2 + 3*x3 is the 3:1 sum with the operands swapped */
#define  ARGB_ADDW_13(x1,x2,x3)  ARGB_ADDW_31(x1,x3,x2)

/* x1 = x2 >> s per lane; bits crossing into the lower lane are masked off */
#define  ARGB_SHR(x1,x2,s)                              \
    ({                                                  \
        int  _n = (s);                                  \
        x1##_ag = (x2##_ag >> _n) & 0x00ff00ff;         \
        x1##_rb = (x2##_rb >> _n) & 0x00ff00ff;         \
    })

/* no per-function setup or teardown in this flavour */
#define  ARGB_BEGIN              ((void)0)
#define  ARGB_DONE               ((void)0)

/* final normalisation: multiply by scale*scale in ARGB_RESCALE_SHIFT-bit
 * fixed point, then shift back down
 */
#define  ARGB_RESCALE_SHIFT      8
#define  ARGB_DECL_SCALE(s2,s)   int  s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define  ARGB_RESCALE(x,scale2)  ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)
210
211 #endif /* !USE_MMX */
212
/* helpers shared by both flavours, built on the primitives defined above */
#define  ARGB_ADD(x1,x2)     ARGB_SUM(x1,x1,x2)                      /* x1 += x2               */
#define  ARGB_READ(x,p)      ARGB_UNPACK(x, ((uint32_t*)(p))[0])     /* x = 32-bit pixel at p  */
#define  ARGB_WRITE(x,p)     (((uint32_t*)(p))[0] = ARGB_PACK(x))    /* 32-bit pixel at p = x  */
216
217 #endif /* !ARGB_T_DEFINED */
218
219
220
221 #ifdef ARGB_SCALE_GENERIC
222 static void
ARGB_SCALE_GENERIC(ScaleOp * op)223 ARGB_SCALE_GENERIC( ScaleOp* op )
224 {
225 int dst_pitch = op->dst_pitch;
226 int src_pitch = op->src_pitch;
227 uint8_t* dst_line = op->dst_line;
228 uint8_t* src_line = op->src_line;
229 ARGB_DECL_SCALE(scale2, op->scale);
230 int h;
231 int sx = op->sx;
232 int sy = op->sy;
233 int ix = op->ix;
234 int iy = op->iy;
235
236 ARGB_BEGIN;
237
238 src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
239 sx &= 0xffff;
240 sy &= 0xffff;
241
242 for ( h = op->rd.h; h > 0; h-- ) {
243 uint8_t* dst = dst_line;
244 uint8_t* src = src_line;
245 uint8_t* dst_end = dst + 4*op->rd.w;
246 int sx1 = sx;
247 int sy1 = sy;
248
249 for ( ; dst < dst_end; ) {
250 int sx2 = sx1 + ix;
251 int sy2 = sy1 + iy;
252
253 ARGB_DECL_ZERO();
254 ARGB_DECL(spix);
255 ARGB_DECL(pix);
256 ARGB_ZERO(pix);
257
258 /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
259 * source square, we're going to compute the sum of its pixels'
260 * colors... simple box filtering
261 */
262 {
263 int gsy, gsx;
264 for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
265 for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
266 uint8_t* s = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
267 int xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
268 unsigned ww, hh;
269 unsigned red;
270
271 if (xmin < sx1) xmin = sx1;
272 if (xmax > sx2) xmax = sx2;
273 if (ymin < sy1) ymin = sy1;
274 if (ymax > sy2) ymax = sy2;
275
276 ww = (unsigned)(xmax-xmin);
277 red = ww;
278
279 hh = (unsigned)(ymax-ymin);
280 red = (hh < 65536) ? (red*hh >> 16U) : red;
281
282 ARGB_READ(spix,s);
283 ARGB_REDUCE(spix,red);
284 ARGB_ADD(pix,spix);
285 }
286 }
287 }
288
289 ARGB_RESCALE(pix,scale2);
290 ARGB_WRITE(pix,dst);
291
292 sx1 = sx2;
293 src += (sx1 >> 16)*4;
294 sx1 &= 0xffff;
295 dst += 4;
296 }
297
298 sy += iy;
299 src_line += (sy >> 16)*src_pitch;
300 sy &= 0xffff;
301
302 dst_line += dst_pitch;
303 }
304 ARGB_DONE;
305 }
306 #endif
307 #undef ARGB_SCALE_GENERIC
308
309
310 #ifdef ARGB_SCALE_05_TO_10
/* Product of two 16.16 fixed-point weights, returned in 16.16.
 * The callers in this file pass values between 1 and 65536 (1.0).
 */
static inline int  cross( int  x, int  y )
{
    /* 65536 * 65536 does not fit in 32 bits, so 1.0 * 1.0 is answered directly */
    const int  both_full = (x == 65536) && (y == 65536);

    return both_full ? 65536
                     : (int)( ((unsigned)x * (unsigned)y) >> 16U );
}
317
318 static void
scale_05_to_10(ScaleOp * op)319 scale_05_to_10( ScaleOp* op )
320 {
321 int dst_pitch = op->dst_pitch;
322 int src_pitch = op->src_pitch;
323 uint8_t* dst_line = op->dst_line;
324 uint8_t* src_line = op->src_line;
325 ARGB_DECL_SCALE(scale2, op->scale);
326 int h;
327 int sx = op->sx;
328 int sy = op->sy;
329 int ix = op->ix;
330 int iy = op->iy;
331
332 ARGB_BEGIN;
333
334 src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
335 sx &= 0xffff;
336 sy &= 0xffff;
337
338 for ( h = op->rd.h; h > 0; h-- ) {
339 uint8_t* dst = dst_line;
340 uint8_t* src = src_line;
341 uint8_t* dst_end = dst + 4*op->rd.w;
342 int sx1 = sx;
343 int sy1 = sy;
344
345 for ( ; dst < dst_end; ) {
346 int sx2 = sx1 + ix;
347 int sy2 = sy1 + iy;
348
349 ARGB_DECL_ZERO();
350 ARGB_DECL2(spix, pix);
351
352 int off = src_pitch;
353 int fx1 = sx1 & 0xffff;
354 int fx2 = sx2 & 0xffff;
355 int fy1 = sy1 & 0xffff;
356 int fy2 = sy2 & 0xffff;
357
358 int center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
359 int center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);
360
361 ARGB_ZERO(pix);
362
363 if (fx2 == 0) {
364 fx2 = 65536;
365 }
366 if (fy2 == 0) {
367 fy2 = 65536;
368 }
369 fx1 = 65536 - fx1;
370 fy1 = 65536 - fy1;
371
372 /** TOP BAND
373 **/
374
375 /* top-left pixel */
376 ARGB_READ(spix,src);
377 ARGB_REDUCE(spix,cross(fx1,fy1));
378 ARGB_ADD(pix,spix);
379
380 /* top-center pixel, if any */
381 ARGB_READ(spix,src + 4);
382 if (center_x) {
383 ARGB_REDUCE(spix,fy1);
384 ARGB_ADD(pix,spix);
385 ARGB_READ(spix,src + 8);
386 }
387
388 /* top-right pixel */
389 ARGB_REDUCE(spix,cross(fx2,fy1));
390 ARGB_ADD(pix,spix);
391
392 /** MIDDLE BAND, IF ANY
393 **/
394 if (center_y) {
395 /* left-middle pixel */
396 ARGB_READ(spix,src + off);
397 ARGB_REDUCE(spix,fx1);
398 ARGB_ADD(pix,spix);
399
400 /* center pixel, if any */
401 ARGB_READ(spix,src + off + 4);
402 if (center_x) {
403 ARGB_ADD(pix,spix);
404 ARGB_READ(spix,src + off + 8);
405 }
406
407 /* right-middle pixel */
408 ARGB_REDUCE(spix,fx2);
409 ARGB_ADD(pix,spix);
410
411 off += src_pitch;
412 }
413
414 /** BOTTOM BAND
415 **/
416 /* left-bottom pixel */
417 ARGB_READ(spix,src + off);
418 ARGB_REDUCE(spix,cross(fx1,fy2));
419 ARGB_ADD(pix,spix);
420
421 /* center-bottom, if any */
422 ARGB_READ(spix,src + off + 4);
423 if (center_x) {
424 ARGB_REDUCE(spix,fy2);
425 ARGB_ADD(pix,spix);
426 ARGB_READ(spix,src + off + 8);
427 }
428
429 /* right-bottom pixel */
430 ARGB_REDUCE(spix,cross(fx2,fy2));
431 ARGB_ADD(pix,spix);
432
433 /** WRITE IT
434 **/
435 ARGB_RESCALE(pix,scale2);
436 ARGB_WRITE(pix,dst);
437
438 sx1 = sx2;
439 src += (sx1 >> 16)*4;
440 sx1 &= 0xffff;
441 dst += 4;
442 }
443
444 sy += iy;
445 src_line += (sy >> 16)*src_pitch;
446 sy &= 0xffff;
447
448 dst_line += dst_pitch;
449 }
450 ARGB_DONE;
451 }
452 #endif
453 #undef ARGB_SCALE_05_TO_10
454
455
456 #ifdef ARGB_SCALE_UP_BILINEAR
457 static void
scale_up_bilinear(ScaleOp * op)458 scale_up_bilinear( ScaleOp* op )
459 {
460 int dst_pitch = op->dst_pitch;
461 int src_pitch = op->src_pitch;
462 uint8_t* dst_line = op->dst_line;
463 uint8_t* src_line = op->src_line;
464 int sx = op->sx;
465 int sy = op->sy;
466 int ix = op->ix;
467 int iy = op->iy;
468 int xlimit, ylimit;
469 int h, sx0;
470
471 ARGB_BEGIN;
472
473 /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
474 /* the four nearest source pixels, which are at (0.5,0.5) offsets */
475
476 sx = sx + ix/2 - 32768;
477 sy = sy + iy/2 - 32768;
478
479 xlimit = (op->src_w-1);
480 ylimit = (op->src_h-1);
481
482 sx0 = sx;
483
484 for ( h = op->rd.h; h > 0; h-- ) {
485 uint8_t* dst = dst_line;
486 uint8_t* dst_end = dst + 4*op->rd.w;
487
488 sx = sx0;
489 for ( ; dst < dst_end; ) {
490 int ex1, ex2, ey1, ey2, alpha;
491 uint8_t* s;
492
493 ARGB_DECL_ZERO();
494 ARGB_DECL2(spix1,spix2);
495 ARGB_DECL2(pix3,pix4);
496 ARGB_DECL(pix);
497
498 /* find the four neighbours */
499 ex1 = (sx >> 16);
500 ey1 = (sy >> 16);
501 ex2 = (sx+65535) >> 16;
502 ey2 = (sy+65535) >> 16;
503
504 if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
505 if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
506 if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
507 if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
508
509 ex2 = (ex2-ex1)*4;
510 ey2 = (ey2-ey1)*src_pitch;
511
512 /* interpolate */
513 s = src_line + ex1*4 + ey1*src_pitch;
514 ARGB_READ(spix1, s);
515 ARGB_READ(spix2, s+ex2);
516
517 alpha = (sx >> 8) & 0xff;
518 ARGB_INTERP255(pix3,spix1,spix2,alpha);
519
520 s += ey2;
521 ARGB_READ(spix1, s);
522 ARGB_READ(spix2, s+ex2);
523
524 ARGB_INTERP255(pix4,spix1,spix2,alpha);
525
526 alpha = (sy >> 8) & 0xff;
527 ARGB_INTERP255(pix,pix3,pix4,alpha);
528
529 ARGB_WRITE(pix,dst);
530
531 sx += ix;
532 dst += 4;
533 }
534
535 sy += iy;
536 dst_line += dst_pitch;
537 }
538 ARGB_DONE;
539 }
540 #endif
541 #undef ARGB_SCALE_UP_BILINEAR
542
543 #ifdef ARGB_SCALE_UP_QUICK_4x4
544 static void
ARGB_SCALE_UP_QUICK_4x4(ScaleOp * op)545 ARGB_SCALE_UP_QUICK_4x4( ScaleOp* op )
546 {
547 int dst_pitch = op->dst_pitch;
548 int src_pitch = op->src_pitch;
549 uint8_t* dst_line = op->dst_line;
550 uint8_t* src_line = op->src_line;
551 int sx = op->sx;
552 int sy = op->sy;
553 int ix = op->ix;
554 int iy = op->iy;
555 int xlimit, ylimit;
556 int h, sx0;
557
558 ARGB_BEGIN;
559
560 /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
561 /* the four nearest source pixels, which are at (0.5,0.5) offsets */
562
563 sx = sx + ix/2 - 32768;
564 sy = sy + iy/2 - 32768;
565
566 xlimit = (op->src_w-1);
567 ylimit = (op->src_h-1);
568
569 sx0 = sx;
570
571 for ( h = op->rd.h; h > 0; h-- ) {
572 uint8_t* dst = dst_line;
573 uint8_t* dst_end = dst + 4*op->rd.w;
574
575 sx = sx0;
576 for ( ; dst < dst_end; ) {
577 int ex1, ex2, ey1, ey2;
578 uint8_t* p;
579 ARGB_DECL_ZERO();
580 ARGB_DECL(pix);
581 ARGB_DECL2(spix1, spix2);
582 ARGB_DECL2(pix3, pix4);
583
584 /* find the four neighbours */
585 ex1 = (sx >> 16);
586 ey1 = (sy >> 16);
587 ex2 = (sx+65535) >> 16;
588 ey2 = (sy+65535) >> 16;
589
590 if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
591 if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
592 if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
593 if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
594
595 /* interpolate */
596 p = (src_line + ex1*4 + ey1*src_pitch);
597
598 ex2 = (ex2-ex1)*4;
599 ey2 = (ey2-ey1)*src_pitch;
600
601 switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
602 case 0:
603 *(uint32_t*)dst = *(uint32_t*)p;
604 break;
605
606 /* top-line is easy */
607 case 1:
608 ARGB_READ(spix1, p);
609 ARGB_READ(spix2, p+ex2);
610 ARGB_ADDW_31(pix,spix1,spix2);
611 ARGB_SHR(pix,pix,2);
612 ARGB_WRITE(pix, dst);
613 break;
614
615 case 2:
616 ARGB_READ(spix1, p);
617 ARGB_READ(spix2, p+ex2);
618 ARGB_ADDW_11(pix, spix1, spix2);
619 ARGB_SHR(pix,pix,1);
620 ARGB_WRITE(pix, dst);
621 break;
622
623 case 3:
624 ARGB_READ(spix1, p);
625 ARGB_READ(spix2, p+ex2);
626 ARGB_ADDW_13(pix,spix1,spix2);
627 ARGB_SHR(pix,pix,2);
628 ARGB_WRITE(pix, dst);
629 break;
630
631 /* second line is harder */
632 case 4:
633 ARGB_READ(spix1, p);
634 ARGB_READ(spix2, p+ey2);
635 ARGB_ADDW_31(pix,spix1,spix2);
636 ARGB_SHR(pix,pix,2);
637 ARGB_WRITE(pix, dst);
638 break;
639
640 case 5:
641 ARGB_READ(spix1, p);
642 ARGB_READ(spix2, p+ex2);
643 ARGB_ADDW_31(pix3,spix1,spix2);
644 p += ey2;
645 ARGB_READ(spix1, p);
646 ARGB_READ(spix2, p+ex2);
647 ARGB_ADDW_31(pix4,spix1,spix2);
648
649 ARGB_ADDW_31(pix,pix3,pix4);
650 ARGB_SHR(pix,pix,4);
651 ARGB_WRITE(pix,dst);
652 break;
653
654 case 6:
655 ARGB_READ(spix1, p);
656 ARGB_READ(spix2, p+ex2);
657 ARGB_ADDW_11(pix3,spix1,spix2);
658 p += ey2;
659 ARGB_READ(spix1, p);
660 ARGB_READ(spix2, p+ex2);
661 ARGB_ADDW_11(pix4,spix1,spix2);
662
663 ARGB_ADDW_31(pix,pix3,pix4);
664 ARGB_SHR(pix,pix,3);
665 ARGB_WRITE(pix,dst);
666 break;
667
668 case 7:
669 ARGB_READ(spix1, p);
670 ARGB_READ(spix2, p+ex2);
671 ARGB_ADDW_13(pix3,spix1,spix2);
672 p += ey2;
673 ARGB_READ(spix1, p);
674 ARGB_READ(spix2, p+ex2);
675 ARGB_ADDW_13(pix4,spix1,spix2);
676
677 ARGB_ADDW_31(pix,pix3,pix4);
678 ARGB_SHR(pix,pix,4);
679 ARGB_WRITE(pix,dst);
680 break;
681
682 /* third line */
683 case 8:
684 ARGB_READ(spix1, p);
685 ARGB_READ(spix2, p+ey2);
686 ARGB_ADDW_11(pix,spix1,spix2);
687 ARGB_SHR(pix,pix,1);
688 ARGB_WRITE(pix, dst);
689 break;
690
691 case 9:
692 ARGB_READ(spix1, p);
693 ARGB_READ(spix2, p+ex2);
694 ARGB_ADDW_31(pix3,spix1,spix2);
695 p += ey2;
696 ARGB_READ(spix1, p);
697 ARGB_READ(spix2, p+ex2);
698 ARGB_ADDW_31(pix4,spix1,spix2);
699
700 ARGB_ADDW_11(pix,pix3,pix4);
701 ARGB_SHR(pix,pix,3);
702 ARGB_WRITE(pix,dst);
703 break;
704
705 case 10:
706 ARGB_READ(spix1, p);
707 ARGB_READ(spix2, p+ex2);
708 ARGB_ADDW_11(pix3,spix1,spix2);
709 p += ey2;
710 ARGB_READ(spix1, p);
711 ARGB_READ(spix2, p+ex2);
712 ARGB_ADDW_11(pix4,spix1,spix2);
713
714 ARGB_ADDW_11(pix,pix3,pix4);
715 ARGB_SHR(pix,pix,2);
716 ARGB_WRITE(pix,dst);
717 break;
718
719 case 11:
720 ARGB_READ(spix1, p);
721 ARGB_READ(spix2, p+ex2);
722 ARGB_ADDW_13(pix3,spix1,spix2);
723 p += ey2;
724 ARGB_READ(spix1, p);
725 ARGB_READ(spix2, p+ex2);
726 ARGB_ADDW_13(pix4,spix1,spix2);
727
728 ARGB_ADDW_11(pix,pix3,pix4);
729 ARGB_SHR(pix,pix,3);
730 ARGB_WRITE(pix,dst);
731 break;
732
733 /* last line */
734 case 12:
735 ARGB_READ(spix1, p);
736 ARGB_READ(spix2, p+ey2);
737 ARGB_ADDW_13(pix,spix1,spix2);
738 ARGB_SHR(pix,pix,2);
739 ARGB_WRITE(pix, dst);
740 break;
741
742 case 13:
743 ARGB_READ(spix1, p);
744 ARGB_READ(spix2, p+ex2);
745 ARGB_ADDW_31(pix3,spix1,spix2);
746 p += ey2;
747 ARGB_READ(spix1, p);
748 ARGB_READ(spix2, p+ex2);
749 ARGB_ADDW_31(pix4,spix1,spix2);
750
751 ARGB_ADDW_13(pix,pix3,pix4);
752 ARGB_SHR(pix,pix,4);
753 ARGB_WRITE(pix,dst);
754 break;
755
756 case 14:
757 ARGB_READ(spix1, p);
758 ARGB_READ(spix2, p+ex2);
759 ARGB_ADDW_11(pix3,spix1,spix2);
760 p += ey2;
761 ARGB_READ(spix1, p);
762 ARGB_READ(spix2, p+ex2);
763 ARGB_ADDW_11(pix4,spix1,spix2);
764
765 ARGB_ADDW_13(pix,pix3,pix4);
766 ARGB_SHR(pix,pix,3);
767 ARGB_WRITE(pix,dst);
768 break;
769
770 default:
771 ARGB_READ(spix1, p);
772 ARGB_READ(spix2, p+ex2);
773 ARGB_ADDW_13(pix3,spix1,spix2);
774 p += ey2;
775 ARGB_READ(spix1, p);
776 ARGB_READ(spix2, p+ex2);
777 ARGB_ADDW_13(pix4,spix1,spix2);
778
779 ARGB_ADDW_13(pix,pix3,pix4);
780 ARGB_SHR(pix,pix,4);
781 ARGB_WRITE(pix,dst);
782 }
783 sx += ix;
784 dst += 4;
785 }
786
787 sy += iy;
788 dst_line += dst_pitch;
789 }
790 ARGB_DONE;
791 }
792 #endif
793 #undef ARGB_SCALE_UP_QUICK_4x4
794
795
796 #ifdef ARGB_SCALE_NEAREST
797 /* this version scales up with nearest neighbours - looks crap */
798 static void
ARGB_SCALE_NEAREST(ScaleOp * op)799 ARGB_SCALE_NEAREST( ScaleOp* op )
800 {
801 int dst_pitch = op->dst_pitch;
802 int src_pitch = op->src_pitch;
803 uint8_t* dst_line = op->dst_line;
804 uint8_t* src_line = op->src_line;
805 int sx = op->sx;
806 int sy = op->sy;
807 int ix = op->ix;
808 int iy = op->iy;
809 int xlimit, ylimit;
810 int h, sx0;
811
812 ARGB_BEGIN;
813
814 /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
815 /* the four nearest source pixels, which are at (0.5,0.5) offsets */
816
817 sx = sx + ix/2 - 32768;
818 sy = sy + iy/2 - 32768;
819
820 xlimit = (op->src_w-1);
821 ylimit = (op->src_h-1);
822
823 sx0 = sx;
824
825 for ( h = op->rd.h; h > 0; h-- ) {
826 uint8_t* dst = dst_line;
827 uint8_t* dst_end = dst + 4*op->rd.w;
828
829 sx = sx0;
830 for ( ; dst < dst_end; ) {
831 int ex1, ex2, ey1, ey2;
832 unsigned* p;
833
834 /* find the top-left neighbour */
835 ex1 = (sx >> 16);
836 ey1 = (sy >> 16);
837 ex2 = ex1+1;
838 ey2 = ey1+1;
839
840 if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
841 if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
842 if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
843 if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
844
845 p = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
846 if ((sx & 0xffff) >= 32768)
847 p += (ex2-ex1);
848 if ((sy & 0xffff) >= 32768)
849 p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);
850
851 *(unsigned*)dst = p[0];
852
853 sx += ix;
854 dst += 4;
855 }
856
857 sy += iy;
858 dst_line += dst_pitch;
859 }
860 }
861 #endif
862 #undef ARGB_SCALE_NEAREST
863