/* Copyright (C) 2007-2008 The Android Open Source Project
**
** This software is licensed under the terms of the GNU General Public
** License version 2, as published by the Free Software Foundation, and
** may be copied, distributed, and modified under those terms.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
*/
/* This file contains template code and may be included multiple times. */

#ifndef ARGB_T_DEFINED
#define ARGB_T_DEFINED

/*
 * Pixel helpers shared by every scaler template in this file.
 *
 * A pixel is a packed 32-bit value made of four 8-bit channels.  Two
 * interchangeable back-ends expose the same ARGB_xxx() macro vocabulary:
 *
 *   - USE_MMX:   a pixel 'x' is one 64-bit MMX register holding the four
 *                channels as four 16-bit lanes.
 *   - otherwise: a pixel 'x' is a pair of 32-bit words, x_ag (bytes 3 and 1
 *                of the packed value) and x_rb (bytes 2 and 0).  Each channel
 *                sits in the low 8 bits of a 16-bit slot, which leaves 8 bits
 *                of headroom for sums and weighted sums.
 *
 * Most macros are statement-like and rely on GCC statement expressions.
 * Every function using them must first declare '_zero' with
 * ARGB_DECL_ZERO() and finish with ARGB_DONE.
 */
#include <stdint.h>

#if USE_MMX
#include <mmintrin.h>

typedef __m64  mmx_t;
typedef mmx_t  argb_t;

/* Widen the four bytes of the packed pixel 'value' into four 16-bit lanes.
 * 'zero' must be an all-zero register; it supplies the high byte of each lane.
 */
static inline mmx_t
mmx_load8888( unsigned  value, mmx_t  zero )
{
    mmx_t  packed = _mm_cvtsi32_si64( value );

    return _mm_unpacklo_pi8( packed, zero );
}

/* Narrow four 16-bit lanes back into a packed 32-bit pixel, saturating each
 * lane to the 0..255 range.
 */
static inline unsigned
mmx_save8888( mmx_t  argb, mmx_t  zero )
{
    mmx_t  bytes = _mm_packs_pu16( argb, zero );

    return (unsigned) _mm_cvtsi64_si32( bytes );
}

/* Build the lane pattern [ 0 | v | 0 | v ] where v is 'value' saturated to a
 * signed 16-bit integer.  This is the multiplier layout mmx_mulshift() feeds
 * to _mm_madd_pi16().
 */
static inline mmx_t
mmx_expand16( int  value )
{
    mmx_t  t1 = _mm_cvtsi32_si64( value );

    return _mm_packs_pi32( t1, t1 );
}

/* Convert a floating-point factor into 16.16 fixed point (truncating). */
static inline int
mmx_makescale( double  s )
{
    return (int)(s*(1 << 16));
}

/* Multiply every 16-bit lane of 'argb' by 'multiplier', shift each 32-bit
 * product right by 'rshift' bits and repack the results (with signed
 * saturation) into four 16-bit lanes.  'zero' must be an all-zero register.
 */
static inline mmx_t
mmx_mulshift( mmx_t  argb, int  multiplier, int  rshift, mmx_t  zero )
{
    /* interleave with zero so that each lane becomes a 32-bit madd operand */
    mmx_t   ar   = _mm_unpackhi_pi16( argb, zero );
    mmx_t   gb   = _mm_unpacklo_pi16( argb, zero );
    mmx_t   mult = mmx_expand16( multiplier );

    ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
    gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );

    return _mm_packs_pi32( gb, ar );
}

/* Blend two unpacked pixels: each lane becomes
 * (m1*(255-alpha) + m2*alpha) >> 8, with 'alpha' in 0..255.
 * 'zero' is unused; it is kept so all helpers share the same calling shape.
 */
static inline mmx_t
mmx_interp255( mmx_t  m1, mmx_t  m2, mmx_t  zero, int  alpha )
{
    mmx_t  mult, mult2, t1, t2, r1, r2;

    // m1 = [ a1 | r1 | g1 | b1 ]
    // m2 = [ a2 | r2 | g2 | b2 ]
    alpha = (alpha << 16) | (alpha ^ 255);   // high half: a, low half: 255-a
    mult  = _mm_cvtsi32_si64( alpha );       // mult  = [  0  |  0  |  a  | 1-a ]
    mult2 = _mm_slli_si64( mult, 32 );       // mult2 = [  a  | 1-a |  0  |  0  ]
    mult  = _mm_or_si64( mult, mult2 );      // mult  = [  a  | 1-a |  a  | 1-a ]

    t1 = _mm_unpackhi_pi16( m1, m2 );        // t1 = [ a2 | a1 | r2 | r1 ]
    r1 = _mm_madd_pi16( t1, mult );          // r1 = [   ra    |    rr   ]

    t2 = _mm_unpacklo_pi16( m1, m2 );        // t2 = [ g2 | g1 | b2 | b1 ]
    r2 = _mm_madd_pi16( t2, mult );          // r2 = [   rg    |    rb   ]

    r1 = _mm_srli_pi32( r1, 8 );
    r2 = _mm_srli_pi32( r2, 8 );

    return _mm_packs_pi32( r2, r1 );
}

/* declarations and moves */
#define ARGB_DECL_ZERO()   mmx_t    _zero = _mm_setzero_si64()
#define ARGB_DECL(x)       mmx_t    x
#define ARGB_DECL2(x1,x2)  mmx_t    x1, x2
#define ARGB_ZERO(x)       x = _zero
#define ARGB_UNPACK(x,v)   x =  mmx_load8888((v), _zero)
#define ARGB_PACK(x)       mmx_save8888(x, _zero)
#define ARGB_COPY(x,y)     x = y
#define ARGB_SUM(x1,x2,x3) x1 = _mm_add_pi32(x2, x3)

/* scale x by red/65536; nothing is done when (red >> 8) >= 256,
 * i.e. when the factor is 1.0 or more */
#define ARGB_REDUCE(x,red) \
    ({ \
        int  _red = (red) >> 8;  \
        if (_red < 256) \
            x = mmx_mulshift( x, _red, 8, _zero ); \
    })

/* x1 = blend of x2 and x3, alpha in 0..255 weights x3 */
#define ARGB_INTERP255(x1,x2,x3,alpha)  \
    x1 = mmx_interp255( x2, x3, _zero, (alpha))

/* weighted sums: x1 = x2 + x3, 3*x2 + x3, x2 + 3*x3 */
#define ARGB_ADDW_11(x1,x2,x3)  \
    ARGB_SUM(x1,x2,x3)

#define ARGB_ADDW_31(x1,x2,x3)  \
    ({ \
        mmx_t  _t1 = _mm_add_pi16(x2, x3); \
        mmx_t  _t2 = _mm_slli_pi16(x2, 1); \
        x1 = _mm_add_pi16(_t1, _t2); \
    })

#define ARGB_ADDW_13(x1,x2,x3)  \
    ({ \
        mmx_t  _t1 = _mm_add_pi16(x2, x3); \
        mmx_t  _t2 = _mm_slli_pi16(x3, 1); \
        x1 = _mm_add_pi16(_t1, _t2); \
    })

/* x1 = x2 >> s, per channel */
#define ARGB_SHR(x1,x2,s)   \
    x1 = _mm_srli_pi16(x2, s)


/* x1 = (x2 * v) >> s, per channel */
#define ARGB_MULSHIFT(x1,x2,v,s)   \
    x1 = mmx_mulshift(x2, v, s, _zero)

/* leave MMX state so that x87 code can run again */
#define ARGB_DONE           _mm_empty()

/* final normalisation of an accumulated sum by scale*scale */
#define ARGB_RESCALE_SHIFT      10
#define ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define ARGB_RESCALE(x,s2)      x = mmx_mulshift( x, s2, ARGB_RESCALE_SHIFT, _zero )

#else /* !USE_MMX */

typedef uint32_t  argb_t;

/* declarations and moves; '_zero' is unused by this back-end but is declared
 * so that template code is identical for both back-ends */
#define ARGB_DECL_ZERO()   argb_t  _zero = 0
#define ARGB_DECL(x)       argb_t  x##_ag, x##_rb
#define ARGB_DECL2(x1,x2)  argb_t  x1##_ag, x1##_rb, x2##_ag, x2##_rb
#define ARGB_ZERO(x)       (x##_ag = x##_rb = 0)
#define ARGB_COPY(x,y)     (x##_ag = y##_ag, x##_rb = y##_rb)

/* split packed value v into the two half-pixels of x */
#define ARGB_UNPACK(x,v)          \
    ({                            \
        argb_t  _v = (argb_t)(v); \
        x##_ag = (_v >> 8) & 0xff00ff; \
        x##_rb = (_v)      & 0xff00ff; \
    })

/* recombine; every channel of x must already fit in 8 bits */
#define ARGB_PACK(x)      (uint32_t)(((x##_ag) << 8) | x##_rb)

#define ARGB_SUM(x1,x2,x3) \
    ({ \
        x1##_ag = x2##_ag + x3##_ag; \
        x1##_rb = x2##_rb + x3##_rb; \
    })

/* scale x by red/65536; nothing is done when (red >> 8) >= 256,
 * i.e. when the factor is 1.0 or more */
#define ARGB_REDUCE(x,red) \
    ({ \
        int  _red = (red) >> 8; \
        if (_red < 256) { \
            x##_ag = ((x##_ag*_red) >> 8) & 0xff00ff; \
            x##_rb = ((x##_rb*_red) >> 8) & 0xff00ff; \
        } \
    })

/* x1 = (x2*(256-alpha) + x3*alpha) >> 8.
 * NOTE(review): for alpha in 0..255 the '_alpha >> 8' term is always zero, so
 * alpha is used unchanged; '>> 7' (mapping 255 to 256) may have been intended.
 * Confirm before changing, since that would alter output pixels. */
#define ARGB_INTERP255(x1,x2,x3,alpha)  \
    ({ \
        int  _alpha  = (alpha); \
        int  _ialpha; \
        _alpha += _alpha >> 8; \
        _ialpha = 256 - _alpha; \
        x1##_ag = ((x2##_ag*_ialpha + x3##_ag*_alpha) >> 8) & 0xff00ff; \
        x1##_rb = ((x2##_rb*_ialpha + x3##_rb*_alpha) >> 8) & 0xff00ff; \
    })

/* weighted sums: x1 = x2 + x3, 3*x2 + x3, x2 + 3*x3 (no masking: the
 * result is expected to be normalised later with ARGB_SHR) */
#define ARGB_ADDW_11(x1,x2,x3)  \
    ({ \
        x1##_ag = (x2##_ag + x3##_ag);  \
        x1##_rb = (x2##_rb + x3##_rb);  \
    })

#define ARGB_ADDW_31(x1,x2,x3)  \
    ({ \
        x1##_ag = (3*x2##_ag + x3##_ag);  \
        x1##_rb = (3*x2##_rb + x3##_rb);  \
    })

#define ARGB_ADDW_13(x1,x2,x3)  \
    ({ \
        x1##_ag = (x2##_ag + 3*x3##_ag);  \
        x1##_rb = (x2##_rb + 3*x3##_rb);  \
    })

/* x1 = (x2 * v) >> s, per channel, masked back to 8 bits */
#define ARGB_MULSHIFT(x1,x2,v,s) \
    ({ \
        unsigned  _vv = (v); \
        x1##_ag = ((x2##_ag * _vv) >> (s)) & 0xff00ff; \
        x1##_rb = ((x2##_rb * _vv) >> (s)) & 0xff00ff; \
    })

/* x1 = x2 >> s, per channel, masked back to 8 bits */
#define ARGB_SHR(x1,x2,s) \
    ({ \
        int  _s = (s); \
        x1##_ag = (x2##_ag >> _s) & 0xff00ff; \
        x1##_rb = (x2##_rb >> _s) & 0xff00ff; \
    })

#define ARGB_DONE          ((void)0)

/* final normalisation of an accumulated sum by scale*scale */
#define ARGB_RESCALE_SHIFT      8
#define ARGB_DECL_SCALE(s2,s)   int  s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define ARGB_RESCALE(x,scale2)  ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)

#endif /* !USE_MMX */

/* back-end independent conveniences: accumulate, load from / store to memory */
#define ARGB_ADD(x1,x2)     ARGB_SUM(x1,x1,x2)
#define ARGB_READ(x,p)      ARGB_UNPACK(x,*(uint32_t*)(p))
#define ARGB_WRITE(x,p)     *(uint32_t*)(p) = ARGB_PACK(x)

#endif /* !ARGB_T_DEFINED */



#ifdef ARGB_SCALE_GENERIC
/*
 * Generic box-filter scaler.  The function is emitted under whatever name the
 * including file gave to ARGB_SCALE_GENERIC.
 *
 * op->sx, op->sy, op->ix and op->iy are used as 16.16 fixed-point source
 * coordinates and per-destination-pixel steps.  For each of the
 * op->rd.w x op->rd.h destination pixels, the source pixels overlapping the
 * square (sx1,sy1)-(sx2,sy2) are summed, each weighted by the fraction of it
 * that lies inside the square, and the sum is normalised by op->scale squared.
 */
static void
ARGB_SCALE_GENERIC( ScaleOp*   op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    ARGB_DECL_SCALE(scale2, op->scale);
    int        h;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;

    /* move to the first source pixel and keep only the fractional offsets */
    src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    sx       &= 0xffff;
    sy       &= 0xffff;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  src     = src_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;
        int       sx1     = sx;
        int       sy1     = sy;

        for ( ; dst < dst_end; ) {
            int  sx2 = sx1 + ix;
            int  sy2 = sy1 + iy;

            ARGB_DECL_ZERO();
            ARGB_DECL(spix);
            ARGB_DECL(pix);
            ARGB_ZERO(pix);

            /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
             * source square, we're going to compute the sum of its pixels'
             * colors...  simple box filtering
             */
            {
                int  gsy, gsx;
                for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
                    for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
                        uint8_t*  s    = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
                        int       xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
                        unsigned  ww, hh;
                        unsigned  red;

                        /* clip this source pixel against the source square */
                        if (xmin < sx1) xmin = sx1;
                        if (xmax > sx2) xmax = sx2;
                        if (ymin < sy1) ymin = sy1;
                        if (ymax > sy2) ymax = sy2;

                        ww  = (unsigned)(xmax-xmin);
                        red = ww;

                        /* a full-height pixel keeps the width as its weight,
                         * which also avoids the 65536*65536 overflow */
                        hh  = (unsigned)(ymax-ymin);
                        red = (hh < 65536) ? (red*hh >> 16U) : red;

                        ARGB_READ(spix,s);
                        ARGB_REDUCE(spix,red);
                        ARGB_ADD(pix,spix);
                    }
                }
            }

            ARGB_RESCALE(pix,scale2);
            ARGB_WRITE(pix,dst);

            /* step to the next source square on this line */
            sx1  = sx2;
            src += (sx1 >> 16)*4;
            sx1 &= 0xffff;
            dst += 4;
        }

        /* step to the next source band */
        sy       += iy;
        src_line += (sy >> 16)*src_pitch;
        sy       &= 0xffff;

        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_GENERIC


#ifdef ARGB_SCALE_05_TO_10
/* Product of two 16.16 fixed-point factors that are at most 65536 (1.0).
 * The 1.0 * 1.0 case is answered directly because 65536*65536 does not fit
 * in 32 bits.
 */
static inline int cross( int  x, int  y ) {
    if (x == 65536 && y == 65536)
        return 65536;

    return (int)((unsigned)x * (unsigned)y >> 16U);
}

/*
 * Unrolled box-filter scaler.  Each destination pixel is built from a block
 * of source pixels that is two or three pixels wide and two or three pixels
 * high: a top band, an optional middle band and a bottom band, each made of a
 * left pixel, an optional center pixel and a right pixel.
 *
 * op->sx, op->sy, op->ix and op->iy are used as 16.16 fixed-point source
 * coordinates and steps.  NOTE(review): the name suggests scale factors
 * between 0.5 and 1.0; nothing in this function checks that.
 */
static void
scale_05_to_10( ScaleOp*   op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    ARGB_DECL_SCALE(scale2, op->scale);
    int        h;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;

    /* move to the first source pixel and keep only the fractional offsets */
    src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    sx       &= 0xffff;
    sy       &= 0xffff;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  src     = src_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;
        int       sx1     = sx;
        int       sy1     = sy;

        for ( ; dst < dst_end; ) {
            int  sx2 = sx1 + ix;
            int  sy2 = sy1 + iy;

            ARGB_DECL_ZERO();
            ARGB_DECL2(spix, pix);

            int  off = src_pitch;       /* byte offset of the band after the top one */
            int  fx1 = sx1 & 0xffff;
            int  fx2 = sx2 & 0xffff;
            int  fy1 = sy1 & 0xffff;
            int  fy2 = sy2 & 0xffff;

            /* non-zero when a whole pixel lies strictly between the first
             * and the last one covered, horizontally / vertically */
            int  center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
            int  center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);

            ARGB_ZERO(pix);

            /* a far edge on a pixel boundary covers the whole last pixel */
            if (fx2 == 0) {
                fx2 = 65536;
            }
            if (fy2 == 0) {
                fy2 = 65536;
            }
            /* turn the near-edge offsets into covered fractions */
            fx1 = 65536 - fx1;
            fy1 = 65536 - fy1;

            /** TOP BAND
             **/

            /* top-left pixel */
            ARGB_READ(spix,src);
            ARGB_REDUCE(spix,cross(fx1,fy1));
            ARGB_ADD(pix,spix);

            /* top-center pixel, if any */
            ARGB_READ(spix,src + 4);
            if (center_x) {
                ARGB_REDUCE(spix,fy1);
                ARGB_ADD(pix,spix);
                ARGB_READ(spix,src + 8);
            }

            /* top-right pixel */
            ARGB_REDUCE(spix,cross(fx2,fy1));
            ARGB_ADD(pix,spix);

            /** MIDDLE BAND, IF ANY
             **/
            if (center_y) {
                /* left-middle pixel */
                ARGB_READ(spix,src + off);
                ARGB_REDUCE(spix,fx1);
                ARGB_ADD(pix,spix);

                /* center pixel, if any; it is fully covered, hence no reduce */
                ARGB_READ(spix,src + off + 4);
                if (center_x) {
                    ARGB_ADD(pix,spix);
                    ARGB_READ(spix,src + off + 8);
                }

                /* right-middle pixel */
                ARGB_REDUCE(spix,fx2);
                ARGB_ADD(pix,spix);

                off += src_pitch;
            }

            /** BOTTOM BAND
             **/
            /* left-bottom pixel */
            ARGB_READ(spix,src + off);
            ARGB_REDUCE(spix,cross(fx1,fy2));
            ARGB_ADD(pix,spix);

            /* center-bottom, if any */
            ARGB_READ(spix,src + off + 4);
            if (center_x) {
                ARGB_REDUCE(spix,fy2);
                ARGB_ADD(pix,spix);
                ARGB_READ(spix,src + off + 8);
            }

            /* right-bottom pixel */
            ARGB_REDUCE(spix,cross(fx2,fy2));
            ARGB_ADD(pix,spix);

            /** WRITE IT
             **/
            ARGB_RESCALE(pix,scale2);
            ARGB_WRITE(pix,dst);

            /* step to the next source square on this line */
            sx1  = sx2;
            src += (sx1 >> 16)*4;
            sx1 &= 0xffff;
            dst += 4;
        }

        /* step to the next source band */
        sy       += iy;
        src_line += (sy >> 16)*src_pitch;
        sy       &= 0xffff;

        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_05_TO_10


#ifdef ARGB_SCALE_UP_BILINEAR
/*
 * Bilinear scaler.  For every destination pixel, the four source pixels
 * around the sample point are blended: first horizontally on two rows, then
 * vertically between those two results.  The blend factors are the top 8 bits
 * of the fractional parts of the 16.16 sample coordinates.
 *
 * Neighbour coordinates are clamped to [0, op->src_w-1] x [0, op->src_h-1],
 * so samples on the border reuse the edge pixels.
 */
static void
scale_up_bilinear( ScaleOp*  op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    int        sx = op->sx;
    int        sy = op->sy;
    int        ix = op->ix;
    int        iy = op->iy;
    int        xlimit, ylimit;
    int        h, sx0;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;   /* every destination line restarts from this column */

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2, alpha;
            uint8_t*   s;

            ARGB_DECL_ZERO();
            ARGB_DECL2(spix1,spix2);
            ARGB_DECL2(pix3,pix4);
            ARGB_DECL(pix);

            /* find the four neighbours */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = (sx+65535) >> 16;
            ey2 = (sy+65535) >> 16;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* from here on ex2/ey2 are byte offsets relative to (ex1,ey1) */
            ex2 = (ex2-ex1)*4;
            ey2 = (ey2-ey1)*src_pitch;

            /* interpolate: upper row first */
            s   = src_line + ex1*4 + ey1*src_pitch;
            ARGB_READ(spix1, s);
            ARGB_READ(spix2, s+ex2);

            alpha  = (sx >> 8) & 0xff;
            ARGB_INTERP255(pix3,spix1,spix2,alpha);

            /* then the lower row, with the same horizontal factor */
            s  += ey2;
            ARGB_READ(spix1, s);
            ARGB_READ(spix2, s+ex2);

            ARGB_INTERP255(pix4,spix1,spix2,alpha);

            /* finally blend the two rows vertically */
            alpha = (sy >> 8) & 0xff;
            ARGB_INTERP255(pix,pix3,pix4,alpha);

            ARGB_WRITE(pix,dst);

            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_UP_BILINEAR

#ifdef ARGB_SCALE_UP_QUICK_4x4
/*
 * Quick interpolating scaler.  The function is emitted under whatever name
 * the including file gave to ARGB_SCALE_UP_QUICK_4x4.
 *
 * The fractional position of each sample is quantised to two bits per axis,
 * giving 16 cases.  Each case blends the neighbouring source pixels with the
 * fixed weights 1:0, 3:1, 1:1 or 1:3 per axis, using only adds and shifts:
 * the horizontal sums of the two rows are combined vertically and the total
 * is shifted right once by the log2 of the combined weight.
 *
 * Neighbour coordinates are clamped to [0, op->src_w-1] x [0, op->src_h-1].
 */
static void
ARGB_SCALE_UP_QUICK_4x4( ScaleOp*  op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    int        sx = op->sx;
    int        sy = op->sy;
    int        ix = op->ix;
    int        iy = op->iy;
    int        xlimit, ylimit;
    int        h, sx0;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;   /* every destination line restarts from this column */

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2;
            uint8_t*   p;
            ARGB_DECL_ZERO();
            ARGB_DECL(pix);
            ARGB_DECL2(spix1, spix2);
            ARGB_DECL2(pix3, pix4);

            /* find the four neighbours */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = (sx+65535) >> 16;
            ey2 = (sy+65535) >> 16;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* interpolate */
            p   = (src_line + ex1*4 + ey1*src_pitch);

            /* from here on ex2/ey2 are byte offsets relative to (ex1,ey1) */
            ex2 = (ex2-ex1)*4;
            ey2 = (ey2-ey1)*src_pitch;

            /* bits 0-1: horizontal quarter, bits 2-3: vertical quarter */
            switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
            case 0:
                *(uint32_t*)dst = *(uint32_t*)p;
                break;

            /* top-line is easy */
            case 1:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            case 2:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix, spix1, spix2);
                ARGB_SHR(pix,pix,1);
                ARGB_WRITE(pix, dst);
                break;

            case 3:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            /* second line is harder */
            case 4:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ey2);
                ARGB_ADDW_31(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            case 5:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix4,spix1,spix2);

                ARGB_ADDW_31(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
                break;

            case 6:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix4,spix1,spix2);

                ARGB_ADDW_31(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            case 7:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix4,spix1,spix2);

                ARGB_ADDW_31(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
                break;

            /* third line */
            case 8:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ey2);
                ARGB_ADDW_11(pix,spix1,spix2);
                ARGB_SHR(pix,pix,1);
                ARGB_WRITE(pix, dst);
                break;

            case 9:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix4,spix1,spix2);

                ARGB_ADDW_11(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            case 10:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix4,spix1,spix2);

                ARGB_ADDW_11(pix,pix3,pix4);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix,dst);
                break;

            case 11:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix4,spix1,spix2);

                ARGB_ADDW_11(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            /* last line */
            case 12:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ey2);
                ARGB_ADDW_13(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            case 13:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix4,spix1,spix2);

                ARGB_ADDW_13(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
                break;

            case 14:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix4,spix1,spix2);

                ARGB_ADDW_13(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            /* case 15 */
            default:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix4,spix1,spix2);

                ARGB_ADDW_13(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
            }
            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_UP_QUICK_4x4


#ifdef ARGB_SCALE_NEAREST
/* this version scales up with nearest neighbours - looks crap
 *
 * The function is emitted under whatever name the including file gave to
 * ARGB_SCALE_NEAREST.  Each destination pixel is a plain copy of one source
 * pixel; no ARGB_xxx() arithmetic is involved.
 */
static void
ARGB_SCALE_NEAREST( ScaleOp*  op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    int        sx = op->sx;
    int        sy = op->sy;
    int        ix = op->ix;
    int        iy = op->iy;
    int        xlimit, ylimit;
    int        h, sx0;

    /* the center pixel is at (sx+ix/2, sy+iy/2); after removing half a  */
    /* source pixel, a fractional part of 0.5 or more on an axis selects */
    /* the next source pixel on that axis instead of the top-left one    */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;   /* every destination line restarts from this column */

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2;
            unsigned*  p;

            /* find the top-left neighbour */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = ex1+1;
            ey2 = ey1+1;

            /* clamp to the source image */
            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            p   = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
            if ((sx & 0xffff) >= 32768)
                p += (ex2-ex1);                 /* in pixels */
            if ((sy & 0xffff) >= 32768)
                p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);   /* in bytes */

            *(unsigned*)dst = p[0];

            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
}
#endif
#undef ARGB_SCALE_NEAREST
