1 /* Copyright (C) 2007-2008 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 ** GNU General Public License for more details.
11 */
12 /* this file contains template code and may be included multiple times */
13
14 #ifndef ARGB_T_DEFINED
15 #define ARGB_T_DEFINED
16
17 #if USE_MMX
18 #include <mmintrin.h>
19
/* 64-bit MMX register type used by the helpers below */
typedef __m64  mmx_t;
/* in the MMX build an unpacked ARGB value is a single MMX register */
typedef __m64  argb_t;
22
23 static inline mmx_t
mmx_load8888(unsigned value,mmx_t zero)24 mmx_load8888( unsigned value, mmx_t zero )
25 {
26 return _mm_unpacklo_pi8( _mm_cvtsi32_si64 (value), zero);
27 }
28
29 static inline unsigned
mmx_save8888(mmx_t argb,mmx_t zero)30 mmx_save8888( mmx_t argb, mmx_t zero )
31 {
32 return (unsigned) _mm_cvtsi64_si32( _mm_packs_pu16( argb, zero ) );
33 }
34
35 static inline mmx_t
mmx_expand16(int value)36 mmx_expand16( int value )
37 {
38 mmx_t t1 = _mm_cvtsi32_si64( value );
39 return _mm_packs_pi32( t1, t1 );
40 }
41
42 static inline mmx_t
mmx_mulshift(mmx_t argb,int multiplier,int rshift,mmx_t zero)43 mmx_mulshift( mmx_t argb, int multiplier, int rshift, mmx_t zero )
44 {
45 mmx_t ar = _mm_unpackhi_pi16(argb, zero );
46 mmx_t gb = _mm_unpacklo_pi16(argb, zero );
47 mmx_t mult = mmx_expand16(multiplier);
48
49 ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
50 gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );
51
52 return _mm_packs_pi32( gb, ar );
53 }
54
55 static inline mmx_t
mmx_interp255(mmx_t m1,mmx_t m2,mmx_t zero,int alpha)56 mmx_interp255( mmx_t m1, mmx_t m2, mmx_t zero, int alpha )
57 {
58 mmx_t mult, mult2, t1, t2, r1, r2;
59
60 // m1 = [ a1 | r1 | g1 | b1 ]
61 // m2 = [ a2 | r2 | g2 | b2 ]
62 alpha = (alpha << 16) | (alpha ^ 255);
63 mult = _mm_cvtsi32_si64( alpha ); // mult = [ 0 | 0 | a | 1-a ]
64 mult2 = _mm_slli_si64( mult, 32 ); // mult2 = [ a | 1-a | 0 | 0 ]
65 mult = _mm_or_si64( mult, mult2 ); // mults = [ a | 1-a | a | 1-a ]
66
67 t1 = _mm_unpackhi_pi16( m1, m2 ); // t1 = [ a2 | a1 | r2 | r1 ]
68 r1 = _mm_madd_pi16( t1, mult ); // r1 = [ ra | rr ]
69
70 t2 = _mm_unpacklo_pi16( m1, m2 ); // t1 = [ g2 | g1 | b2 | b1 ]
71 r2 = _mm_madd_pi16( t2, mult ); // r2 = [ rg | rb ]
72
73 r1 = _mm_srli_pi32( r1, 8 );
74 r2 = _mm_srli_pi32( r2, 8 );
75
76 return _mm_packs_pi32( r2, r1 );
77 }
78
/* MMX flavour of the ARGB accumulator macros.
 *
 * An unpacked pixel is one mmx_t holding four 16-bit lanes. Every function
 * using these macros must invoke ARGB_DECL_ZERO() first: it declares the
 * local '_zero' register that most of the other macros refer to.
 *
 * Multi-statement macros are wrapped in do { } while (0) and must be used
 * as statements; macro arguments are parenthesized in the expansions.
 */
#define  ARGB_DECL_ZERO()   mmx_t    _zero = _mm_setzero_si64()
#define  ARGB_DECL(x)       mmx_t    x
#define  ARGB_DECL2(x1,x2)  mmx_t    x1, x2
#define  ARGB_ZERO(x)       ((x) = _zero)
#define  ARGB_UNPACK(x,v)   ((x) = mmx_load8888((v), _zero))
#define  ARGB_PACK(x)       mmx_save8888((x), _zero)
#define  ARGB_COPY(x,y)     ((x) = (y))

/* NOTE(review): the addition is done on 32-bit lanes although the channels
 * are 16-bit; this matches a 16-bit add as long as no channel sum carries
 * out of its 16 bits.
 */
#define  ARGB_SUM(x1,x2,x3) ((x1) = _mm_add_pi32((x2), (x3)))

/* scale x by (red >> 8)/256; left untouched when (red >> 8) >= 256 */
#define  ARGB_REDUCE(x,red) \
    do { \
        int  _red = (red) >> 8; \
        if (_red < 256) \
            (x) = mmx_mulshift( (x), _red, 8, _zero ); \
    } while (0)

#define  ARGB_INTERP255(x1,x2,x3,alpha) \
    ((x1) = mmx_interp255( (x2), (x3), _zero, (alpha) ))

/* x1 = x2 + x3 */
#define  ARGB_ADDW_11(x1,x2,x3) \
    ARGB_SUM(x1,x2,x3)

/* x1 = 3*x2 + x3, computed as (x2 + x3) + 2*x2 on 16-bit lanes */
#define  ARGB_ADDW_31(x1,x2,x3) \
    do { \
        mmx_t  _t1 = _mm_add_pi16((x2), (x3)); \
        mmx_t  _t2 = _mm_slli_pi16((x2), 1); \
        (x1) = _mm_add_pi16(_t1, _t2); \
    } while (0)

/* x1 = x2 + 3*x3, computed as (x2 + x3) + 2*x3 on 16-bit lanes */
#define  ARGB_ADDW_13(x1,x2,x3) \
    do { \
        mmx_t  _t1 = _mm_add_pi16((x2), (x3)); \
        mmx_t  _t2 = _mm_slli_pi16((x3), 1); \
        (x1) = _mm_add_pi16(_t1, _t2); \
    } while (0)

/* logical right shift of every 16-bit lane */
#define  ARGB_SHR(x1,x2,s) \
    ((x1) = _mm_srli_pi16((x2), (s)))

/* x1 = (x2 * v) >> s on every lane */
#define  ARGB_MULSHIFT(x1,x2,v,s) \
    ((x1) = mmx_mulshift((x2), (v), (s), _zero))

/* must be invoked once the MMX work of a function is finished */
#define  ARGB_DONE           _mm_empty()

/* fixed-point precision of the squared scale factor used by ARGB_RESCALE */
#define  ARGB_RESCALE_SHIFT      10
#define  ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define  ARGB_RESCALE(x,s2)      ((x) = mmx_mulshift( (x), (s2), ARGB_RESCALE_SHIFT, _zero ))
126
127 #else /* !USE_MMX */
128
/* Portable (non-MMX) flavour of the ARGB accumulator macros.
 *
 * An unpacked pixel 'x' is a pair of 32-bit variables:
 *   x_ag  holds bytes 3 and 1 of the packed pixel (alpha and green for
 *         0xAARRGGBB data) in bits 16-23 and 0-7
 *   x_rb  holds bytes 2 and 0 (red and blue) in the same positions
 * which leaves 8 spare bits above each channel for sums and products.
 *
 * Multi-statement macros are wrapped in do { } while (0) and must be used
 * as statements.
 */
typedef uint32_t    argb_t;

#define  ARGB_DECL_ZERO()   /* nothing */
#define  ARGB_DECL(x)       argb_t    x##_ag, x##_rb
#define  ARGB_DECL2(x1,x2)  argb_t    x1##_ag, x1##_rb, x2##_ag, x2##_rb
#define  ARGB_ZERO(x)       (x##_ag = x##_rb = 0)
#define  ARGB_COPY(x,y)     (x##_ag = y##_ag, x##_rb = y##_rb)

/* split a packed 32-bit pixel into its two channel pairs */
#define  ARGB_UNPACK(x,v) \
    do { \
        argb_t  _v = (argb_t)(v); \
        x##_ag = (_v >> 8) & 0xff00ff; \
        x##_rb = (_v)      & 0xff00ff; \
    } while (0)

/* recombine the two channel pairs; no masking is done here, so each
 * channel must already be back in the 0..255 range */
#define  ARGB_PACK(x)       ((uint32_t)(((x##_ag) << 8) | x##_rb))

#define  ARGB_SUM(x1,x2,x3) \
    do { \
        x1##_ag = x2##_ag + x3##_ag; \
        x1##_rb = x2##_rb + x3##_rb; \
    } while (0)

/* scale x by (red >> 8)/256; left untouched when (red >> 8) >= 256 */
#define  ARGB_REDUCE(x,red) \
    do { \
        int  _red = (red) >> 8; \
        if (_red < 256) { \
            x##_ag = ((x##_ag*_red) >> 8) & 0xff00ff; \
            x##_rb = ((x##_rb*_red) >> 8) & 0xff00ff; \
        } \
    } while (0)

/* x1 = (x2*(256-alpha) + x3*alpha) >> 8
 * NOTE(review): '_alpha >> 8' is 0 for any alpha in 0..255, so the
 * adjustment below only has an effect for alpha >= 256.
 */
#define  ARGB_INTERP255(x1,x2,x3,alpha) \
    do { \
        int  _alpha = (alpha); \
        int  _ialpha; \
        _alpha += _alpha >> 8; \
        _ialpha = 256 - _alpha; \
        x1##_ag = ((x2##_ag*_ialpha + x3##_ag*_alpha) >> 8) & 0xff00ff; \
        x1##_rb = ((x2##_rb*_ialpha + x3##_rb*_alpha) >> 8) & 0xff00ff; \
    } while (0)

/* x1 = x2 + x3 */
#define  ARGB_ADDW_11(x1,x2,x3) \
    do { \
        x1##_ag = (x2##_ag + x3##_ag); \
        x1##_rb = (x2##_rb + x3##_rb); \
    } while (0)

/* x1 = 3*x2 + x3 */
#define  ARGB_ADDW_31(x1,x2,x3) \
    do { \
        x1##_ag = (3*x2##_ag + x3##_ag); \
        x1##_rb = (3*x2##_rb + x3##_rb); \
    } while (0)

/* x1 = x2 + 3*x3 */
#define  ARGB_ADDW_13(x1,x2,x3) \
    do { \
        x1##_ag = (x2##_ag + 3*x3##_ag); \
        x1##_rb = (x2##_rb + 3*x3##_rb); \
    } while (0)

/* x1 = (x2 * v) >> s, each channel masked back to 8 bits */
#define  ARGB_MULSHIFT(x1,x2,v,s) \
    do { \
        unsigned  _vv = (v); \
        x1##_ag = ((x2##_ag * _vv) >> (s)) & 0xff00ff; \
        x1##_rb = ((x2##_rb * _vv) >> (s)) & 0xff00ff; \
    } while (0)

/* x1 = x2 >> s, each channel masked back to 8 bits */
#define  ARGB_SHR(x1,x2,s) \
    do { \
        int  _s = (s); \
        x1##_ag = (x2##_ag >> _s) & 0xff00ff; \
        x1##_rb = (x2##_rb >> _s) & 0xff00ff; \
    } while (0)

/* nothing to clean up in the portable build */
#define  ARGB_DONE           ((void)0)

/* fixed-point precision of the squared scale factor used by ARGB_RESCALE */
#define  ARGB_RESCALE_SHIFT      8
#define  ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define  ARGB_RESCALE(x,scale2)  ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)
208
209 #endif /* !USE_MMX */
210
/* helpers shared by both flavours */

/* x1 += x2 */
#define  ARGB_ADD(x1,x2)     ARGB_SUM(x1,x1,x2)

/* load / store one packed 32-bit pixel at address p.
 * NOTE(review): p is accessed through a uint32_t pointer, so it presumably
 * has to be suitably aligned for that - confirm against the callers.
 * ARGB_WRITE is parenthesized so that it expands to a single expression.
 */
#define  ARGB_READ(x,p)      ARGB_UNPACK(x,*(uint32_t*)(p))
#define  ARGB_WRITE(x,p)     (*(uint32_t*)(p) = ARGB_PACK(x))
214
215 #endif /* !ARGB_T_DEFINED */
216
217
218
219 #ifdef ARGB_SCALE_GENERIC
220 static void
ARGB_SCALE_GENERIC(ScaleOp * op)221 ARGB_SCALE_GENERIC( ScaleOp* op )
222 {
223 int dst_pitch = op->dst_pitch;
224 int src_pitch = op->src_pitch;
225 uint8_t* dst_line = op->dst_line;
226 uint8_t* src_line = op->src_line;
227 ARGB_DECL_SCALE(scale2, op->scale);
228 int h;
229 int sx = op->sx;
230 int sy = op->sy;
231 int ix = op->ix;
232 int iy = op->iy;
233
234 _mm_empty();
235
236 src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
237 sx &= 0xffff;
238 sy &= 0xffff;
239
240 for ( h = op->rd.h; h > 0; h-- ) {
241 uint8_t* dst = dst_line;
242 uint8_t* src = src_line;
243 uint8_t* dst_end = dst + 4*op->rd.w;
244 int sx1 = sx;
245 int sy1 = sy;
246
247 for ( ; dst < dst_end; ) {
248 int sx2 = sx1 + ix;
249 int sy2 = sy1 + iy;
250
251 ARGB_DECL_ZERO();
252 ARGB_DECL(spix);
253 ARGB_DECL(pix);
254 ARGB_ZERO(pix);
255
256 /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
257 * source square, we're going to compute the sum of its pixels'
258 * colors... simple box filtering
259 */
260 {
261 int gsy, gsx;
262 for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
263 for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
264 uint8_t* s = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
265 int xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
266 unsigned ww, hh;
267 unsigned red;
268
269 if (xmin < sx1) xmin = sx1;
270 if (xmax > sx2) xmax = sx2;
271 if (ymin < sy1) ymin = sy1;
272 if (ymax > sy2) ymax = sy2;
273
274 ww = (unsigned)(xmax-xmin);
275 red = ww;
276
277 hh = (unsigned)(ymax-ymin);
278 red = (hh < 65536) ? (red*hh >> 16U) : red;
279
280 ARGB_READ(spix,s);
281 ARGB_REDUCE(spix,red);
282 ARGB_ADD(pix,spix);
283 }
284 }
285 }
286
287 ARGB_RESCALE(pix,scale2);
288 ARGB_WRITE(pix,dst);
289
290 sx1 = sx2;
291 src += (sx1 >> 16)*4;
292 sx1 &= 0xffff;
293 dst += 4;
294 }
295
296 sy += iy;
297 src_line += (sy >> 16)*src_pitch;
298 sy &= 0xffff;
299
300 dst_line += dst_pitch;
301 }
302 ARGB_DONE;
303 }
304 #endif
305 #undef ARGB_SCALE_GENERIC
306
307
308 #ifdef ARGB_SCALE_05_TO_10
/* Product of two 0.16 fixed-point weights, as a 0.16 fixed-point value.
 * 65536 * 65536 does not fit in 32 bits, so that pair is answered directly.
 */
static inline int cross( int  x, int  y )
{
    const int  one = 65536;
    unsigned   product;

    if (x != one || y != one) {
        product = (unsigned)x * (unsigned)y;
        return (int)(product >> 16U);
    }
    return one;
}
315
316 static void
scale_05_to_10(ScaleOp * op)317 scale_05_to_10( ScaleOp* op )
318 {
319 int dst_pitch = op->dst_pitch;
320 int src_pitch = op->src_pitch;
321 uint8_t* dst_line = op->dst_line;
322 uint8_t* src_line = op->src_line;
323 ARGB_DECL_SCALE(scale2, op->scale);
324 int h;
325 int sx = op->sx;
326 int sy = op->sy;
327 int ix = op->ix;
328 int iy = op->iy;
329
330 _mm_empty();
331
332 src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
333 sx &= 0xffff;
334 sy &= 0xffff;
335
336 for ( h = op->rd.h; h > 0; h-- ) {
337 uint8_t* dst = dst_line;
338 uint8_t* src = src_line;
339 uint8_t* dst_end = dst + 4*op->rd.w;
340 int sx1 = sx;
341 int sy1 = sy;
342
343 for ( ; dst < dst_end; ) {
344 int sx2 = sx1 + ix;
345 int sy2 = sy1 + iy;
346
347 ARGB_DECL_ZERO();
348 ARGB_DECL2(spix, pix);
349
350 int off = src_pitch;
351 int fx1 = sx1 & 0xffff;
352 int fx2 = sx2 & 0xffff;
353 int fy1 = sy1 & 0xffff;
354 int fy2 = sy2 & 0xffff;
355
356 int center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
357 int center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);
358
359 ARGB_ZERO(pix);
360
361 if (fx2 == 0) {
362 fx2 = 65536;
363 }
364 if (fy2 == 0) {
365 fy2 = 65536;
366 }
367 fx1 = 65536 - fx1;
368 fy1 = 65536 - fy1;
369
370 /** TOP BAND
371 **/
372
373 /* top-left pixel */
374 ARGB_READ(spix,src);
375 ARGB_REDUCE(spix,cross(fx1,fy1));
376 ARGB_ADD(pix,spix);
377
378 /* top-center pixel, if any */
379 ARGB_READ(spix,src + 4);
380 if (center_x) {
381 ARGB_REDUCE(spix,fy1);
382 ARGB_ADD(pix,spix);
383 ARGB_READ(spix,src + 8);
384 }
385
386 /* top-right pixel */
387 ARGB_REDUCE(spix,cross(fx2,fy1));
388 ARGB_ADD(pix,spix);
389
390 /** MIDDLE BAND, IF ANY
391 **/
392 if (center_y) {
393 /* left-middle pixel */
394 ARGB_READ(spix,src + off);
395 ARGB_REDUCE(spix,fx1);
396 ARGB_ADD(pix,spix);
397
398 /* center pixel, if any */
399 ARGB_READ(spix,src + off + 4);
400 if (center_x) {
401 ARGB_ADD(pix,spix);
402 ARGB_READ(spix,src + off + 8);
403 }
404
405 /* right-middle pixel */
406 ARGB_REDUCE(spix,fx2);
407 ARGB_ADD(pix,spix);
408
409 off += src_pitch;
410 }
411
412 /** BOTTOM BAND
413 **/
414 /* left-bottom pixel */
415 ARGB_READ(spix,src + off);
416 ARGB_REDUCE(spix,cross(fx1,fy2));
417 ARGB_ADD(pix,spix);
418
419 /* center-bottom, if any */
420 ARGB_READ(spix,src + off + 4);
421 if (center_x) {
422 ARGB_REDUCE(spix,fy2);
423 ARGB_ADD(pix,spix);
424 ARGB_READ(spix,src + off + 8);
425 }
426
427 /* right-bottom pixel */
428 ARGB_REDUCE(spix,cross(fx2,fy2));
429 ARGB_ADD(pix,spix);
430
431 /** WRITE IT
432 **/
433 ARGB_RESCALE(pix,scale2);
434 ARGB_WRITE(pix,dst);
435
436 sx1 = sx2;
437 src += (sx1 >> 16)*4;
438 sx1 &= 0xffff;
439 dst += 4;
440 }
441
442 sy += iy;
443 src_line += (sy >> 16)*src_pitch;
444 sy &= 0xffff;
445
446 dst_line += dst_pitch;
447 }
448 ARGB_DONE;
449 }
450 #endif
451 #undef ARGB_SCALE_05_TO_10
452
453
454 #ifdef ARGB_SCALE_UP_BILINEAR
455 static void
scale_up_bilinear(ScaleOp * op)456 scale_up_bilinear( ScaleOp* op )
457 {
458 int dst_pitch = op->dst_pitch;
459 int src_pitch = op->src_pitch;
460 uint8_t* dst_line = op->dst_line;
461 uint8_t* src_line = op->src_line;
462 int sx = op->sx;
463 int sy = op->sy;
464 int ix = op->ix;
465 int iy = op->iy;
466 int xlimit, ylimit;
467 int h, sx0;
468
469 _mm_empty();
470
471 /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
472 /* the four nearest source pixels, which are at (0.5,0.5) offsets */
473
474 sx = sx + ix/2 - 32768;
475 sy = sy + iy/2 - 32768;
476
477 xlimit = (op->src_w-1);
478 ylimit = (op->src_h-1);
479
480 sx0 = sx;
481
482 for ( h = op->rd.h; h > 0; h-- ) {
483 uint8_t* dst = dst_line;
484 uint8_t* dst_end = dst + 4*op->rd.w;
485
486 sx = sx0;
487 for ( ; dst < dst_end; ) {
488 int ex1, ex2, ey1, ey2, alpha;
489 uint8_t* s;
490
491 ARGB_DECL_ZERO();
492 ARGB_DECL2(spix1,spix2);
493 ARGB_DECL2(pix3,pix4);
494 ARGB_DECL(pix);
495
496 /* find the four neighbours */
497 ex1 = (sx >> 16);
498 ey1 = (sy >> 16);
499 ex2 = (sx+65535) >> 16;
500 ey2 = (sy+65535) >> 16;
501
502 if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
503 if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
504 if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
505 if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
506
507 ex2 = (ex2-ex1)*4;
508 ey2 = (ey2-ey1)*src_pitch;
509
510 /* interpolate */
511 s = src_line + ex1*4 + ey1*src_pitch;
512 ARGB_READ(spix1, s);
513 ARGB_READ(spix2, s+ex2);
514
515 alpha = (sx >> 8) & 0xff;
516 ARGB_INTERP255(pix3,spix1,spix2,alpha);
517
518 s += ey2;
519 ARGB_READ(spix1, s);
520 ARGB_READ(spix2, s+ex2);
521
522 ARGB_INTERP255(pix4,spix1,spix2,alpha);
523
524 alpha = (sy >> 8) & 0xff;
525 ARGB_INTERP255(pix,pix3,pix4,alpha);
526
527 ARGB_WRITE(pix,dst);
528
529 sx += ix;
530 dst += 4;
531 }
532
533 sy += iy;
534 dst_line += dst_pitch;
535 }
536 ARGB_DONE;
537 }
538 #endif
539 #undef ARGB_SCALE_UP_BILINEAR
540
541 #ifdef ARGB_SCALE_UP_QUICK_4x4
542 static void
ARGB_SCALE_UP_QUICK_4x4(ScaleOp * op)543 ARGB_SCALE_UP_QUICK_4x4( ScaleOp* op )
544 {
545 int dst_pitch = op->dst_pitch;
546 int src_pitch = op->src_pitch;
547 uint8_t* dst_line = op->dst_line;
548 uint8_t* src_line = op->src_line;
549 int sx = op->sx;
550 int sy = op->sy;
551 int ix = op->ix;
552 int iy = op->iy;
553 int xlimit, ylimit;
554 int h, sx0;
555
556 _mm_empty();
557
558 /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
559 /* the four nearest source pixels, which are at (0.5,0.5) offsets */
560
561 sx = sx + ix/2 - 32768;
562 sy = sy + iy/2 - 32768;
563
564 xlimit = (op->src_w-1);
565 ylimit = (op->src_h-1);
566
567 sx0 = sx;
568
569 for ( h = op->rd.h; h > 0; h-- ) {
570 uint8_t* dst = dst_line;
571 uint8_t* dst_end = dst + 4*op->rd.w;
572
573 sx = sx0;
574 for ( ; dst < dst_end; ) {
575 int ex1, ex2, ey1, ey2;
576 uint8_t* p;
577 ARGB_DECL_ZERO();
578 ARGB_DECL(pix);
579 ARGB_DECL2(spix1, spix2);
580 ARGB_DECL2(pix3, pix4);
581
582 /* find the four neighbours */
583 ex1 = (sx >> 16);
584 ey1 = (sy >> 16);
585 ex2 = (sx+65535) >> 16;
586 ey2 = (sy+65535) >> 16;
587
588 if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
589 if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
590 if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
591 if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
592
593 /* interpolate */
594 p = (src_line + ex1*4 + ey1*src_pitch);
595
596 ex2 = (ex2-ex1)*4;
597 ey2 = (ey2-ey1)*src_pitch;
598
599 switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
600 case 0:
601 *(uint32_t*)dst = *(uint32_t*)p;
602 break;
603
604 /* top-line is easy */
605 case 1:
606 ARGB_READ(spix1, p);
607 ARGB_READ(spix2, p+ex2);
608 ARGB_ADDW_31(pix,spix1,spix2);
609 ARGB_SHR(pix,pix,2);
610 ARGB_WRITE(pix, dst);
611 break;
612
613 case 2:
614 ARGB_READ(spix1, p);
615 ARGB_READ(spix2, p+ex2);
616 ARGB_ADDW_11(pix, spix1, spix2);
617 ARGB_SHR(pix,pix,1);
618 ARGB_WRITE(pix, dst);
619 break;
620
621 case 3:
622 ARGB_READ(spix1, p);
623 ARGB_READ(spix2, p+ex2);
624 ARGB_ADDW_13(pix,spix1,spix2);
625 ARGB_SHR(pix,pix,2);
626 ARGB_WRITE(pix, dst);
627 break;
628
629 /* second line is harder */
630 case 4:
631 ARGB_READ(spix1, p);
632 ARGB_READ(spix2, p+ey2);
633 ARGB_ADDW_31(pix,spix1,spix2);
634 ARGB_SHR(pix,pix,2);
635 ARGB_WRITE(pix, dst);
636 break;
637
638 case 5:
639 ARGB_READ(spix1, p);
640 ARGB_READ(spix2, p+ex2);
641 ARGB_ADDW_31(pix3,spix1,spix2);
642 p += ey2;
643 ARGB_READ(spix1, p);
644 ARGB_READ(spix2, p+ex2);
645 ARGB_ADDW_31(pix4,spix1,spix2);
646
647 ARGB_ADDW_31(pix,pix3,pix4);
648 ARGB_SHR(pix,pix,4);
649 ARGB_WRITE(pix,dst);
650 break;
651
652 case 6:
653 ARGB_READ(spix1, p);
654 ARGB_READ(spix2, p+ex2);
655 ARGB_ADDW_11(pix3,spix1,spix2);
656 p += ey2;
657 ARGB_READ(spix1, p);
658 ARGB_READ(spix2, p+ex2);
659 ARGB_ADDW_11(pix4,spix1,spix2);
660
661 ARGB_ADDW_31(pix,pix3,pix4);
662 ARGB_SHR(pix,pix,3);
663 ARGB_WRITE(pix,dst);
664 break;
665
666 case 7:
667 ARGB_READ(spix1, p);
668 ARGB_READ(spix2, p+ex2);
669 ARGB_ADDW_13(pix3,spix1,spix2);
670 p += ey2;
671 ARGB_READ(spix1, p);
672 ARGB_READ(spix2, p+ex2);
673 ARGB_ADDW_13(pix4,spix1,spix2);
674
675 ARGB_ADDW_31(pix,pix3,pix4);
676 ARGB_SHR(pix,pix,4);
677 ARGB_WRITE(pix,dst);
678 break;
679
680 /* third line */
681 case 8:
682 ARGB_READ(spix1, p);
683 ARGB_READ(spix2, p+ey2);
684 ARGB_ADDW_11(pix,spix1,spix2);
685 ARGB_SHR(pix,pix,1);
686 ARGB_WRITE(pix, dst);
687 break;
688
689 case 9:
690 ARGB_READ(spix1, p);
691 ARGB_READ(spix2, p+ex2);
692 ARGB_ADDW_31(pix3,spix1,spix2);
693 p += ey2;
694 ARGB_READ(spix1, p);
695 ARGB_READ(spix2, p+ex2);
696 ARGB_ADDW_31(pix4,spix1,spix2);
697
698 ARGB_ADDW_11(pix,pix3,pix4);
699 ARGB_SHR(pix,pix,3);
700 ARGB_WRITE(pix,dst);
701 break;
702
703 case 10:
704 ARGB_READ(spix1, p);
705 ARGB_READ(spix2, p+ex2);
706 ARGB_ADDW_11(pix3,spix1,spix2);
707 p += ey2;
708 ARGB_READ(spix1, p);
709 ARGB_READ(spix2, p+ex2);
710 ARGB_ADDW_11(pix4,spix1,spix2);
711
712 ARGB_ADDW_11(pix,pix3,pix4);
713 ARGB_SHR(pix,pix,2);
714 ARGB_WRITE(pix,dst);
715 break;
716
717 case 11:
718 ARGB_READ(spix1, p);
719 ARGB_READ(spix2, p+ex2);
720 ARGB_ADDW_13(pix3,spix1,spix2);
721 p += ey2;
722 ARGB_READ(spix1, p);
723 ARGB_READ(spix2, p+ex2);
724 ARGB_ADDW_13(pix4,spix1,spix2);
725
726 ARGB_ADDW_11(pix,pix3,pix4);
727 ARGB_SHR(pix,pix,3);
728 ARGB_WRITE(pix,dst);
729 break;
730
731 /* last line */
732 case 12:
733 ARGB_READ(spix1, p);
734 ARGB_READ(spix2, p+ey2);
735 ARGB_ADDW_13(pix,spix1,spix2);
736 ARGB_SHR(pix,pix,2);
737 ARGB_WRITE(pix, dst);
738 break;
739
740 case 13:
741 ARGB_READ(spix1, p);
742 ARGB_READ(spix2, p+ex2);
743 ARGB_ADDW_31(pix3,spix1,spix2);
744 p += ey2;
745 ARGB_READ(spix1, p);
746 ARGB_READ(spix2, p+ex2);
747 ARGB_ADDW_31(pix4,spix1,spix2);
748
749 ARGB_ADDW_13(pix,pix3,pix4);
750 ARGB_SHR(pix,pix,4);
751 ARGB_WRITE(pix,dst);
752 break;
753
754 case 14:
755 ARGB_READ(spix1, p);
756 ARGB_READ(spix2, p+ex2);
757 ARGB_ADDW_11(pix3,spix1,spix2);
758 p += ey2;
759 ARGB_READ(spix1, p);
760 ARGB_READ(spix2, p+ex2);
761 ARGB_ADDW_11(pix4,spix1,spix2);
762
763 ARGB_ADDW_13(pix,pix3,pix4);
764 ARGB_SHR(pix,pix,3);
765 ARGB_WRITE(pix,dst);
766 break;
767
768 default:
769 ARGB_READ(spix1, p);
770 ARGB_READ(spix2, p+ex2);
771 ARGB_ADDW_13(pix3,spix1,spix2);
772 p += ey2;
773 ARGB_READ(spix1, p);
774 ARGB_READ(spix2, p+ex2);
775 ARGB_ADDW_13(pix4,spix1,spix2);
776
777 ARGB_ADDW_13(pix,pix3,pix4);
778 ARGB_SHR(pix,pix,4);
779 ARGB_WRITE(pix,dst);
780 }
781 sx += ix;
782 dst += 4;
783 }
784
785 sy += iy;
786 dst_line += dst_pitch;
787 }
788 ARGB_DONE;
789 }
790 #endif
791 #undef ARGB_SCALE_UP_QUICK_4x4
792
793
794 #ifdef ARGB_SCALE_NEAREST
795 /* this version scales up with nearest neighbours - looks crap */
796 static void
ARGB_SCALE_NEAREST(ScaleOp * op)797 ARGB_SCALE_NEAREST( ScaleOp* op )
798 {
799 int dst_pitch = op->dst_pitch;
800 int src_pitch = op->src_pitch;
801 uint8_t* dst_line = op->dst_line;
802 uint8_t* src_line = op->src_line;
803 int sx = op->sx;
804 int sy = op->sy;
805 int ix = op->ix;
806 int iy = op->iy;
807 int xlimit, ylimit;
808 int h, sx0;
809
810 _mm_empty();
811
812 /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
813 /* the four nearest source pixels, which are at (0.5,0.5) offsets */
814
815 sx = sx + ix/2 - 32768;
816 sy = sy + iy/2 - 32768;
817
818 xlimit = (op->src_w-1);
819 ylimit = (op->src_h-1);
820
821 sx0 = sx;
822
823 for ( h = op->rd.h; h > 0; h-- ) {
824 uint8_t* dst = dst_line;
825 uint8_t* dst_end = dst + 4*op->rd.w;
826
827 sx = sx0;
828 for ( ; dst < dst_end; ) {
829 int ex1, ex2, ey1, ey2;
830 unsigned* p;
831
832 /* find the top-left neighbour */
833 ex1 = (sx >> 16);
834 ey1 = (sy >> 16);
835 ex2 = ex1+1;
836 ey2 = ey1+1;
837
838 if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
839 if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
840 if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
841 if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
842
843 p = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
844 if ((sx & 0xffff) >= 32768)
845 p += (ex2-ex1);
846 if ((sy & 0xffff) >= 32768)
847 p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);
848
849 *(unsigned*)dst = p[0];
850
851 sx += ix;
852 dst += 4;
853 }
854
855 sy += iy;
856 dst_line += dst_pitch;
857 }
858 }
859 #endif
860 #undef ARGB_SCALE_NEAREST
861