1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3 * Copyright © 2000 SuSE, Inc.
4 * Copyright © 2007 Red Hat, Inc.
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of SuSE not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. SuSE makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
15 *
16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 *
23 * Author: Keith Packard, SuSE, Inc.
24 */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
31 #define PIXMAN_REPEAT_COVER -1
32
/* Flags describing input parameters to the fast path macro template.
 * Turning on a flag value indicates either that
 * "some property X is available, so the template can make use of it" or that
 * "some property X should be handled by the template".
 *
 * FLAG_HAVE_SOLID_MASK
 * The input mask is solid, so the template should handle it.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 * The input mask is a bits mask, so the template should handle it.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive (it is not allowed to turn both flags on at the same time).
 */
47 #define FLAG_NONE (0)
48 #define FLAG_HAVE_SOLID_MASK (1 << 1)
49 #define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
50
/* To avoid excessively short repeated scanline function calls, extend
 * source scanlines whose width is less than the constant value below.
 */
54 #define REPEAT_NORMAL_MIN_WIDTH 64
55
56 static force_inline pixman_bool_t
repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59 if (repeat == PIXMAN_REPEAT_NONE)
60 {
61 if (*c < 0 || *c >= size)
62 return FALSE;
63 }
64 else if (repeat == PIXMAN_REPEAT_NORMAL)
65 {
66 while (*c >= size)
67 *c -= size;
68 while (*c < 0)
69 *c += size;
70 }
71 else if (repeat == PIXMAN_REPEAT_PAD)
72 {
73 *c = CLIP (*c, 0, size - 1);
74 }
75 else /* REFLECT */
76 {
77 *c = MOD (*c, size * 2);
78 if (*c >= size)
79 *c = size * 2 - *c - 1;
80 }
81 return TRUE;
82 }
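/*
 * Worked example (illustrative sketch, not part of the original code): for
 * size = 10 and an input coordinate of -3, the four repeat modes map the
 * coordinate as follows:
 *
 *     int c;
 *     c = -3; repeat (PIXMAN_REPEAT_NONE,    &c, 10);   // returns FALSE
 *     c = -3; repeat (PIXMAN_REPEAT_NORMAL,  &c, 10);   // c becomes 7
 *     c = -3; repeat (PIXMAN_REPEAT_PAD,     &c, 10);   // c becomes 0
 *     c = -3; repeat (PIXMAN_REPEAT_REFLECT, &c, 10);   // c becomes 2
 */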
83
84 static force_inline int
pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 {
87 return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88 ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89 }
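/*
 * Worked example (illustrative, assuming BILINEAR_INTERPOLATION_BITS is 7):
 * the fixed-point coordinate 1.5 (0x18000) yields
 *
 *     pixman_fixed_to_bilinear_weight (0x18000) == 64
 *
 * i.e. exactly half of BILINEAR_INTERPOLATION_RANGE (128), as expected for
 * a sample point halfway between two pixels.
 */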
90
91 #if BILINEAR_INTERPOLATION_BITS <= 4
92 /* Inspired by Filter_32_opaque from Skia */
93 static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
                        uint32_t bl, uint32_t br,
                        int distx, int disty)
97 {
98 int distxy, distxiy, distixy, distixiy;
99 uint32_t lo, hi;
100
101 distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102 disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103
104 distxy = distx * disty;
105 distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
106 distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
107 distixiy =
108 16 * 16 - (disty << 4) -
109 (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110
111 lo = (tl & 0xff00ff) * distixiy;
112 hi = ((tl >> 8) & 0xff00ff) * distixiy;
113
114 lo += (tr & 0xff00ff) * distxiy;
115 hi += ((tr >> 8) & 0xff00ff) * distxiy;
116
117 lo += (bl & 0xff00ff) * distixy;
118 hi += ((bl >> 8) & 0xff00ff) * distixy;
119
120 lo += (br & 0xff00ff) * distxy;
121 hi += ((br >> 8) & 0xff00ff) * distxy;
122
123 return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 }
125
126 #else
127 #if SIZEOF_LONG > 4
128
129 static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
                        uint32_t bl, uint32_t br,
                        int distx, int disty)
133 {
134 uint64_t distxy, distxiy, distixy, distixiy;
135 uint64_t tl64, tr64, bl64, br64;
136 uint64_t f, r;
137
138 distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139 disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140
141 distxy = distx * disty;
142 distxiy = distx * (256 - disty);
143 distixy = (256 - distx) * disty;
144 distixiy = (256 - distx) * (256 - disty);
145
146 /* Alpha and Blue */
147 tl64 = tl & 0xff0000ff;
148 tr64 = tr & 0xff0000ff;
149 bl64 = bl & 0xff0000ff;
150 br64 = br & 0xff0000ff;
151
152 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153 r = f & 0x0000ff0000ff0000ull;
154
155 /* Red and Green */
156 tl64 = tl;
157 tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158
159 tr64 = tr;
160 tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161
162 bl64 = bl;
163 bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164
165 br64 = br;
166 br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167
168 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169 r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170
171 return (uint32_t)(r >> 16);
172 }
173
174 #else
175
176 static force_inline uint32_t
bilinear_interpolation (uint32_t tl, uint32_t tr,
                        uint32_t bl, uint32_t br,
                        int distx, int disty)
180 {
181 int distxy, distxiy, distixy, distixiy;
182 uint32_t f, r;
183
184 distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
185 disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
186
187 distxy = distx * disty;
188 distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
189 distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
190 distixiy =
191 256 * 256 - (disty << 8) -
192 (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
193
194 /* Blue */
195 r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
196 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
197
198 /* Green */
199 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
200 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
201 r |= f & 0xff000000;
202
203 tl >>= 16;
204 tr >>= 16;
205 bl >>= 16;
206 br >>= 16;
207 r >>= 16;
208
209 /* Red */
210 f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
211 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
212 r |= f & 0x00ff0000;
213
214 /* Alpha */
215 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
216 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
217 r |= f & 0xff000000;
218
219 return r;
220 }
221
222 #endif
#endif /* BILINEAR_INTERPOLATION_BITS <= 4 */
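/*
 * Usage sketch (illustrative, with hypothetical variable names): the
 * fractional weights normally come from pixman_fixed_to_bilinear_weight()
 * applied to the fixed-point sample coordinates:
 *
 *     int distx = pixman_fixed_to_bilinear_weight (vx);
 *     int disty = pixman_fixed_to_bilinear_weight (vy);
 *     uint32_t p = bilinear_interpolation (tl, tr, bl, br, distx, disty);
 *
 * When all four input pixels are equal, the result is that same pixel; when
 * distx == disty == BILINEAR_INTERPOLATION_RANGE / 2, each channel is
 * approximately the average of the four corresponding input channels.
 */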
224
225 /*
226 * For each scanline fetched from source image with PAD repeat:
227 * - calculate how many pixels need to be padded on the left side
228 * - calculate how many pixels need to be padded on the right side
229 * - update width to only count pixels which are fetched from the image
230 * All this information is returned via 'width', 'left_pad', 'right_pad'
 * arguments. The code assumes that 'unit_x' is positive.
232 *
233 * Note: 64-bit math is used in order to avoid potential overflows, which
234 * is probably excessive in many cases. This particular function
235 * may need its own correctness test and performance tuning.
236 */
237 static force_inline void
pad_repeat_get_scanline_bounds (int32_t        source_image_width,
                                pixman_fixed_t vx,
                                pixman_fixed_t unit_x,
                                int32_t *      width,
                                int32_t *      left_pad,
                                int32_t *      right_pad)
244 {
245 int64_t max_vx = (int64_t) source_image_width << 16;
246 int64_t tmp;
247 if (vx < 0)
248 {
249 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
250 if (tmp > *width)
251 {
252 *left_pad = *width;
253 *width = 0;
254 }
255 else
256 {
257 *left_pad = (int32_t) tmp;
258 *width -= (int32_t) tmp;
259 }
260 }
261 else
262 {
263 *left_pad = 0;
264 }
265 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
266 if (tmp < 0)
267 {
268 *right_pad = *width;
269 *width = 0;
270 }
271 else if (tmp >= *width)
272 {
273 *right_pad = 0;
274 }
275 else
276 {
277 *right_pad = *width - (int32_t) tmp;
278 *width = (int32_t) tmp;
279 }
280 }
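/*
 * Worked example (illustrative): for a 4 pixel wide source, vx = -2.5 in
 * fixed point (-0x28000), unit_x = pixman_fixed_1 and *width = 10, the
 * sample positions are -2.5, -1.5, ..., 6.5, so the function returns
 * left_pad = 3 (samples before pixel 0), width = 4 (samples inside the
 * image) and right_pad = 3 (samples past the last pixel); left_pad + width
 * + right_pad still adds up to the original width of 10.
 */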
281
282 /* A macroified version of specialized nearest scalers for some
283 * common 8888 and 565 formats. It supports SRC and OVER ops.
284 *
285 * There are two repeat versions, one that handles repeat normal,
286 * and one without repeat handling that only works if the src region
287 * used is completely covered by the pre-repeated source samples.
288 *
289 * The loops are unrolled to process two pixels per iteration for better
290 * performance on most CPU architectures (superscalar processors
291 * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling more
293 * does not make much sense because the compiler will start running out
294 * of spare registers soon.
295 */
296
297 #define GET_8888_ALPHA(s) ((s) >> 24)
298 /* This is not actually used since we don't have an OVER with
299 565 source, but it is needed to build. */
300 #define GET_0565_ALPHA(s) 0xff
301 #define GET_x888_ALPHA(s) 0xff
302
303 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
304 src_type_t, dst_type_t, OP, repeat_mode) \
305 static force_inline void \
306 scanline_func_name (dst_type_t *dst, \
307 const src_type_t *src, \
308 int32_t w, \
309 pixman_fixed_t vx, \
310 pixman_fixed_t unit_x, \
311 pixman_fixed_t src_width_fixed, \
312 pixman_bool_t fully_transparent_src) \
313 { \
314 uint32_t d; \
315 src_type_t s1, s2; \
316 uint8_t a1, a2; \
317 int x1, x2; \
318 \
319 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
320 return; \
321 \
322 if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
323 abort(); \
324 \
325 while ((w -= 2) >= 0) \
326 { \
327 x1 = pixman_fixed_to_int (vx); \
328 vx += unit_x; \
329 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
330 { \
331 /* This works because we know that unit_x is positive */ \
332 while (vx >= 0) \
333 vx -= src_width_fixed; \
334 } \
335 s1 = *(src + x1); \
336 \
337 x2 = pixman_fixed_to_int (vx); \
338 vx += unit_x; \
339 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
340 { \
341 /* This works because we know that unit_x is positive */ \
342 while (vx >= 0) \
343 vx -= src_width_fixed; \
344 } \
345 s2 = *(src + x2); \
346 \
347 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
348 { \
349 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
350 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
351 \
352 if (a1 == 0xff) \
353 { \
354 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
355 } \
356 else if (s1) \
357 { \
358 d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
359 s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
360 a1 ^= 0xff; \
361 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
362 *dst = convert_8888_to_ ## DST_FORMAT (d); \
363 } \
364 dst++; \
365 \
366 if (a2 == 0xff) \
367 { \
368 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
369 } \
370 else if (s2) \
371 { \
372 d = convert_## DST_FORMAT ## _to_8888 (*dst); \
373 s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \
374 a2 ^= 0xff; \
375 UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
376 *dst = convert_8888_to_ ## DST_FORMAT (d); \
377 } \
378 dst++; \
379 } \
380 else /* PIXMAN_OP_SRC */ \
381 { \
382 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
383 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
384 } \
385 } \
386 \
387 if (w & 1) \
388 { \
389 x1 = pixman_fixed_to_int (vx); \
390 s1 = *(src + x1); \
391 \
392 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
393 { \
394 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
395 \
396 if (a1 == 0xff) \
397 { \
398 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
399 } \
400 else if (s1) \
401 { \
402 d = convert_## DST_FORMAT ## _to_8888 (*dst); \
403 s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
404 a1 ^= 0xff; \
405 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
406 *dst = convert_8888_to_ ## DST_FORMAT (d); \
407 } \
408 dst++; \
409 } \
410 else /* PIXMAN_OP_SRC */ \
411 { \
412 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
413 } \
414 } \
415 }
416
417 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
418 dst_type_t, repeat_mode, have_mask, mask_is_solid) \
419 static void \
420 fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
421 pixman_composite_info_t *info) \
422 { \
423 PIXMAN_COMPOSITE_ARGS (info); \
424 dst_type_t *dst_line; \
425 mask_type_t *mask_line; \
426 src_type_t *src_first_line; \
427 int y; \
428 pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
429 pixman_fixed_t max_vy; \
430 pixman_vector_t v; \
431 pixman_fixed_t vx, vy; \
432 pixman_fixed_t unit_x, unit_y; \
433 int32_t left_pad, right_pad; \
434 \
435 src_type_t *src; \
436 dst_type_t *dst; \
437 mask_type_t solid_mask; \
438 const mask_type_t *mask = &solid_mask; \
439 int src_stride, mask_stride, dst_stride; \
440 \
441 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
442 if (have_mask) \
443 { \
444 if (mask_is_solid) \
445 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
446 else \
447 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
448 mask_stride, mask_line, 1); \
449 } \
450 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
451 * transformed from destination space to source space */ \
452 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
453 \
454 /* reference point is the center of the pixel */ \
455 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
456 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
457 v.vector[2] = pixman_fixed_1; \
458 \
459 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
460 return; \
461 \
462 unit_x = src_image->common.transform->matrix[0][0]; \
463 unit_y = src_image->common.transform->matrix[1][1]; \
464 \
465 /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
466 v.vector[0] -= pixman_fixed_e; \
467 v.vector[1] -= pixman_fixed_e; \
468 \
469 vx = v.vector[0]; \
470 vy = v.vector[1]; \
471 \
472 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
473 { \
474 max_vy = pixman_int_to_fixed (src_image->bits.height); \
475 \
476 /* Clamp repeating positions inside the actual samples */ \
477 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
478 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
479 } \
480 \
481 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
482 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
483 { \
484 pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
485 &width, &left_pad, &right_pad); \
486 vx += left_pad * unit_x; \
487 } \
488 \
489 while (--height >= 0) \
490 { \
491 dst = dst_line; \
492 dst_line += dst_stride; \
493 if (have_mask && !mask_is_solid) \
494 { \
495 mask = mask_line; \
496 mask_line += mask_stride; \
497 } \
498 \
499 y = pixman_fixed_to_int (vy); \
500 vy += unit_y; \
501 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
502 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
503 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
504 { \
505 repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
506 src = src_first_line + src_stride * y; \
507 if (left_pad > 0) \
508 { \
509 scanline_func (mask, dst, \
510 src + src_image->bits.width - src_image->bits.width + 1, \
511 left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
512 } \
513 if (width > 0) \
514 { \
515 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
516 dst + left_pad, src + src_image->bits.width, width, \
517 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
518 } \
519 if (right_pad > 0) \
520 { \
521 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
522 dst + left_pad + width, src + src_image->bits.width, \
523 right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
524 } \
525 } \
526 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
527 { \
528 static const src_type_t zero[1] = { 0 }; \
529 if (y < 0 || y >= src_image->bits.height) \
530 { \
531 scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
532 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
533 continue; \
534 } \
535 src = src_first_line + src_stride * y; \
536 if (left_pad > 0) \
537 { \
538 scanline_func (mask, dst, zero + 1, left_pad, \
539 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
540 } \
541 if (width > 0) \
542 { \
543 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
544 dst + left_pad, src + src_image->bits.width, width, \
545 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
546 } \
547 if (right_pad > 0) \
548 { \
549 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
550 dst + left_pad + width, zero + 1, right_pad, \
551 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
552 } \
553 } \
554 else \
555 { \
556 src = src_first_line + src_stride * y; \
557 scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
558 unit_x, src_width_fixed, FALSE); \
559 } \
560 } \
561 }
562
563 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
564 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
565 dst_type_t, repeat_mode, have_mask, mask_is_solid) \
566 FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
567 dst_type_t, repeat_mode, have_mask, mask_is_solid)
568
569 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
570 repeat_mode) \
571 static force_inline void \
572 scanline_func##scale_func_name##_wrapper ( \
573 const uint8_t *mask, \
574 dst_type_t *dst, \
575 const src_type_t *src, \
576 int32_t w, \
577 pixman_fixed_t vx, \
578 pixman_fixed_t unit_x, \
579 pixman_fixed_t max_vx, \
580 pixman_bool_t fully_transparent_src) \
581 { \
582 scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
583 } \
584 FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
585 src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
586
587 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
588 repeat_mode) \
589 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
590 dst_type_t, repeat_mode)
591
592 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
593 src_type_t, dst_type_t, OP, repeat_mode) \
594 FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
595 SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
596 OP, repeat_mode) \
597 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
598 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
599 src_type_t, dst_type_t, repeat_mode)
600
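/*
 * Instantiation sketch (illustrative; the exact names are chosen by the
 * caller): a SRC blit from a8r8g8b8 to a8r8g8b8 with NORMAL repeat could be
 * generated with
 *
 *     FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t,
 *                   SRC, NORMAL)
 *
 * which defines both the scanline function
 * scaled_nearest_scanline_8888_8888_normal_SRC and the composite function
 * fast_composite_scaled_nearest_8888_8888_normal_SRC expected by the
 * SIMPLE_NEAREST_FAST_PATH_NORMAL() table entry below.
 */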
601
602 #define SCALED_NEAREST_FLAGS \
603 (FAST_PATH_SCALE_TRANSFORM | \
604 FAST_PATH_NO_ALPHA_MAP | \
605 FAST_PATH_NEAREST_FILTER | \
606 FAST_PATH_NO_ACCESSORS | \
607 FAST_PATH_NARROW_FORMAT)
608
609 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
610 { PIXMAN_OP_ ## op, \
611 PIXMAN_ ## s, \
612 (SCALED_NEAREST_FLAGS | \
613 FAST_PATH_NORMAL_REPEAT | \
614 FAST_PATH_X_UNIT_POSITIVE), \
615 PIXMAN_null, 0, \
616 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
617 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
618 }
619
620 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
621 { PIXMAN_OP_ ## op, \
622 PIXMAN_ ## s, \
623 (SCALED_NEAREST_FLAGS | \
624 FAST_PATH_PAD_REPEAT | \
625 FAST_PATH_X_UNIT_POSITIVE), \
626 PIXMAN_null, 0, \
627 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
628 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
629 }
630
631 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
632 { PIXMAN_OP_ ## op, \
633 PIXMAN_ ## s, \
634 (SCALED_NEAREST_FLAGS | \
635 FAST_PATH_NONE_REPEAT | \
636 FAST_PATH_X_UNIT_POSITIVE), \
637 PIXMAN_null, 0, \
638 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
639 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
640 }
641
642 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
643 { PIXMAN_OP_ ## op, \
644 PIXMAN_ ## s, \
645 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
646 PIXMAN_null, 0, \
647 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
648 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
649 }
650
651 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
652 { PIXMAN_OP_ ## op, \
653 PIXMAN_ ## s, \
654 (SCALED_NEAREST_FLAGS | \
655 FAST_PATH_NORMAL_REPEAT | \
656 FAST_PATH_X_UNIT_POSITIVE), \
657 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
658 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
659 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
660 }
661
662 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
663 { PIXMAN_OP_ ## op, \
664 PIXMAN_ ## s, \
665 (SCALED_NEAREST_FLAGS | \
666 FAST_PATH_PAD_REPEAT | \
667 FAST_PATH_X_UNIT_POSITIVE), \
668 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
669 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
670 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
671 }
672
673 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
674 { PIXMAN_OP_ ## op, \
675 PIXMAN_ ## s, \
676 (SCALED_NEAREST_FLAGS | \
677 FAST_PATH_NONE_REPEAT | \
678 FAST_PATH_X_UNIT_POSITIVE), \
679 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
680 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
681 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
682 }
683
684 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
685 { PIXMAN_OP_ ## op, \
686 PIXMAN_ ## s, \
687 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
688 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
689 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
690 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
691 }
692
693 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
694 { PIXMAN_OP_ ## op, \
695 PIXMAN_ ## s, \
696 (SCALED_NEAREST_FLAGS | \
697 FAST_PATH_NORMAL_REPEAT | \
698 FAST_PATH_X_UNIT_POSITIVE), \
699 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
700 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
701 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
702 }
703
704 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
705 { PIXMAN_OP_ ## op, \
706 PIXMAN_ ## s, \
707 (SCALED_NEAREST_FLAGS | \
708 FAST_PATH_PAD_REPEAT | \
709 FAST_PATH_X_UNIT_POSITIVE), \
710 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
711 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
712 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
713 }
714
715 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
716 { PIXMAN_OP_ ## op, \
717 PIXMAN_ ## s, \
718 (SCALED_NEAREST_FLAGS | \
719 FAST_PATH_NONE_REPEAT | \
720 FAST_PATH_X_UNIT_POSITIVE), \
721 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
722 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
723 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
724 }
725
726 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
727 { PIXMAN_OP_ ## op, \
728 PIXMAN_ ## s, \
729 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
730 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
731 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
732 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
733 }
734
/* Prefer the 'cover' variant because it is faster */
736 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
737 SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
738 SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
739 SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
740 SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
741
742 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
743 SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
744 SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
745 SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
746
747 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
748 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
749 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
750 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
751
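/*
 * Table usage sketch (illustrative; the entry shown is hypothetical): these
 * macros expand to pixman_fast_path_t initializers, so a fast path table
 * could contain, for example,
 *
 *     static const pixman_fast_path_t fast_paths[] =
 *     {
 *         SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
 *         { PIXMAN_OP_NONE },
 *     };
 *
 * provided that fast_composite_scaled_nearest_8888_8888_{cover,none,pad,
 * normal}_SRC have been generated with the FAST_NEAREST* macros above.
 */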
752 /*****************************************************************************/
753
754 /*
 * Identify 5 zones in each scanline for bilinear scaling, depending on
 * whether the 2 pixels to be interpolated are fetched from the image itself,
 * from the padding area around it, or from both the image and the padding area.
758 */
759 static force_inline void
bilinear_pad_repeat_get_scanline_bounds (int32_t        source_image_width,
                                         pixman_fixed_t vx,
                                         pixman_fixed_t unit_x,
                                         int32_t *      left_pad,
                                         int32_t *      left_tz,
                                         int32_t *      width,
                                         int32_t *      right_tz,
                                         int32_t *      right_pad)
768 {
769 int width1 = *width, left_pad1, right_pad1;
770 int width2 = *width, left_pad2, right_pad2;
771
772 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
773 &width1, &left_pad1, &right_pad1);
774 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
775 unit_x, &width2, &left_pad2, &right_pad2);
776
777 *left_pad = left_pad2;
778 *left_tz = left_pad1 - left_pad2;
779 *right_tz = right_pad2 - right_pad1;
780 *right_pad = right_pad1;
781 *width -= *left_pad + *left_tz + *right_tz + *right_pad;
782 }
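/*
 * Worked example (illustrative): for a 4 pixel wide source, vx = -2.5 in
 * fixed point, unit_x = pixman_fixed_1 and *width = 10, the pairs of pixels
 * to interpolate are taken at -2.5/-1.5, -1.5/-0.5, ..., 6.5/7.5, giving
 * left_pad = 2, left_tz = 1, width = 3, right_tz = 1 and right_pad = 3
 * (the five zones again add up to the original width of 10).
 */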
783
784 /*
785 * Main loop template for single pass bilinear scaling. It needs to be
786 * provided with 'scanline_func' which should do the compositing operation.
787 * The needed function has the following prototype:
788 *
789 * scanline_func (dst_type_t * dst,
 * const mask_type_t * mask,
791 * const src_type_t * src_top,
792 * const src_type_t * src_bottom,
793 * int32_t width,
794 * int weight_top,
795 * int weight_bottom,
796 * pixman_fixed_t vx,
797 * pixman_fixed_t unit_x,
798 * pixman_fixed_t max_vx,
799 * pixman_bool_t zero_src)
800 *
801 * Where:
802 * dst - destination scanline buffer for storing results
803 * mask - mask buffer (or single value for solid mask)
804 * src_top, src_bottom - two source scanlines
805 * width - number of pixels to process
806 * weight_top - weight of the top row for interpolation
807 * weight_bottom - weight of the bottom row for interpolation
808 * vx - initial position for fetching the first pair of
809 * pixels from the source buffer
810 * unit_x - position increment needed to move to the next pair
811 * of pixels
812 * max_vx - image size as a fixed point value, can be used for
813 * implementing NORMAL repeat (when it is supported)
814 * zero_src - boolean hint variable, which is set to TRUE when
815 * all source pixels are fetched from zero padding
816 * zone for NONE repeat
817 *
818 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
819 * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
820 * for NONE repeat when handling fuzzy antialiased top or bottom image
 * edges. Also, both the top and bottom weight variables are guaranteed
 * to have values less than BILINEAR_INTERPOLATION_RANGE.
 * For example, the weights can fit into an unsigned byte or be used
824 * with 8-bit SIMD multiplication instructions for 8-bit interpolation
825 * precision.
826 */
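/*
 * Minimal scanline_func sketch (illustrative only; it ignores the mask,
 * assumes the SRC operator with an a8r8g8b8 source and destination, assumes
 * weight_top + weight_bottom == BILINEAR_INTERPOLATION_RANGE, i.e. it does
 * not handle the fuzzy NONE-repeat edge case described above, and assumes
 * the two source taps at x and x + 1 are always readable, as in the COVER
 * case):
 *
 *     static void
 *     example_bilinear_scanline_8888_8888_SRC (uint32_t *       dst,
 *                                              const uint32_t * mask,
 *                                              const uint32_t * src_top,
 *                                              const uint32_t * src_bottom,
 *                                              int32_t          w,
 *                                              int              wt,
 *                                              int              wb,
 *                                              pixman_fixed_t   vx,
 *                                              pixman_fixed_t   unit_x,
 *                                              pixman_fixed_t   max_vx,
 *                                              pixman_bool_t    zero_src)
 *     {
 *         while (--w >= 0)
 *         {
 *             int x     = pixman_fixed_to_int (vx);
 *             int distx = pixman_fixed_to_bilinear_weight (vx);
 *
 *             // wb (the bottom row weight) plays the role of 'disty'
 *             *dst++ = bilinear_interpolation (src_top[x], src_top[x + 1],
 *                                              src_bottom[x], src_bottom[x + 1],
 *                                              distx, wb);
 *             vx += unit_x;
 *         }
 *     }
 */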
827 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
828 dst_type_t, repeat_mode, flags) \
829 static void \
830 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
831 pixman_composite_info_t *info) \
832 { \
833 PIXMAN_COMPOSITE_ARGS (info); \
834 dst_type_t *dst_line; \
835 mask_type_t *mask_line; \
836 src_type_t *src_first_line; \
837 int y1, y2; \
838 pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
839 pixman_vector_t v; \
840 pixman_fixed_t vx, vy; \
841 pixman_fixed_t unit_x, unit_y; \
842 int32_t left_pad, left_tz, right_tz, right_pad; \
843 \
844 dst_type_t *dst; \
845 mask_type_t solid_mask; \
846 const mask_type_t *mask = &solid_mask; \
847 int src_stride, mask_stride, dst_stride; \
848 \
849 int src_width; \
850 pixman_fixed_t src_width_fixed; \
851 int max_x; \
852 pixman_bool_t need_src_extension; \
853 \
854 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
855 if (flags & FLAG_HAVE_SOLID_MASK) \
856 { \
857 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
858 mask_stride = 0; \
859 } \
860 else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
861 { \
862 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
863 mask_stride, mask_line, 1); \
864 } \
865 \
866 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
867 * transformed from destination space to source space */ \
868 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
869 \
870 /* reference point is the center of the pixel */ \
871 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
872 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
873 v.vector[2] = pixman_fixed_1; \
874 \
875 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
876 return; \
877 \
878 unit_x = src_image->common.transform->matrix[0][0]; \
879 unit_y = src_image->common.transform->matrix[1][1]; \
880 \
881 v.vector[0] -= pixman_fixed_1 / 2; \
882 v.vector[1] -= pixman_fixed_1 / 2; \
883 \
884 vy = v.vector[1]; \
885 \
886 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
887 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
888 { \
889 bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
890 &left_pad, &left_tz, &width, &right_tz, &right_pad); \
891 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
892 { \
893 /* PAD repeat does not need special handling for 'transition zones' and */ \
894 /* they can be combined with 'padding zones' safely */ \
895 left_pad += left_tz; \
896 right_pad += right_tz; \
897 left_tz = right_tz = 0; \
898 } \
899 v.vector[0] += left_pad * unit_x; \
900 } \
901 \
902 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
903 { \
904 vx = v.vector[0]; \
905 repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
906 max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
907 \
908 if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
909 { \
910 src_width = 0; \
911 \
912 while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
913 src_width += src_image->bits.width; \
914 \
915 need_src_extension = TRUE; \
916 } \
917 else \
918 { \
919 src_width = src_image->bits.width; \
920 need_src_extension = FALSE; \
921 } \
922 \
923 src_width_fixed = pixman_int_to_fixed (src_width); \
924 } \
925 \
926 while (--height >= 0) \
927 { \
928 int weight1, weight2; \
929 dst = dst_line; \
930 dst_line += dst_stride; \
931 vx = v.vector[0]; \
932 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
933 { \
934 mask = mask_line; \
935 mask_line += mask_stride; \
936 } \
937 \
938 y1 = pixman_fixed_to_int (vy); \
939 weight2 = pixman_fixed_to_bilinear_weight (vy); \
940 if (weight2) \
941 { \
942 /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
943 y2 = y1 + 1; \
944 weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
945 } \
946 else \
947 { \
948 /* set both top and bottom row to the same scanline and tweak weights */ \
949 y2 = y1; \
950 weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
951 } \
952 vy += unit_y; \
953 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
954 { \
955 src_type_t *src1, *src2; \
956 src_type_t buf1[2]; \
957 src_type_t buf2[2]; \
958 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
959 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
960 src1 = src_first_line + src_stride * y1; \
961 src2 = src_first_line + src_stride * y2; \
962 \
963 if (left_pad > 0) \
964 { \
965 buf1[0] = buf1[1] = src1[0]; \
966 buf2[0] = buf2[1] = src2[0]; \
967 scanline_func (dst, mask, \
968 buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
969 dst += left_pad; \
970 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
971 mask += left_pad; \
972 } \
973 if (width > 0) \
974 { \
975 scanline_func (dst, mask, \
976 src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
977 dst += width; \
978 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
979 mask += width; \
980 } \
981 if (right_pad > 0) \
982 { \
983 buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
984 buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
985 scanline_func (dst, mask, \
986 buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
987 } \
988 } \
989 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
990 { \
991 src_type_t *src1, *src2; \
992 src_type_t buf1[2]; \
993 src_type_t buf2[2]; \
994 /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
995 if (y1 < 0) \
996 { \
997 weight1 = 0; \
998 y1 = 0; \
999 } \
1000 if (y1 >= src_image->bits.height) \
1001 { \
1002 weight1 = 0; \
1003 y1 = src_image->bits.height - 1; \
1004 } \
1005 if (y2 < 0) \
1006 { \
1007 weight2 = 0; \
1008 y2 = 0; \
1009 } \
1010 if (y2 >= src_image->bits.height) \
1011 { \
1012 weight2 = 0; \
1013 y2 = src_image->bits.height - 1; \
1014 } \
1015 src1 = src_first_line + src_stride * y1; \
1016 src2 = src_first_line + src_stride * y2; \
1017 \
1018 if (left_pad > 0) \
1019 { \
1020 buf1[0] = buf1[1] = 0; \
1021 buf2[0] = buf2[1] = 0; \
1022 scanline_func (dst, mask, \
1023 buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
1024 dst += left_pad; \
1025 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1026 mask += left_pad; \
1027 } \
1028 if (left_tz > 0) \
1029 { \
1030 buf1[0] = 0; \
1031 buf1[1] = src1[0]; \
1032 buf2[0] = 0; \
1033 buf2[1] = src2[0]; \
1034 scanline_func (dst, mask, \
1035 buf1, buf2, left_tz, weight1, weight2, \
1036 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1037 dst += left_tz; \
1038 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1039 mask += left_tz; \
1040 vx += left_tz * unit_x; \
1041 } \
1042 if (width > 0) \
1043 { \
1044 scanline_func (dst, mask, \
1045 src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
1046 dst += width; \
1047 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1048 mask += width; \
1049 vx += width * unit_x; \
1050 } \
1051 if (right_tz > 0) \
1052 { \
1053 buf1[0] = src1[src_image->bits.width - 1]; \
1054 buf1[1] = 0; \
1055 buf2[0] = src2[src_image->bits.width - 1]; \
1056 buf2[1] = 0; \
1057 scanline_func (dst, mask, \
1058 buf1, buf2, right_tz, weight1, weight2, \
1059 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1060 dst += right_tz; \
1061 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1062 mask += right_tz; \
1063 } \
1064 if (right_pad > 0) \
1065 { \
1066 buf1[0] = buf1[1] = 0; \
1067 buf2[0] = buf2[1] = 0; \
1068 scanline_func (dst, mask, \
1069 buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
1070 } \
1071 } \
1072 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
1073 { \
1074 int32_t num_pixels; \
1075 int32_t width_remain; \
1076 src_type_t * src_line_top; \
1077 src_type_t * src_line_bottom; \
1078 src_type_t buf1[2]; \
1079 src_type_t buf2[2]; \
1080 src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
1081 src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
1082 int i, j; \
1083 \
1084 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
1085 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
1086 src_line_top = src_first_line + src_stride * y1; \
1087 src_line_bottom = src_first_line + src_stride * y2; \
1088 \
1089 if (need_src_extension) \
1090 { \
1091 for (i=0; i<src_width;) \
1092 { \
1093 for (j=0; j<src_image->bits.width; j++, i++) \
1094 { \
1095 extended_src_line0[i] = src_line_top[j]; \
1096 extended_src_line1[i] = src_line_bottom[j]; \
1097 } \
1098 } \
1099 \
1100 src_line_top = &extended_src_line0[0]; \
1101 src_line_bottom = &extended_src_line1[0]; \
1102 } \
1103 \
1104 /* Top & Bottom wrap around buffer */ \
1105 buf1[0] = src_line_top[src_width - 1]; \
1106 buf1[1] = src_line_top[0]; \
1107 buf2[0] = src_line_bottom[src_width - 1]; \
1108 buf2[1] = src_line_bottom[0]; \
1109 \
1110 width_remain = width; \
1111 \
1112 while (width_remain > 0) \
1113 { \
/* We use src_width_fixed because it keeps vx within the original source range */ \
1115 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1116 \
1117 /* Wrap around part */ \
1118 if (pixman_fixed_to_int (vx) == src_width - 1) \
1119 { \
1120 /* for positive unit_x \
1121 * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
1122 * \
1123 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1124 * So we are safe from overflow. \
1125 */ \
1126 num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
1127 \
1128 if (num_pixels > width_remain) \
1129 num_pixels = width_remain; \
1130 \
1131 scanline_func (dst, mask, buf1, buf2, num_pixels, \
1132 weight1, weight2, pixman_fixed_frac(vx), \
1133 unit_x, src_width_fixed, FALSE); \
1134 \
1135 width_remain -= num_pixels; \
1136 vx += num_pixels * unit_x; \
1137 dst += num_pixels; \
1138 \
1139 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1140 mask += num_pixels; \
1141 \
1142 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1143 } \
1144 \
1145 /* Normal scanline composite */ \
1146 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
1147 { \
1148 /* for positive unit_x \
1149 * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
1150 * \
1151 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1152 * So we are safe from overflow here. \
1153 */ \
1154 num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
1155 / unit_x) + 1; \
1156 \
1157 if (num_pixels > width_remain) \
1158 num_pixels = width_remain; \
1159 \
1160 scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
1161 weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
1162 \
1163 width_remain -= num_pixels; \
1164 vx += num_pixels * unit_x; \
1165 dst += num_pixels; \
1166 \
1167 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1168 mask += num_pixels; \
1169 } \
1170 } \
1171 } \
1172 else \
1173 { \
1174 scanline_func (dst, mask, src_first_line + src_stride * y1, \
1175 src_first_line + src_stride * y2, width, \
1176 weight1, weight2, vx, unit_x, max_vx, FALSE); \
1177 } \
1178 } \
1179 }
1180
1181 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
1182 #define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
1183 dst_type_t, repeat_mode, flags) \
1184 FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
1185 dst_type_t, repeat_mode, flags)
1186
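/*
 * Instantiation sketch (illustrative; the names are hypothetical): pairing a
 * scanline function with this main loop template for an a8 mask and NORMAL
 * repeat could look like
 *
 *     FAST_BILINEAR_MAINLOOP_COMMON (8888_8_8888_normal_OVER,
 *                                    scaled_bilinear_scanline_8888_8_8888_OVER,
 *                                    uint32_t, uint8_t, uint32_t,
 *                                    NORMAL, FLAG_HAVE_NON_SOLID_MASK)
 *
 * which generates fast_composite_scaled_bilinear_8888_8_8888_normal_OVER,
 * the function name referenced by the
 * SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL() table entry below.
 */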
1187 #define SCALED_BILINEAR_FLAGS \
1188 (FAST_PATH_SCALE_TRANSFORM | \
1189 FAST_PATH_NO_ALPHA_MAP | \
1190 FAST_PATH_BILINEAR_FILTER | \
1191 FAST_PATH_NO_ACCESSORS | \
1192 FAST_PATH_NARROW_FORMAT)
1193
1194 #define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
1195 { PIXMAN_OP_ ## op, \
1196 PIXMAN_ ## s, \
1197 (SCALED_BILINEAR_FLAGS | \
1198 FAST_PATH_PAD_REPEAT | \
1199 FAST_PATH_X_UNIT_POSITIVE), \
1200 PIXMAN_null, 0, \
1201 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1202 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
1203 }
1204
1205 #define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
1206 { PIXMAN_OP_ ## op, \
1207 PIXMAN_ ## s, \
1208 (SCALED_BILINEAR_FLAGS | \
1209 FAST_PATH_NONE_REPEAT | \
1210 FAST_PATH_X_UNIT_POSITIVE), \
1211 PIXMAN_null, 0, \
1212 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1213 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
1214 }
1215
1216 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
1217 { PIXMAN_OP_ ## op, \
1218 PIXMAN_ ## s, \
1219 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
1220 PIXMAN_null, 0, \
1221 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1222 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
1223 }
1224
1225 #define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
1226 { PIXMAN_OP_ ## op, \
1227 PIXMAN_ ## s, \
1228 (SCALED_BILINEAR_FLAGS | \
1229 FAST_PATH_NORMAL_REPEAT | \
1230 FAST_PATH_X_UNIT_POSITIVE), \
1231 PIXMAN_null, 0, \
1232 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1233 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
1234 }
1235
1236 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
1237 { PIXMAN_OP_ ## op, \
1238 PIXMAN_ ## s, \
1239 (SCALED_BILINEAR_FLAGS | \
1240 FAST_PATH_PAD_REPEAT | \
1241 FAST_PATH_X_UNIT_POSITIVE), \
1242 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1243 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1244 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
1245 }
1246
1247 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
1248 { PIXMAN_OP_ ## op, \
1249 PIXMAN_ ## s, \
1250 (SCALED_BILINEAR_FLAGS | \
1251 FAST_PATH_NONE_REPEAT | \
1252 FAST_PATH_X_UNIT_POSITIVE), \
1253 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1254 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1255 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
1256 }
1257
1258 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
1259 { PIXMAN_OP_ ## op, \
1260 PIXMAN_ ## s, \
1261 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
1262 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1263 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1264 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
1265 }
1266
1267 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
1268 { PIXMAN_OP_ ## op, \
1269 PIXMAN_ ## s, \
1270 (SCALED_BILINEAR_FLAGS | \
1271 FAST_PATH_NORMAL_REPEAT | \
1272 FAST_PATH_X_UNIT_POSITIVE), \
1273 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1274 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1275 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
1276 }
1277
1278 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
1279 { PIXMAN_OP_ ## op, \
1280 PIXMAN_ ## s, \
1281 (SCALED_BILINEAR_FLAGS | \
1282 FAST_PATH_PAD_REPEAT | \
1283 FAST_PATH_X_UNIT_POSITIVE), \
1284 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1285 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1286 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
1287 }
1288
1289 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
1290 { PIXMAN_OP_ ## op, \
1291 PIXMAN_ ## s, \
1292 (SCALED_BILINEAR_FLAGS | \
1293 FAST_PATH_NONE_REPEAT | \
1294 FAST_PATH_X_UNIT_POSITIVE), \
1295 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1296 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1297 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
1298 }
1299
1300 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
1301 { PIXMAN_OP_ ## op, \
1302 PIXMAN_ ## s, \
1303 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
1304 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1305 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1306 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
1307 }
1308
1309 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
1310 { PIXMAN_OP_ ## op, \
1311 PIXMAN_ ## s, \
1312 (SCALED_BILINEAR_FLAGS | \
1313 FAST_PATH_NORMAL_REPEAT | \
1314 FAST_PATH_X_UNIT_POSITIVE), \
1315 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1316 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1317 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
1318 }
1319
/* Prefer the 'cover' variant because it is faster */
1321 #define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
1322 SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
1323 SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
1324 SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
1325 SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1326
1327 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
1328 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
1329 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
1330 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
1331 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1332
1333 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
1334 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
1335 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
1336 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
1337 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1338
1339 #endif
1340