• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3  * Copyright © 2000 SuSE, Inc.
4  * Copyright © 2007 Red Hat, Inc.
5  *
6  * Permission to use, copy, modify, distribute, and sell this software and its
7  * documentation for any purpose is hereby granted without fee, provided that
8  * the above copyright notice appear in all copies and that both that
9  * copyright notice and this permission notice appear in supporting
10  * documentation, and that the name of SuSE not be used in advertising or
11  * publicity pertaining to distribution of the software without specific,
12  * written prior permission.  SuSE makes no representations about the
13  * suitability of this software for any purpose.  It is provided "as is"
14  * without express or implied warranty.
15  *
16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22  *
23  * Author:  Keith Packard, SuSE, Inc.
24  */
25 
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28 
29 #include "pixman-private.h"
30 
/* Extra 'repeat_mode' value understood by the templates in this file in
 * addition to the regular PIXMAN_REPEAT_* modes: it matches none of the
 * NORMAL/PAD/NONE comparisons, so the variant without any repeat handling
 * is generated. The value is parenthesized so that the negative literal
 * stays a single operand in whatever expression the macro is expanded into.
 */
#define PIXMAN_REPEAT_COVER (-1)
32 
33 /* Flags describing input parameters to fast path macro template.
34  * Turning on some flag values may indicate that
35  * "some property X is available so template can use this" or
36  * "some property X should be handled by template".
37  *
38  * FLAG_HAVE_SOLID_MASK
39  *  Input mask is solid so template should handle this.
40  *
41  * FLAG_HAVE_NON_SOLID_MASK
42  *  Input mask is bits mask so template should handle this.
43  *
44  * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
45  * exclusive. (It's not allowed to turn both flags on)
46  */
/* Bit flags passed as the 'flags' argument of the fast path templates. */
#define FLAG_NONE (0)
#define FLAG_HAVE_SOLID_MASK (1 << 1)
#define FLAG_HAVE_NON_SOLID_MASK (1 << 2)

/* Source scanlines narrower than this constant are extended, so that the
 * scanline function is not called repeatedly for very short runs.
 */
#define REPEAT_NORMAL_MIN_WIDTH 64
55 
56 static force_inline pixman_bool_t
repeat(pixman_repeat_t repeat,int * c,int size)57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59     if (repeat == PIXMAN_REPEAT_NONE)
60     {
61 	if (*c < 0 || *c >= size)
62 	    return FALSE;
63     }
64     else if (repeat == PIXMAN_REPEAT_NORMAL)
65     {
66 	while (*c >= size)
67 	    *c -= size;
68 	while (*c < 0)
69 	    *c += size;
70     }
71     else if (repeat == PIXMAN_REPEAT_PAD)
72     {
73 	*c = CLIP (*c, 0, size - 1);
74     }
75     else /* REFLECT */
76     {
77 	*c = MOD (*c, size * 2);
78 	if (*c >= size)
79 	    *c = size * 2 - *c - 1;
80     }
81     return TRUE;
82 }
83 
84 static force_inline int
pixman_fixed_to_bilinear_weight(pixman_fixed_t x)85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 {
87     return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88 	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89 }
90 
91 #if BILINEAR_INTERPOLATION_BITS <= 4
92 /* Inspired by Filter_32_opaque from Skia */
93 static force_inline uint32_t
bilinear_interpolation(uint32_t tl,uint32_t tr,uint32_t bl,uint32_t br,int distx,int disty)94 bilinear_interpolation (uint32_t tl, uint32_t tr,
95 			uint32_t bl, uint32_t br,
96 			int distx, int disty)
97 {
98     int distxy, distxiy, distixy, distixiy;
99     uint32_t lo, hi;
100 
101     distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102     disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103 
104     distxy = distx * disty;
105     distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
106     distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
107     distixiy =
108 	16 * 16 - (disty << 4) -
109 	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110 
111     lo = (tl & 0xff00ff) * distixiy;
112     hi = ((tl >> 8) & 0xff00ff) * distixiy;
113 
114     lo += (tr & 0xff00ff) * distxiy;
115     hi += ((tr >> 8) & 0xff00ff) * distxiy;
116 
117     lo += (bl & 0xff00ff) * distixy;
118     hi += ((bl >> 8) & 0xff00ff) * distixy;
119 
120     lo += (br & 0xff00ff) * distxy;
121     hi += ((br >> 8) & 0xff00ff) * distxy;
122 
123     return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 }
125 
126 #else
127 #if SIZEOF_LONG > 4
128 
129 static force_inline uint32_t
bilinear_interpolation(uint32_t tl,uint32_t tr,uint32_t bl,uint32_t br,int distx,int disty)130 bilinear_interpolation (uint32_t tl, uint32_t tr,
131 			uint32_t bl, uint32_t br,
132 			int distx, int disty)
133 {
134     uint64_t distxy, distxiy, distixy, distixiy;
135     uint64_t tl64, tr64, bl64, br64;
136     uint64_t f, r;
137 
138     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140 
141     distxy = distx * disty;
142     distxiy = distx * (256 - disty);
143     distixy = (256 - distx) * disty;
144     distixiy = (256 - distx) * (256 - disty);
145 
146     /* Alpha and Blue */
147     tl64 = tl & 0xff0000ff;
148     tr64 = tr & 0xff0000ff;
149     bl64 = bl & 0xff0000ff;
150     br64 = br & 0xff0000ff;
151 
152     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153     r = f & 0x0000ff0000ff0000ull;
154 
155     /* Red and Green */
156     tl64 = tl;
157     tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158 
159     tr64 = tr;
160     tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161 
162     bl64 = bl;
163     bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164 
165     br64 = br;
166     br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167 
168     f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169     r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170 
171     return (uint32_t)(r >> 16);
172 }
173 
174 #else
175 
176 static force_inline uint32_t
bilinear_interpolation(uint32_t tl,uint32_t tr,uint32_t bl,uint32_t br,int distx,int disty)177 bilinear_interpolation (uint32_t tl, uint32_t tr,
178 			uint32_t bl, uint32_t br,
179 			int distx, int disty)
180 {
181     int distxy, distxiy, distixy, distixiy;
182     uint32_t f, r;
183 
184     distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
185     disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
186 
187     distxy = distx * disty;
188     distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
189     distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
190     distixiy =
191 	256 * 256 - (disty << 8) -
192 	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
193 
194     /* Blue */
195     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
196       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
197 
198     /* Green */
199     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
200       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
201     r |= f & 0xff000000;
202 
203     tl >>= 16;
204     tr >>= 16;
205     bl >>= 16;
206     br >>= 16;
207     r >>= 16;
208 
209     /* Red */
210     f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
211       + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
212     r |= f & 0x00ff0000;
213 
214     /* Alpha */
215     f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
216       + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
217     r |= f & 0xff000000;
218 
219     return r;
220 }
221 
222 #endif
223 #endif // BILINEAR_INTERPOLATION_BITS <= 4
224 
225 /*
226  * For each scanline fetched from source image with PAD repeat:
227  * - calculate how many pixels need to be padded on the left side
228  * - calculate how many pixels need to be padded on the right side
229  * - update width to only count pixels which are fetched from the image
230  * All this information is returned via 'width', 'left_pad', 'right_pad'
231  * arguments. The code is assuming that 'unit_x' is positive.
232  *
233  * Note: 64-bit math is used in order to avoid potential overflows, which
234  *       is probably excessive in many cases. This particular function
235  *       may need its own correctness test and performance tuning.
236  */
237 static force_inline void
pad_repeat_get_scanline_bounds(int32_t source_image_width,pixman_fixed_t vx,pixman_fixed_t unit_x,int32_t * width,int32_t * left_pad,int32_t * right_pad)238 pad_repeat_get_scanline_bounds (int32_t         source_image_width,
239 				pixman_fixed_t  vx,
240 				pixman_fixed_t  unit_x,
241 				int32_t *       width,
242 				int32_t *       left_pad,
243 				int32_t *       right_pad)
244 {
245     int64_t max_vx = (int64_t) source_image_width << 16;
246     int64_t tmp;
247     if (vx < 0)
248     {
249 	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
250 	if (tmp > *width)
251 	{
252 	    *left_pad = *width;
253 	    *width = 0;
254 	}
255 	else
256 	{
257 	    *left_pad = (int32_t) tmp;
258 	    *width -= (int32_t) tmp;
259 	}
260     }
261     else
262     {
263 	*left_pad = 0;
264     }
265     tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
266     if (tmp < 0)
267     {
268 	*right_pad = *width;
269 	*width = 0;
270     }
271     else if (tmp >= *width)
272     {
273 	*right_pad = 0;
274     }
275     else
276     {
277 	*right_pad = *width - (int32_t) tmp;
278 	*width = (int32_t) tmp;
279     }
280 }
281 
282 /* A macroified version of specialized nearest scalers for some
283  * common 8888 and 565 formats. It supports SRC and OVER ops.
284  *
285  * There are two repeat versions, one that handles repeat normal,
286  * and one without repeat handling that only works if the src region
287  * used is completely covered by the pre-repeated source samples.
288  *
289  * The loops are unrolled to process two pixels per iteration for better
290  * performance on most CPU architectures (superscalar processors
291  * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling more
293  * does not make much sense because the compiler will start running out
294  * of spare registers soon.
295  */
296 
/* Alpha accessors selected by FAST_NEAREST_SCANLINE through
 * GET_ ## SRC_FORMAT ## _ALPHA; formats without stored alpha report 0xff.
 */
#define GET_8888_ALPHA(s) ((s) >> 24)
/* This is not actually used since we don't have an OVER with
 * 565 source, but it is needed to build.
 */
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff
302 
/*
 * FAST_NEAREST_SCANLINE - generate a nearest neighbour scanline scaler.
 *
 * Expands to a static force_inline function 'scanline_func_name' which
 * writes 'w' destination pixels. For each pixel the source is sampled at
 * index pixman_fixed_to_int (vx) relative to 'src', then 'vx' is advanced
 * by 'unit_x'.
 *
 *  SRC_FORMAT, DST_FORMAT - format suffixes used to select the
 *                           convert_*_to_* and GET_*_ALPHA helpers
 *  src_type_t, dst_type_t - pixel storage types
 *  OP                     - SRC or OVER (anything else calls abort ())
 *  repeat_mode            - for NORMAL, 'vx' is kept negative by subtracting
 *                           'src_width_fixed' after every step; the main loop
 *                           template in this file accordingly passes 'src'
 *                           advanced by the image width
 *
 * For OVER, a TRUE 'fully_transparent_src' makes the function a no-op,
 * opaque source pixels are simply converted and stored, and all-zero
 * source pixels leave the destination untouched.
 *
 * Two pixels are handled per loop iteration, followed by an optional
 * single trailing pixel.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
			      src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t       *dst, \
		    const src_type_t *src, \
		    int32_t           w, \
		    pixman_fixed_t    vx, \
		    pixman_fixed_t    unit_x, \
		    pixman_fixed_t    src_width_fixed, \
		    pixman_bool_t     fully_transparent_src) \
{ \
    src_type_t pix1, pix2; \
    uint32_t   blended; \
    uint8_t    alpha1, alpha2; \
    int        idx1, idx2; \
 \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
	return; \
 \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
	abort(); \
 \
    /* Main loop: two pixels per iteration */ \
    while ((w -= 2) >= 0) \
    { \
	idx1 = pixman_fixed_to_int (vx); \
	vx += unit_x; \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
	{ \
	    /* This works because we know that unit_x is positive */ \
	    while (vx >= 0) \
		vx -= src_width_fixed; \
	} \
	pix1 = src[idx1]; \
 \
	idx2 = pixman_fixed_to_int (vx); \
	vx += unit_x; \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
	{ \
	    /* This works because we know that unit_x is positive */ \
	    while (vx >= 0) \
		vx -= src_width_fixed; \
	} \
	pix2 = src[idx2]; \
 \
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
	{ \
	    alpha1 = GET_ ## SRC_FORMAT ## _ALPHA(pix1); \
	    alpha2 = GET_ ## SRC_FORMAT ## _ALPHA(pix2); \
 \
	    if (alpha1 == 0xff) \
	    { \
		*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix1); \
	    } \
	    else if (pix1) \
	    { \
		/* dest = dest * (255 - alpha) + source */ \
		blended = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
		pix1 = convert_ ## SRC_FORMAT ## _to_8888 (pix1); \
		alpha1 ^= 0xff; \
		UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha1, pix1); \
		*dst = convert_8888_to_ ## DST_FORMAT (blended); \
	    } \
	    dst++; \
 \
	    if (alpha2 == 0xff) \
	    { \
		*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix2); \
	    } \
	    else if (pix2) \
	    { \
		blended = convert_## DST_FORMAT ## _to_8888 (*dst); \
		pix2 = convert_## SRC_FORMAT ## _to_8888 (pix2); \
		alpha2 ^= 0xff; \
		UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha2, pix2); \
		*dst = convert_8888_to_ ## DST_FORMAT (blended); \
	    } \
	    dst++; \
	} \
	else /* PIXMAN_OP_SRC */ \
	{ \
	    *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix1); \
	    *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix2); \
	} \
    } \
 \
    /* After the loop w is -1 when one pixel is left and -2 otherwise */ \
    if (w & 1) \
    { \
	idx1 = pixman_fixed_to_int (vx); \
	pix1 = src[idx1]; \
 \
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
	{ \
	    alpha1 = GET_ ## SRC_FORMAT ## _ALPHA(pix1); \
 \
	    if (alpha1 == 0xff) \
	    { \
		*dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix1); \
	    } \
	    else if (pix1) \
	    { \
		blended = convert_## DST_FORMAT ## _to_8888 (*dst); \
		pix1 = convert_ ## SRC_FORMAT ## _to_8888 (pix1); \
		alpha1 ^= 0xff; \
		UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha1, pix1); \
		*dst = convert_8888_to_ ## DST_FORMAT (blended); \
	    } \
	    dst++; \
	} \
	else /* PIXMAN_OP_SRC */ \
	{ \
	    *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (pix1); \
	} \
    } \
}
416 
/*
 * FAST_NEAREST_MAINLOOP_INT - generate a nearest neighbour composite
 * function named fast_composite_scaled_nearest<scale_func_name>.
 *
 * The generated function transforms the centre of the first destination
 * pixel into source space, takes the per-pixel steps from the diagonal of
 * the source transform, and then calls 'scanline_func' for every
 * destination row:
 *
 *   scanline_func (mask, dst, src, w, vx, unit_x, max_vx, zero_src)
 *
 * 'src' is always passed advanced by the image width together with a 'vx'
 * reduced by the same amount (or by one pixel with vx = -pixman_fixed_e
 * when a single pixel is to be replicated).
 *
 *  repeat_mode   - NORMAL: vx/vy are wrapped into the image
 *                  PAD:    each row is split into left padding, image and
 *                          right padding; padding replicates the edge pixel
 *                  NONE:   same split, but padding (and rows outside the
 *                          image) read from a static zero pixel with the
 *                          'zero_src' hint set to TRUE
 *                  other:  no repeat handling at all
 *  have_mask     - whether a mask is used
 *  mask_is_solid - the mask is a single value fetched with
 *                  _pixman_image_get_solid () rather than a scanline
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
				  dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp, \
						   pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    int       y; \
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
    pixman_fixed_t max_vy; \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, right_pad; \
 \
    src_type_t *src; \
    dst_type_t *dst; \
    mask_type_t solid_mask; \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
 \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    if (have_mask) \
    { \
	if (mask_is_solid) \
	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
	else \
	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
				   mask_stride, mask_line, 1); \
    } \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
 \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
 \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
	return; \
 \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
 \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
    v.vector[0] -= pixman_fixed_e; \
    v.vector[1] -= pixman_fixed_e; \
 \
    vx = v.vector[0]; \
    vy = v.vector[1]; \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
	max_vy = pixman_int_to_fixed (src_image->bits.height); \
 \
	/* Clamp repeating positions inside the actual samples */ \
	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
    } \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
	/* Split the row once; the split is the same for every row */ \
	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
					&width, &left_pad, &right_pad); \
	vx += left_pad * unit_x; \
    } \
 \
    while (--height >= 0) \
    { \
	dst = dst_line; \
	dst_line += dst_stride; \
	if (have_mask && !mask_is_solid) \
	{ \
	    mask = mask_line; \
	    mask_line += mask_stride; \
	} \
 \
	y = pixman_fixed_to_int (vy); \
	vy += unit_y; \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
 \
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
	{ \
	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
	    src = src_first_line + src_stride * y; \
	    if (left_pad > 0) \
	    { \
		/* src + 1 with vx = -pixman_fixed_e reads the leftmost pixel */ \
		scanline_func (mask, dst, src + 1, \
			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
	    } \
	    if (width > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
			       dst + left_pad, src + src_image->bits.width, width, \
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
	    } \
	    if (right_pad > 0) \
	    { \
		/* one past the end with vx = -pixman_fixed_e reads the rightmost pixel */ \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
			       dst + left_pad + width, src + src_image->bits.width, \
			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
	    } \
	} \
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
	{ \
	    static const src_type_t zero[1] = { 0 }; \
	    if (y < 0 || y >= src_image->bits.height) \
	    { \
		/* Row entirely outside of the image */ \
		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
			       -pixman_fixed_e, 0, src_width_fixed, TRUE); \
		continue; \
	    } \
	    src = src_first_line + src_stride * y; \
	    if (left_pad > 0) \
	    { \
		scanline_func (mask, dst, zero + 1, left_pad, \
			       -pixman_fixed_e, 0, src_width_fixed, TRUE); \
	    } \
	    if (width > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
			       dst + left_pad, src + src_image->bits.width, width, \
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
	    } \
	    if (right_pad > 0) \
	    { \
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
			       dst + left_pad + width, zero + 1, right_pad, \
			       -pixman_fixed_e, 0, src_width_fixed, TRUE); \
	    } \
	} \
	else \
	{ \
	    src = src_first_line + src_stride * y; \
	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
			   unit_x, src_width_fixed, FALSE); \
	} \
    } \
}
562 
/* Same as FAST_NEAREST_MAINLOOP_INT, but prepends '_' to 'scale_func_name'.
 * A workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
				     dst_type_t, repeat_mode, have_mask, mask_is_solid) \
    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
			      dst_type_t, repeat_mode, have_mask, mask_is_solid)
568 
/*
 * Main loop generator for scanline functions that take no mask argument.
 *
 * Emits a wrapper <scanline_func><scale_func_name>_wrapper which accepts
 * (and ignores) a 'const uint8_t *mask' as first argument and forwards the
 * remaining arguments to 'scanline_func', then instantiates
 * FAST_NEAREST_MAINLOOP_INT with that wrapper and with both 'have_mask'
 * and 'mask_is_solid' set to FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
				     repeat_mode) \
    static force_inline void \
    scanline_func##scale_func_name##_wrapper ( \
	const uint8_t    *mask, \
	dst_type_t       *dst, \
	const src_type_t *src, \
	int32_t          w, \
	pixman_fixed_t   vx, \
	pixman_fixed_t   unit_x, \
	pixman_fixed_t   max_vx, \
	pixman_bool_t    fully_transparent_src) \
    { \
	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
    } \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
586 
/* Mask-less main loop: FAST_NEAREST_MAINLOOP_NOMASK with '_' prepended to
 * 'scale_func_name'.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
			      repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
				 dst_type_t, repeat_mode)
591 
/*
 * Generate a complete mask-less nearest scaler in one go: the scanline
 * function scaled_nearest_scanline_<scale_func_name>_<OP> and the matching
 * composite function fast_composite_scaled_nearest_<scale_func_name>_<OP>.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
		     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
			  OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
				 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
				 src_type_t, dst_type_t, repeat_mode)
600 
601 
/* Source image flags shared by every nearest scaling fast path entry below */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
608 
/*
 * Fast path table entries for mask-less nearest scaling, one macro per
 * repeat variant. Each expands to a brace-enclosed initializer made of:
 * operator, source format, source flags, mask format and flags
 * (PIXMAN_null, 0), destination format and flags, and the composite
 * function fast_composite_scaled_nearest_<func>_<variant>_<op>.
 * NOTE(review): the initialized struct is declared elsewhere - presumably
 * pixman_fast_path_t; confirm the field order there.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_null, 0, \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_null, 0, \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_null, 0, \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

/* The cover variant needs neither a repeat flag nor a positive x unit */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
	PIXMAN_null, 0, \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }
650 
/*
 * Fast path table entries for nearest scaling with an a8 mask, one macro
 * per repeat variant. Identical to the mask-less entries except for the
 * mask format and flags: PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }
692 
/*
 * Fast path table entries for nearest scaling with a solid mask, one macro
 * per repeat variant. Identical to the mask-less entries except for the
 * mask format and flags: PIXMAN_solid,
 * MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA).
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	(SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
    { \
	PIXMAN_OP_ ## op, \
	PIXMAN_ ## s, \
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }
734 
/*
 * Convenience lists expanding to several comma separated table entries
 * (no trailing comma). The 'cover' variant comes first because it is
 * preferred: it is faster. Only the mask-less list includes the NORMAL
 * repeat entry.
 */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
751 
752 /*****************************************************************************/
753 
754 /*
755  * Identify 5 zones in each scanline for bilinear scaling. Depending on
756  * whether 2 pixels to be interpolated are fetched from the image itself,
757  * from the padding area around it or from both image and padding area.
758  */
759 static force_inline void
bilinear_pad_repeat_get_scanline_bounds(int32_t source_image_width,pixman_fixed_t vx,pixman_fixed_t unit_x,int32_t * left_pad,int32_t * left_tz,int32_t * width,int32_t * right_tz,int32_t * right_pad)760 bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
761 					 pixman_fixed_t  vx,
762 					 pixman_fixed_t  unit_x,
763 					 int32_t *       left_pad,
764 					 int32_t *       left_tz,
765 					 int32_t *       width,
766 					 int32_t *       right_tz,
767 					 int32_t *       right_pad)
768 {
769 	int width1 = *width, left_pad1, right_pad1;
770 	int width2 = *width, left_pad2, right_pad2;
771 
772 	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
773 					&width1, &left_pad1, &right_pad1);
774 	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
775 					unit_x, &width2, &left_pad2, &right_pad2);
776 
777 	*left_pad = left_pad2;
778 	*left_tz = left_pad1 - left_pad2;
779 	*right_tz = right_pad2 - right_pad1;
780 	*right_pad = right_pad1;
781 	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
782 }
783 
784 /*
785  * Main loop template for single pass bilinear scaling. It needs to be
786  * provided with 'scanline_func' which should do the compositing operation.
787  * The needed function has the following prototype:
788  *
789  *	scanline_func (dst_type_t *       dst,
 *		       const mask_type_t * mask,
791  *		       const src_type_t * src_top,
792  *		       const src_type_t * src_bottom,
793  *		       int32_t            width,
794  *		       int                weight_top,
795  *		       int                weight_bottom,
796  *		       pixman_fixed_t     vx,
797  *		       pixman_fixed_t     unit_x,
798  *		       pixman_fixed_t     max_vx,
799  *		       pixman_bool_t      zero_src)
800  *
801  * Where:
802  *  dst                 - destination scanline buffer for storing results
803  *  mask                - mask buffer (or single value for solid mask)
804  *  src_top, src_bottom - two source scanlines
805  *  width               - number of pixels to process
806  *  weight_top          - weight of the top row for interpolation
807  *  weight_bottom       - weight of the bottom row for interpolation
808  *  vx                  - initial position for fetching the first pair of
809  *                        pixels from the source buffer
810  *  unit_x              - position increment needed to move to the next pair
811  *                        of pixels
812  *  max_vx              - image size as a fixed point value, can be used for
813  *                        implementing NORMAL repeat (when it is supported)
814  *  zero_src            - boolean hint variable, which is set to TRUE when
815  *                        all source pixels are fetched from zero padding
816  *                        zone for NONE repeat
817  *
818  * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
819  *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
820  *       for NONE repeat when handling fuzzy antialiased top or bottom image
821  *       edges. Also both top and bottom weight variables are guaranteed to
822  *       have value, which is less than BILINEAR_INTERPOLATION_RANGE.
823  *       For example, the weights can fit into unsigned byte or be used
824  *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
825  *       precision.
826  */
827 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
828 				  dst_type_t, repeat_mode, flags)				\
829 static void											\
830 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
831 						   pixman_composite_info_t *info)		\
832 {												\
833     PIXMAN_COMPOSITE_ARGS (info);								\
834     dst_type_t *dst_line;									\
835     mask_type_t *mask_line;									\
836     src_type_t *src_first_line;									\
837     int       y1, y2;										\
838     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
839     pixman_vector_t v;										\
840     pixman_fixed_t vx, vy;									\
841     pixman_fixed_t unit_x, unit_y;								\
842     int32_t left_pad, left_tz, right_tz, right_pad;						\
843 												\
844     dst_type_t *dst;										\
845     mask_type_t solid_mask;									\
846     const mask_type_t *mask = &solid_mask;							\
847     int src_stride, mask_stride, dst_stride;							\
848 												\
849     int src_width;										\
850     pixman_fixed_t src_width_fixed;								\
851     int max_x;											\
852     pixman_bool_t need_src_extension;								\
853 												\
854     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
855     if (flags & FLAG_HAVE_SOLID_MASK)								\
856     {												\
857 	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
858 	mask_stride = 0;									\
859     }												\
860     else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
861     {												\
862 	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
863 			       mask_stride, mask_line, 1);					\
864     }												\
865 												\
866     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
867      * transformed from destination space to source space */					\
868     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
869 												\
870     /* reference point is the center of the pixel */						\
871     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
872     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
873     v.vector[2] = pixman_fixed_1;								\
874 												\
875     if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
876 	return;											\
877 												\
878     unit_x = src_image->common.transform->matrix[0][0];						\
879     unit_y = src_image->common.transform->matrix[1][1];						\
880 												\
881     v.vector[0] -= pixman_fixed_1 / 2;								\
882     v.vector[1] -= pixman_fixed_1 / 2;								\
883 												\
884     vy = v.vector[1];										\
885 												\
886     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
887 	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
888     {												\
889 	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
890 					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
891 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
892 	{											\
893 	    /* PAD repeat does not need special handling for 'transition zones' and */		\
894 	    /* they can be combined with 'padding zones' safely */				\
895 	    left_pad += left_tz;								\
896 	    right_pad += right_tz;								\
897 	    left_tz = right_tz = 0;								\
898 	}											\
899 	v.vector[0] += left_pad * unit_x;							\
900     }												\
901 												\
902     if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
903     {												\
904 	vx = v.vector[0];									\
905 	repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
906 	max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
907 												\
908 	if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
909 	{											\
910 	    src_width = 0;									\
911 												\
912 	    while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
913 		src_width += src_image->bits.width;						\
914 												\
915 	    need_src_extension = TRUE;								\
916 	}											\
917 	else											\
918 	{											\
919 	    src_width = src_image->bits.width;							\
920 	    need_src_extension = FALSE;								\
921 	}											\
922 												\
923 	src_width_fixed = pixman_int_to_fixed (src_width);					\
924     }												\
925 												\
926     while (--height >= 0)									\
927     {												\
928 	int weight1, weight2;									\
929 	dst = dst_line;										\
930 	dst_line += dst_stride;									\
931 	vx = v.vector[0];									\
932 	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
933 	{											\
934 	    mask = mask_line;									\
935 	    mask_line += mask_stride;								\
936 	}											\
937 												\
938 	y1 = pixman_fixed_to_int (vy);								\
939 	weight2 = pixman_fixed_to_bilinear_weight (vy);						\
940 	if (weight2)										\
941 	{											\
942 	    /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
943 	    y2 = y1 + 1;									\
944 	    weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
945 	}											\
946 	else											\
947 	{											\
948 	    /* set both top and bottom row to the same scanline and tweak weights */		\
949 	    y2 = y1;										\
950 	    weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
951 	}											\
952 	vy += unit_y;										\
953 	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
954 	{											\
955 	    src_type_t *src1, *src2;								\
956 	    src_type_t buf1[2];									\
957 	    src_type_t buf2[2];									\
958 	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
959 	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
960 	    src1 = src_first_line + src_stride * y1;						\
961 	    src2 = src_first_line + src_stride * y2;						\
962 												\
963 	    if (left_pad > 0)									\
964 	    {											\
965 		buf1[0] = buf1[1] = src1[0];							\
966 		buf2[0] = buf2[1] = src2[0];							\
967 		scanline_func (dst, mask,							\
968 			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
969 		dst += left_pad;								\
970 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
971 		    mask += left_pad;								\
972 	    }											\
973 	    if (width > 0)									\
974 	    {											\
975 		scanline_func (dst, mask,							\
976 			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
977 		dst += width;									\
978 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
979 		    mask += width;								\
980 	    }											\
981 	    if (right_pad > 0)									\
982 	    {											\
983 		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
984 		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
985 		scanline_func (dst, mask,							\
986 			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
987 	    }											\
988 	}											\
989 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
990 	{											\
991 	    src_type_t *src1, *src2;								\
992 	    src_type_t buf1[2];									\
993 	    src_type_t buf2[2];									\
994 	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
995 	    if (y1 < 0)										\
996 	    {											\
997 		weight1 = 0;									\
998 		y1 = 0;										\
999 	    }											\
1000 	    if (y1 >= src_image->bits.height)							\
1001 	    {											\
1002 		weight1 = 0;									\
1003 		y1 = src_image->bits.height - 1;						\
1004 	    }											\
1005 	    if (y2 < 0)										\
1006 	    {											\
1007 		weight2 = 0;									\
1008 		y2 = 0;										\
1009 	    }											\
1010 	    if (y2 >= src_image->bits.height)							\
1011 	    {											\
1012 		weight2 = 0;									\
1013 		y2 = src_image->bits.height - 1;						\
1014 	    }											\
1015 	    src1 = src_first_line + src_stride * y1;						\
1016 	    src2 = src_first_line + src_stride * y2;						\
1017 												\
1018 	    if (left_pad > 0)									\
1019 	    {											\
1020 		buf1[0] = buf1[1] = 0;								\
1021 		buf2[0] = buf2[1] = 0;								\
1022 		scanline_func (dst, mask,							\
1023 			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
1024 		dst += left_pad;								\
1025 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
1026 		    mask += left_pad;								\
1027 	    }											\
1028 	    if (left_tz > 0)									\
1029 	    {											\
1030 		buf1[0] = 0;									\
1031 		buf1[1] = src1[0];								\
1032 		buf2[0] = 0;									\
1033 		buf2[1] = src2[0];								\
1034 		scanline_func (dst, mask,							\
1035 			       buf1, buf2, left_tz, weight1, weight2,				\
1036 			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
1037 		dst += left_tz;									\
1038 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
1039 		    mask += left_tz;								\
1040 		vx += left_tz * unit_x;								\
1041 	    }											\
1042 	    if (width > 0)									\
1043 	    {											\
1044 		scanline_func (dst, mask,							\
1045 			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
1046 		dst += width;									\
1047 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
1048 		    mask += width;								\
1049 		vx += width * unit_x;								\
1050 	    }											\
1051 	    if (right_tz > 0)									\
1052 	    {											\
1053 		buf1[0] = src1[src_image->bits.width - 1];					\
1054 		buf1[1] = 0;									\
1055 		buf2[0] = src2[src_image->bits.width - 1];					\
1056 		buf2[1] = 0;									\
1057 		scanline_func (dst, mask,							\
1058 			       buf1, buf2, right_tz, weight1, weight2,				\
1059 			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
1060 		dst += right_tz;								\
1061 		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
1062 		    mask += right_tz;								\
1063 	    }											\
1064 	    if (right_pad > 0)									\
1065 	    {											\
1066 		buf1[0] = buf1[1] = 0;								\
1067 		buf2[0] = buf2[1] = 0;								\
1068 		scanline_func (dst, mask,							\
1069 			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
1070 	    }											\
1071 	}											\
1072 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
1073 	{											\
1074 	    int32_t	    num_pixels;								\
1075 	    int32_t	    width_remain;							\
1076 	    src_type_t *    src_line_top;							\
1077 	    src_type_t *    src_line_bottom;							\
1078 	    src_type_t	    buf1[2];								\
1079 	    src_type_t	    buf2[2];								\
1080 	    src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
1081 	    src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
1082 	    int		    i, j;								\
1083 												\
1084 	    repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
1085 	    repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
1086 	    src_line_top = src_first_line + src_stride * y1;					\
1087 	    src_line_bottom = src_first_line + src_stride * y2;					\
1088 												\
1089 	    if (need_src_extension)								\
1090 	    {											\
1091 		for (i=0; i<src_width;)								\
1092 		{										\
1093 		    for (j=0; j<src_image->bits.width; j++, i++)				\
1094 		    {										\
1095 			extended_src_line0[i] = src_line_top[j];				\
1096 			extended_src_line1[i] = src_line_bottom[j];				\
1097 		    }										\
1098 		}										\
1099 												\
1100 		src_line_top = &extended_src_line0[0];						\
1101 		src_line_bottom = &extended_src_line1[0];					\
1102 	    }											\
1103 												\
1104 	    /* Top & Bottom wrap around buffer */						\
1105 	    buf1[0] = src_line_top[src_width - 1];						\
1106 	    buf1[1] = src_line_top[0];								\
1107 	    buf2[0] = src_line_bottom[src_width - 1];						\
1108 	    buf2[1] = src_line_bottom[0];							\
1109 												\
1110 	    width_remain = width;								\
1111 												\
1112 	    while (width_remain > 0)								\
1113 	    {											\
1114 		/* We use src_width_fixed because it can make vx in original source range */	\
1115 		repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
1116 												\
1117 		/* Wrap around part */								\
1118 		if (pixman_fixed_to_int (vx) == src_width - 1)					\
1119 		{										\
1120 		    /* for positive unit_x							\
1121 		     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
1122 		     *										\
1123 		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
1124 		     * So we are safe from overflow.						\
1125 		     */										\
1126 		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
1127 												\
1128 		    if (num_pixels > width_remain)						\
1129 			num_pixels = width_remain;						\
1130 												\
1131 		    scanline_func (dst, mask, buf1, buf2, num_pixels,				\
1132 				   weight1, weight2, pixman_fixed_frac(vx),			\
1133 				   unit_x, src_width_fixed, FALSE);				\
1134 												\
1135 		    width_remain -= num_pixels;							\
1136 		    vx += num_pixels * unit_x;							\
1137 		    dst += num_pixels;								\
1138 												\
1139 		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
1140 			mask += num_pixels;							\
1141 												\
1142 		    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
1143 		}										\
1144 												\
1145 		/* Normal scanline composite */							\
1146 		if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
1147 		{										\
1148 		    /* for positive unit_x							\
1149 		     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
1150 		     *										\
1151 		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
1152 		     * So we are safe from overflow here.					\
1153 		     */										\
1154 		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
1155 				  / unit_x) + 1;						\
1156 												\
1157 		    if (num_pixels > width_remain)						\
1158 			num_pixels = width_remain;						\
1159 												\
1160 		    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,	\
1161 				   weight1, weight2, vx, unit_x, src_width_fixed, FALSE);	\
1162 												\
1163 		    width_remain -= num_pixels;							\
1164 		    vx += num_pixels * unit_x;							\
1165 		    dst += num_pixels;								\
1166 												\
1167 		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
1168 		        mask += num_pixels;							\
1169 		}										\
1170 	    }											\
1171 	}											\
1172 	else											\
1173 	{											\
1174 	    scanline_func (dst, mask, src_first_line + src_stride * y1,				\
1175 			   src_first_line + src_stride * y2, width,				\
1176 			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
1177 	}											\
1178     }												\
1179 }
1180 
/*
 * A workaround for old sun studio, see:
 * https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Forwards all arguments to FAST_BILINEAR_MAINLOOP_INT, pasting an
 * underscore in front of 'scale_func_name' first, so the generated function
 * is named fast_composite_scaled_bilinear_<scale_func_name>.
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, flags)				\
	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
				  dst_type_t, repeat_mode, flags)
1186 
/*
 * Source image flags shared by every SIMPLE_BILINEAR_*_FAST_PATH_* entry.
 * The expansion is parenthesized, so it can be combined with further flags
 * or used inside a larger expression safely.
 */
#define SCALED_BILINEAR_FLAGS						\
    (FAST_PATH_SCALE_TRANSFORM	|					\
     FAST_PATH_NO_ALPHA_MAP	|					\
     FAST_PATH_BILINEAR_FILTER	|					\
     FAST_PATH_NO_ACCESSORS	|					\
     FAST_PATH_NARROW_FORMAT)
1193 
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for the
 * unmasked PAD repeat variant. Elements, in order: PIXMAN_OP_<op>,
 * PIXMAN_<s>, source flags, PIXMAN_null and 0 for the absent mask,
 * PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
    }
1204 
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for the
 * unmasked NONE repeat variant. Elements, in order: PIXMAN_OP_<op>,
 * PIXMAN_<s>, source flags, PIXMAN_null and 0 for the absent mask,
 * PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
    }
1215 
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for the
 * unmasked COVER variant. Unlike the PAD/NONE/NORMAL variants it requires
 * FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR instead of a repeat flag plus
 * FAST_PATH_X_UNIT_POSITIVE. The function referenced is
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),			\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
    }
1224 
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for the
 * unmasked NORMAL repeat variant. Elements, in order: PIXMAN_OP_<op>,
 * PIXMAN_<s>, source flags, PIXMAN_null and 0 for the absent mask,
 * PIXMAN_<d>, FAST_PATH_STD_DEST_FLAGS and the function
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
    }
1235 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_PAD, but the mask elements are
 * PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
    }
1246 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_NONE, but the mask elements are
 * PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
    }
1257 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_COVER, but the mask elements are
 * PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),			\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
    }
1266 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_NORMAL, but the mask elements are
 * PIXMAN_a8 and MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
    }
1277 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_PAD, but the mask elements are
 * PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
    }
1288 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_NONE, but the mask elements are
 * PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
    }
1299 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_COVER, but the mask elements are
 * PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),			\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
    }
1308 
/*
 * Same layout as SIMPLE_BILINEAR_FAST_PATH_NORMAL, but the mask elements are
 * PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA) instead of
 * PIXMAN_null and 0. The function referenced is
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
    }
1319 
/*
 * Expands to four comma separated entries (COVER, NONE, PAD, NORMAL) for an
 * unmasked bilinear operation; no trailing comma is emitted.
 *
 * Prefer the use of 'cover' variant, because it is faster: that is why the
 * COVER entry is listed first (presumably the table is searched in order).
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),			\
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1326 
/*
 * Expands to four comma separated entries (COVER, NONE, PAD, NORMAL) for a
 * bilinear operation with an a8 mask; no trailing comma is emitted.
 * The COVER entry comes first, as in SIMPLE_BILINEAR_FAST_PATH.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1332 
/*
 * Expands to four comma separated entries (COVER, NONE, PAD, NORMAL) for a
 * bilinear operation with a solid mask; no trailing comma is emitted.
 * The COVER entry comes first, as in SIMPLE_BILINEAR_FAST_PATH.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1338 
1339 #endif
1340