1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2 /*
3 * Copyright © 2000 SuSE, Inc.
4 * Copyright © 2007 Red Hat, Inc.
5 *
6 * Permission to use, copy, modify, distribute, and sell this software and its
7 * documentation for any purpose is hereby granted without fee, provided that
8 * the above copyright notice appear in all copies and that both that
9 * copyright notice and this permission notice appear in supporting
10 * documentation, and that the name of SuSE not be used in advertising or
11 * publicity pertaining to distribution of the software without specific,
12 * written prior permission. SuSE makes no representations about the
13 * suitability of this software for any purpose. It is provided "as is"
14 * without express or implied warranty.
15 *
16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22 *
23 * Author: Keith Packard, SuSE, Inc.
24 */
25
26 #ifndef PIXMAN_FAST_PATH_H__
27 #define PIXMAN_FAST_PATH_H__
28
29 #include "pixman-private.h"
30
31 #define PIXMAN_REPEAT_COVER -1
32
33 /* Flags describing input parameters to the fast path macro templates.
34 * Setting a flag indicates either that
35 * "some property X is available, so the template can take advantage of it" or
36 * "some property X should be handled by the template".
37 *
38 * FLAG_HAVE_SOLID_MASK
39 * Input mask is solid, so the template should handle it.
40 *
41 * FLAG_HAVE_NON_SOLID_MASK
42 * Input mask is a bits image, so the template should handle it.
43 *
44 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
45 * exclusive (it is not allowed to set both flags at the same time).
46 */
47 #define FLAG_NONE (0)
48 #define FLAG_HAVE_SOLID_MASK (1 << 1)
49 #define FLAG_HAVE_NON_SOLID_MASK (1 << 2)
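
/* As a rough illustration (a sketch only, mirroring what the
 * FAST_BILINEAR_MAINLOOP_INT macro below actually does), a main loop
 * template parametrized with 'flags' dispatches on these values like so:
 *
 *     if (flags & FLAG_HAVE_SOLID_MASK)
 *         solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);
 *     else if (flags & FLAG_HAVE_NON_SOLID_MASK)
 *         PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,
 *                                mask_stride, mask_line, 1);
 *
 * With FLAG_NONE no mask is fetched at all.
 */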
50
51 /* To avoid scanline function calls that each cover only a few pixels, extend
52 * source scanlines whose width is less than the constant value below.
53 */
54 #define REPEAT_NORMAL_MIN_WIDTH 64
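
/* For example (an illustrative sketch of the logic used by the NORMAL repeat
 * path of the bilinear main loop below): a 24 pixel wide source scaled with
 * NORMAL repeat is replicated into a 72 pixel temporary scanline
 * (24 -> 48 -> 72, the first multiple of 24 that reaches 64), assuming the
 * scaled span is long enough, so that each scanline function call covers a
 * reasonably long run of pixels instead of wrapping around every 24 pixels.
 */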
55
56 static force_inline pixman_bool_t
57 repeat (pixman_repeat_t repeat, int *c, int size)
58 {
59 if (repeat == PIXMAN_REPEAT_NONE)
60 {
61 if (*c < 0 || *c >= size)
62 return FALSE;
63 }
64 else if (repeat == PIXMAN_REPEAT_NORMAL)
65 {
66 while (*c >= size)
67 *c -= size;
68 while (*c < 0)
69 *c += size;
70 }
71 else if (repeat == PIXMAN_REPEAT_PAD)
72 {
73 *c = CLIP (*c, 0, size - 1);
74 }
75 else /* REFLECT */
76 {
77 *c = MOD (*c, size * 2);
78 if (*c >= size)
79 *c = size * 2 - *c - 1;
80 }
81 return TRUE;
82 }
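
/* A few worked examples of the coordinate wrapping above, for a source
 * dimension of size = 4 and an out-of-range coordinate *c = 5:
 *
 *   PIXMAN_REPEAT_NONE:    returns FALSE (the sample is outside the image)
 *   PIXMAN_REPEAT_NORMAL:  *c becomes 1  (5 mod 4)
 *   PIXMAN_REPEAT_PAD:     *c becomes 3  (clamped to the last sample)
 *   PIXMAN_REPEAT_REFLECT: *c becomes 2  (5 mod 8 = 5, then 4 * 2 - 5 - 1)
 */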
83
84 static force_inline int
85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86 {
87 return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88 ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89 }
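
/* For example, with BILINEAR_INTERPOLATION_BITS set to 7 (a common
 * configuration; the actual value comes from pixman-private.h), a fixed-point
 * coordinate with fractional part 0.25 (low 16 bits = 0x4000) yields a weight
 * of 32 out of a BILINEAR_INTERPOLATION_RANGE of 128, i.e. the weight is the
 * top BILINEAR_INTERPOLATION_BITS bits of the 16-bit fraction.
 */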
90
91 #if BILINEAR_INTERPOLATION_BITS <= 4
92 /* Inspired by Filter_32_opaque from Skia */
93 static force_inline uint32_t
94 bilinear_interpolation (uint32_t tl, uint32_t tr,
95 uint32_t bl, uint32_t br,
96 int distx, int disty)
97 {
98 int distxy, distxiy, distixy, distixiy;
99 uint32_t lo, hi;
100
101 distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102 disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103
104 distxy = distx * disty;
105 distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
106 distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
107 distixiy =
108 16 * 16 - (disty << 4) -
109 (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110
111 lo = (tl & 0xff00ff) * distixiy;
112 hi = ((tl >> 8) & 0xff00ff) * distixiy;
113
114 lo += (tr & 0xff00ff) * distxiy;
115 hi += ((tr >> 8) & 0xff00ff) * distxiy;
116
117 lo += (bl & 0xff00ff) * distixy;
118 hi += ((bl >> 8) & 0xff00ff) * distixy;
119
120 lo += (br & 0xff00ff) * distxy;
121 hi += ((br >> 8) & 0xff00ff) * distxy;
122
123 return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124 }
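
/* Worked example for the weight arithmetic above: at the exact midpoint
 * distx = disty = 8 (i.e. 0.5 at 4-bit precision) the four weights become
 * distxy = 64, distxiy = 128 - 64 = 64, distixy = 64 and
 * distixiy = 256 - 128 - 128 + 64 = 64, which sum to 256, so each of the
 * four source pixels contributes equally and the final shift/mask step
 * renormalizes the accumulated sums back to 8 bits per channel.
 */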
125
126 #else
127 #if SIZEOF_LONG > 4
128
129 static force_inline uint32_t
130 bilinear_interpolation (uint32_t tl, uint32_t tr,
131 uint32_t bl, uint32_t br,
132 int distx, int disty)
133 {
134 uint64_t distxy, distxiy, distixy, distixiy;
135 uint64_t tl64, tr64, bl64, br64;
136 uint64_t f, r;
137
138 distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139 disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140
141 distxy = distx * disty;
142 distxiy = distx * (256 - disty);
143 distixy = (256 - distx) * disty;
144 distixiy = (256 - distx) * (256 - disty);
145
146 /* Alpha and Blue */
147 tl64 = tl & 0xff0000ff;
148 tr64 = tr & 0xff0000ff;
149 bl64 = bl & 0xff0000ff;
150 br64 = br & 0xff0000ff;
151
152 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153 r = f & 0x0000ff0000ff0000ull;
154
155 /* Red and Green */
156 tl64 = tl;
157 tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158
159 tr64 = tr;
160 tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161
162 bl64 = bl;
163 bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164
165 br64 = br;
166 br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167
168 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169 r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170
171 return (uint32_t)(r >> 16);
172 }
173
174 #else
175
176 static force_inline uint32_t
177 bilinear_interpolation (uint32_t tl, uint32_t tr,
178 uint32_t bl, uint32_t br,
179 int distx, int disty)
180 {
181 int distxy, distxiy, distixy, distixiy;
182 uint32_t f, r;
183
184 distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
185 disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
186
187 distxy = distx * disty;
188 distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
189 distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
190 distixiy =
191 256 * 256 - (disty << 8) -
192 (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
193
194 /* Blue */
195 r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
196 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
197
198 /* Green */
199 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
200 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
201 r |= f & 0xff000000;
202
203 tl >>= 16;
204 tr >>= 16;
205 bl >>= 16;
206 br >>= 16;
207 r >>= 16;
208
209 /* Red */
210 f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
211 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy;
212 r |= f & 0x00ff0000;
213
214 /* Alpha */
215 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
216 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy;
217 r |= f & 0xff000000;
218
219 return r;
220 }
221
222 #endif
223 #endif /* BILINEAR_INTERPOLATION_BITS <= 4 */
224
225 static force_inline argb_t
226 bilinear_interpolation_float (argb_t tl, argb_t tr,
227 argb_t bl, argb_t br,
228 float distx, float disty)
229 {
230 float distxy, distxiy, distixy, distixiy;
231 argb_t r;
232
233 distxy = distx * disty;
234 distxiy = distx * (1.f - disty);
235 distixy = (1.f - distx) * disty;
236 distixiy = (1.f - distx) * (1.f - disty);
237
238 r.a = tl.a * distixiy + tr.a * distxiy +
239 bl.a * distixy + br.a * distxy;
240 r.r = tl.r * distixiy + tr.r * distxiy +
241 bl.r * distixy + br.r * distxy;
242 r.g = tl.g * distixiy + tr.g * distxiy +
243 bl.g * distixy + br.g * distxy;
244 r.b = tl.b * distixiy + tr.b * distxiy +
245 bl.b * distixy + br.b * distxy;
246
247 return r;
248 }
249
250 /*
251 * For each scanline fetched from source image with PAD repeat:
252 * - calculate how many pixels need to be padded on the left side
253 * - calculate how many pixels need to be padded on the right side
254 * - update width to only count pixels which are fetched from the image
255 * All this information is returned via the 'width', 'left_pad' and 'right_pad'
256 * arguments. The code assumes that 'unit_x' is positive.
257 *
258 * Note: 64-bit math is used in order to avoid potential overflows, which
259 * is probably excessive in many cases. This particular function
260 * may need its own correctness test and performance tuning.
261 */
262 static force_inline void
263 pad_repeat_get_scanline_bounds (int32_t source_image_width,
264 pixman_fixed_t vx,
265 pixman_fixed_t unit_x,
266 int32_t * width,
267 int32_t * left_pad,
268 int32_t * right_pad)
269 {
270 int64_t max_vx = (int64_t) source_image_width << 16;
271 int64_t tmp;
272 if (vx < 0)
273 {
274 tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
275 if (tmp > *width)
276 {
277 *left_pad = *width;
278 *width = 0;
279 }
280 else
281 {
282 *left_pad = (int32_t) tmp;
283 *width -= (int32_t) tmp;
284 }
285 }
286 else
287 {
288 *left_pad = 0;
289 }
290 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
291 if (tmp < 0)
292 {
293 *right_pad = *width;
294 *width = 0;
295 }
296 else if (tmp >= *width)
297 {
298 *right_pad = 0;
299 }
300 else
301 {
302 *right_pad = *width - (int32_t) tmp;
303 *width = (int32_t) tmp;
304 }
305 }
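
/* Worked example: for a 100 pixel wide source, vx = -2 << 16 (two pixels to
 * the left of the image), unit_x = 1 << 16 (identity scale) and an initial
 * *width of 110, the function above computes left_pad = 2, *width = 100 and
 * right_pad = 8: the first two destination pixels sample the left padding,
 * the next 100 sample the image itself, and the last 8 sample the right
 * padding, the three counts always adding up to the original *width.
 */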
306
307 /* A macroified version of specialized nearest scalers for some
308 * common 8888 and 565 formats. It supports SRC and OVER ops.
309 *
310 * There are two repeat variants: one that handles NORMAL repeat,
311 * and one without repeat handling that only works if the source region
312 * used is completely covered by the pre-repeated source samples.
313 *
314 * The loops are unrolled to process two pixels per iteration for better
315 * performance on most CPU architectures (superscalar processors
316 * can issue several operations simultaneously, other processors can hide
317 * instruction latencies by pipelining operations). Unrolling further
318 * does not make much sense because the compiler would soon start running
319 * out of spare registers.
320 */
321
322 #define GET_8888_ALPHA(s) ((s) >> 24)
323 /* This is not actually used since we don't have an OVER with
324 565 source, but it is needed to build. */
325 #define GET_0565_ALPHA(s) 0xff
326 #define GET_x888_ALPHA(s) 0xff
327
328 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
329 src_type_t, dst_type_t, OP, repeat_mode) \
330 static force_inline void \
331 scanline_func_name (dst_type_t *dst, \
332 const src_type_t *src, \
333 int32_t w, \
334 pixman_fixed_t vx, \
335 pixman_fixed_t unit_x, \
336 pixman_fixed_t src_width_fixed, \
337 pixman_bool_t fully_transparent_src) \
338 { \
339 uint32_t d; \
340 src_type_t s1, s2; \
341 uint8_t a1, a2; \
342 int x1, x2; \
343 \
344 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
345 return; \
346 \
347 if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
348 abort(); \
349 \
350 while ((w -= 2) >= 0) \
351 { \
352 x1 = pixman_fixed_to_int (vx); \
353 vx += unit_x; \
354 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
355 { \
356 /* This works because we know that unit_x is positive */ \
357 while (vx >= 0) \
358 vx -= src_width_fixed; \
359 } \
360 s1 = *(src + x1); \
361 \
362 x2 = pixman_fixed_to_int (vx); \
363 vx += unit_x; \
364 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
365 { \
366 /* This works because we know that unit_x is positive */ \
367 while (vx >= 0) \
368 vx -= src_width_fixed; \
369 } \
370 s2 = *(src + x2); \
371 \
372 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
373 { \
374 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
375 a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
376 \
377 if (a1 == 0xff) \
378 { \
379 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
380 } \
381 else if (s1) \
382 { \
383 d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
384 s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
385 a1 ^= 0xff; \
386 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
387 *dst = convert_8888_to_ ## DST_FORMAT (d); \
388 } \
389 dst++; \
390 \
391 if (a2 == 0xff) \
392 { \
393 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
394 } \
395 else if (s2) \
396 { \
397 d = convert_## DST_FORMAT ## _to_8888 (*dst); \
398 s2 = convert_## SRC_FORMAT ## _to_8888 (s2); \
399 a2 ^= 0xff; \
400 UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
401 *dst = convert_8888_to_ ## DST_FORMAT (d); \
402 } \
403 dst++; \
404 } \
405 else /* PIXMAN_OP_SRC */ \
406 { \
407 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
408 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
409 } \
410 } \
411 \
412 if (w & 1) \
413 { \
414 x1 = pixman_fixed_to_int (vx); \
415 s1 = *(src + x1); \
416 \
417 if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
418 { \
419 a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
420 \
421 if (a1 == 0xff) \
422 { \
423 *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
424 } \
425 else if (s1) \
426 { \
427 d = convert_## DST_FORMAT ## _to_8888 (*dst); \
428 s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
429 a1 ^= 0xff; \
430 UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
431 *dst = convert_8888_to_ ## DST_FORMAT (d); \
432 } \
433 dst++; \
434 } \
435 else /* PIXMAN_OP_SRC */ \
436 { \
437 *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
438 } \
439 } \
440 }
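
/* Note on the OVER arithmetic in the scanline template above: after
 * 'a1 ^= 0xff', UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1) computes, per 8-bit
 * channel and up to rounding details defined by the macro itself, roughly
 * d = d * (255 - src_alpha) / 255 + s1, i.e. the usual premultiplied OVER
 * operator dest = src + (1 - alpha (src)) * dest. The a1 == 0xff and
 * s1 == 0 branches are just shortcuts for fully opaque and fully
 * transparent source pixels.
 */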
441
442 #define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
443 dst_type_t, repeat_mode, have_mask, mask_is_solid) \
444 static void \
445 fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
446 pixman_composite_info_t *info) \
447 { \
448 PIXMAN_COMPOSITE_ARGS (info); \
449 dst_type_t *dst_line; \
450 mask_type_t *mask_line; \
451 src_type_t *src_first_line; \
452 int y; \
453 pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
454 pixman_fixed_t max_vy; \
455 pixman_vector_t v; \
456 pixman_fixed_t vx, vy; \
457 pixman_fixed_t unit_x, unit_y; \
458 int32_t left_pad, right_pad; \
459 \
460 src_type_t *src; \
461 dst_type_t *dst; \
462 mask_type_t solid_mask; \
463 const mask_type_t *mask = &solid_mask; \
464 int src_stride, mask_stride, dst_stride; \
465 \
466 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
467 if (have_mask) \
468 { \
469 if (mask_is_solid) \
470 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
471 else \
472 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
473 mask_stride, mask_line, 1); \
474 } \
475 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
476 * transformed from destination space to source space */ \
477 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
478 \
479 /* reference point is the center of the pixel */ \
480 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
481 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
482 v.vector[2] = pixman_fixed_1; \
483 \
484 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
485 return; \
486 \
487 unit_x = src_image->common.transform->matrix[0][0]; \
488 unit_y = src_image->common.transform->matrix[1][1]; \
489 \
490 /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
491 v.vector[0] -= pixman_fixed_e; \
492 v.vector[1] -= pixman_fixed_e; \
493 \
494 vx = v.vector[0]; \
495 vy = v.vector[1]; \
496 \
497 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
498 { \
499 max_vy = pixman_int_to_fixed (src_image->bits.height); \
500 \
501 /* Clamp repeating positions inside the actual samples */ \
502 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
503 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
504 } \
505 \
506 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
507 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
508 { \
509 pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
510 &width, &left_pad, &right_pad); \
511 vx += left_pad * unit_x; \
512 } \
513 \
514 while (--height >= 0) \
515 { \
516 dst = dst_line; \
517 dst_line += dst_stride; \
518 if (have_mask && !mask_is_solid) \
519 { \
520 mask = mask_line; \
521 mask_line += mask_stride; \
522 } \
523 \
524 y = pixman_fixed_to_int (vy); \
525 vy += unit_y; \
526 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
527 repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
528 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
529 { \
530 repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
531 src = src_first_line + src_stride * y; \
532 if (left_pad > 0) \
533 { \
534 scanline_func (mask, dst, \
535 src + src_image->bits.width - src_image->bits.width + 1, \
536 left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
537 } \
538 if (width > 0) \
539 { \
540 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
541 dst + left_pad, src + src_image->bits.width, width, \
542 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
543 } \
544 if (right_pad > 0) \
545 { \
546 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
547 dst + left_pad + width, src + src_image->bits.width, \
548 right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
549 } \
550 } \
551 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
552 { \
553 static const src_type_t zero[1] = { 0 }; \
554 if (y < 0 || y >= src_image->bits.height) \
555 { \
556 scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
557 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
558 continue; \
559 } \
560 src = src_first_line + src_stride * y; \
561 if (left_pad > 0) \
562 { \
563 scanline_func (mask, dst, zero + 1, left_pad, \
564 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
565 } \
566 if (width > 0) \
567 { \
568 scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
569 dst + left_pad, src + src_image->bits.width, width, \
570 vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
571 } \
572 if (right_pad > 0) \
573 { \
574 scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
575 dst + left_pad + width, zero + 1, right_pad, \
576 -pixman_fixed_e, 0, src_width_fixed, TRUE); \
577 } \
578 } \
579 else \
580 { \
581 src = src_first_line + src_stride * y; \
582 scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
583 unit_x, src_width_fixed, FALSE); \
584 } \
585 } \
586 }
587
588 /* A workaround for old versions of Sun Studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
589 #define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
590 dst_type_t, repeat_mode, have_mask, mask_is_solid) \
591 FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
592 dst_type_t, repeat_mode, have_mask, mask_is_solid)
593
594 #define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
595 repeat_mode) \
596 static force_inline void \
597 scanline_func##scale_func_name##_wrapper ( \
598 const uint8_t *mask, \
599 dst_type_t *dst, \
600 const src_type_t *src, \
601 int32_t w, \
602 pixman_fixed_t vx, \
603 pixman_fixed_t unit_x, \
604 pixman_fixed_t max_vx, \
605 pixman_bool_t fully_transparent_src) \
606 { \
607 scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
608 } \
609 FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
610 src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
611
612 #define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
613 repeat_mode) \
614 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
615 dst_type_t, repeat_mode)
616
617 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
618 src_type_t, dst_type_t, OP, repeat_mode) \
619 FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
620 SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
621 OP, repeat_mode) \
622 FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
623 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
624 src_type_t, dst_type_t, repeat_mode)
625
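/* A hypothetical instantiation sketch (real instantiations live in the
 * implementation files, e.g. pixman-fast-path.c): expanding
 *
 *     FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
 *
 * would generate a scaled_nearest_scanline_8888_8888_none_SRC() scanline via
 * FAST_NEAREST_SCANLINE plus a fast_composite_scaled_nearest_8888_8888_none_SRC()
 * main loop via FAST_NEAREST_MAINLOOP_NOMASK, which is exactly the name that a
 * SIMPLE_NEAREST_FAST_PATH_NONE (SRC, ..., 8888_8888) table entry below
 * refers to.
 */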
626
627 #define SCALED_NEAREST_FLAGS \
628 (FAST_PATH_SCALE_TRANSFORM | \
629 FAST_PATH_NO_ALPHA_MAP | \
630 FAST_PATH_NEAREST_FILTER | \
631 FAST_PATH_NO_ACCESSORS | \
632 FAST_PATH_NARROW_FORMAT)
633
634 #define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
635 { PIXMAN_OP_ ## op, \
636 PIXMAN_ ## s, \
637 (SCALED_NEAREST_FLAGS | \
638 FAST_PATH_NORMAL_REPEAT | \
639 FAST_PATH_X_UNIT_POSITIVE), \
640 PIXMAN_null, 0, \
641 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
642 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
643 }
644
645 #define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
646 { PIXMAN_OP_ ## op, \
647 PIXMAN_ ## s, \
648 (SCALED_NEAREST_FLAGS | \
649 FAST_PATH_PAD_REPEAT | \
650 FAST_PATH_X_UNIT_POSITIVE), \
651 PIXMAN_null, 0, \
652 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
653 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
654 }
655
656 #define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
657 { PIXMAN_OP_ ## op, \
658 PIXMAN_ ## s, \
659 (SCALED_NEAREST_FLAGS | \
660 FAST_PATH_NONE_REPEAT | \
661 FAST_PATH_X_UNIT_POSITIVE), \
662 PIXMAN_null, 0, \
663 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
664 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
665 }
666
667 #define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
668 { PIXMAN_OP_ ## op, \
669 PIXMAN_ ## s, \
670 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
671 PIXMAN_null, 0, \
672 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
673 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
674 }
675
676 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
677 { PIXMAN_OP_ ## op, \
678 PIXMAN_ ## s, \
679 (SCALED_NEAREST_FLAGS | \
680 FAST_PATH_NORMAL_REPEAT | \
681 FAST_PATH_X_UNIT_POSITIVE), \
682 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
683 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
684 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
685 }
686
687 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
688 { PIXMAN_OP_ ## op, \
689 PIXMAN_ ## s, \
690 (SCALED_NEAREST_FLAGS | \
691 FAST_PATH_PAD_REPEAT | \
692 FAST_PATH_X_UNIT_POSITIVE), \
693 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
694 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
695 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
696 }
697
698 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
699 { PIXMAN_OP_ ## op, \
700 PIXMAN_ ## s, \
701 (SCALED_NEAREST_FLAGS | \
702 FAST_PATH_NONE_REPEAT | \
703 FAST_PATH_X_UNIT_POSITIVE), \
704 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
705 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
706 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
707 }
708
709 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
710 { PIXMAN_OP_ ## op, \
711 PIXMAN_ ## s, \
712 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
713 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
714 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
715 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
716 }
717
718 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
719 { PIXMAN_OP_ ## op, \
720 PIXMAN_ ## s, \
721 (SCALED_NEAREST_FLAGS | \
722 FAST_PATH_NORMAL_REPEAT | \
723 FAST_PATH_X_UNIT_POSITIVE), \
724 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
725 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
726 fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
727 }
728
729 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
730 { PIXMAN_OP_ ## op, \
731 PIXMAN_ ## s, \
732 (SCALED_NEAREST_FLAGS | \
733 FAST_PATH_PAD_REPEAT | \
734 FAST_PATH_X_UNIT_POSITIVE), \
735 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
736 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
737 fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
738 }
739
740 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
741 { PIXMAN_OP_ ## op, \
742 PIXMAN_ ## s, \
743 (SCALED_NEAREST_FLAGS | \
744 FAST_PATH_NONE_REPEAT | \
745 FAST_PATH_X_UNIT_POSITIVE), \
746 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
747 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
748 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
749 }
750
751 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
752 { PIXMAN_OP_ ## op, \
753 PIXMAN_ ## s, \
754 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \
755 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
756 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
757 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
758 }
759
760 /* Prefer the 'cover' variant, because it is faster */
761 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
762 SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
763 SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
764 SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
765 SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
766
767 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
768 SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
769 SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
770 SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
771
772 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
773 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
774 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
775 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
776 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
777
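/* Illustrative sketch only (assumed usage, mirroring how implementation files
 * such as pixman-fast-path.c build their tables): the macros above expand to
 * pixman_fast_path_t initializers, several per macro, so a fast path table
 * typically looks like
 *
 *     static const pixman_fast_path_t c_fast_paths[] =
 *     {
 *         SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
 *         SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
 *         ...
 *         { PIXMAN_OP_NONE },
 *     };
 *
 * where the trailing PIXMAN_OP_NONE entry terminates the table.
 */
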
778 /*****************************************************************************/
779
780 /*
781 * Identify 5 zones in each scanline for bilinear scaling, depending on
782 * whether the 2 pixels to be interpolated are fetched from the image itself,
783 * from the padding area around it, or partly from both.
784 */
785 static force_inline void
786 bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width,
787 pixman_fixed_t vx,
788 pixman_fixed_t unit_x,
789 int32_t * left_pad,
790 int32_t * left_tz,
791 int32_t * width,
792 int32_t * right_tz,
793 int32_t * right_pad)
794 {
795 int width1 = *width, left_pad1, right_pad1;
796 int width2 = *width, left_pad2, right_pad2;
797
798 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
799 &width1, &left_pad1, &right_pad1);
800 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
801 unit_x, &width2, &left_pad2, &right_pad2);
802
803 *left_pad = left_pad2;
804 *left_tz = left_pad1 - left_pad2;
805 *right_tz = right_pad2 - right_pad1;
806 *right_pad = right_pad1;
807 *width -= *left_pad + *left_tz + *right_tz + *right_pad;
808 }
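
/* Worked example of the 5 zones: for a 100 pixel wide source, unit_x = 1 << 16,
 * vx = -3 << 15 (i.e. -1.5 pixels) and an initial *width of 104, the two
 * pad_repeat_get_scanline_bounds() calls above yield left_pad = 1 (both
 * samples of the pair in the left padding), left_tz = 1 (left sample in the
 * padding, right sample in the image), *width = 99 (both samples inside the
 * image), right_tz = 1 and right_pad = 2, which again add up to the
 * original 104.
 */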
809
810 /*
811 * Main loop template for single pass bilinear scaling. It needs to be
812 * provided with 'scanline_func' which should do the compositing operation.
813 * The needed function has the following prototype:
814 *
815 * scanline_func (dst_type_t * dst,
816 * const mask_type_t * mask,
817 * const src_type_t * src_top,
818 * const src_type_t * src_bottom,
819 * int32_t width,
820 * int weight_top,
821 * int weight_bottom,
822 * pixman_fixed_t vx,
823 * pixman_fixed_t unit_x,
824 * pixman_fixed_t max_vx,
825 * pixman_bool_t zero_src)
826 *
827 * Where:
828 * dst - destination scanline buffer for storing results
829 * mask - mask buffer (or single value for solid mask)
830 * src_top, src_bottom - two source scanlines
831 * width - number of pixels to process
832 * weight_top - weight of the top row for interpolation
833 * weight_bottom - weight of the bottom row for interpolation
834 * vx - initial position for fetching the first pair of
835 * pixels from the source buffer
836 * unit_x - position increment needed to move to the next pair
837 * of pixels
838 * max_vx - image size as a fixed point value, can be used for
839 * implementing NORMAL repeat (when it is supported)
840 * zero_src - boolean hint variable, which is set to TRUE when
841 * all source pixels are fetched from zero padding
842 * zone for NONE repeat
843 *
844 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
845 * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
846 * for NONE repeat when handling fuzzy antialiased top or bottom image
847 * edges. Also, both the top and bottom weight variables are guaranteed to
848 * have values less than BILINEAR_INTERPOLATION_RANGE.
849 * For example, the weights can fit into an unsigned byte or be used
850 * with 8-bit SIMD multiplication instructions for 8-bit interpolation
851 * precision.
852 */
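
/* A minimal sketch of such a scanline function (illustrative only, not part
 * of pixman itself): an unmasked SRC operation on 8888 pixels built from the
 * helpers defined earlier in this header. Real scanline functions are either
 * generated by templates or hand-optimized with SIMD.
 */
#if 0
static force_inline void
example_bilinear_scanline_8888_8888_SRC (uint32_t *       dst,
                                         const uint32_t * mask,       /* unused: no mask */
                                         const uint32_t * src_top,
                                         const uint32_t * src_bottom,
                                         int32_t          w,
                                         int              weight_top,
                                         int              weight_bottom,
                                         pixman_fixed_t   vx,
                                         pixman_fixed_t   unit_x,
                                         pixman_fixed_t   max_vx,     /* unused: no NORMAL repeat */
                                         pixman_bool_t    zero_src)
{
    while (w-- > 0)
    {
	int x = pixman_fixed_to_int (vx);
	int distx = pixman_fixed_to_bilinear_weight (vx);

	/* weight_bottom plays the role of disty; when weight_top +
	 * weight_bottom == BILINEAR_INTERPOLATION_RANGE this is exact. */
	*dst++ = bilinear_interpolation (src_top[x], src_top[x + 1],
	                                 src_bottom[x], src_bottom[x + 1],
	                                 distx, weight_bottom);
	vx += unit_x;
    }
}
#endif
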
853 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
854 dst_type_t, repeat_mode, flags) \
855 static void \
856 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \
857 pixman_composite_info_t *info) \
858 { \
859 PIXMAN_COMPOSITE_ARGS (info); \
860 dst_type_t *dst_line; \
861 mask_type_t *mask_line; \
862 src_type_t *src_first_line; \
863 int y1, y2; \
864 pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
865 pixman_vector_t v; \
866 pixman_fixed_t vx, vy; \
867 pixman_fixed_t unit_x, unit_y; \
868 int32_t left_pad, left_tz, right_tz, right_pad; \
869 \
870 dst_type_t *dst; \
871 mask_type_t solid_mask; \
872 const mask_type_t *mask = &solid_mask; \
873 int src_stride, mask_stride, dst_stride; \
874 \
875 int src_width; \
876 pixman_fixed_t src_width_fixed; \
877 int max_x; \
878 pixman_bool_t need_src_extension; \
879 \
880 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
881 if (flags & FLAG_HAVE_SOLID_MASK) \
882 { \
883 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
884 mask_stride = 0; \
885 } \
886 else if (flags & FLAG_HAVE_NON_SOLID_MASK) \
887 { \
888 PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
889 mask_stride, mask_line, 1); \
890 } \
891 \
892 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
893 * transformed from destination space to source space */ \
894 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
895 \
896 /* reference point is the center of the pixel */ \
897 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
898 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
899 v.vector[2] = pixman_fixed_1; \
900 \
901 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
902 return; \
903 \
904 unit_x = src_image->common.transform->matrix[0][0]; \
905 unit_y = src_image->common.transform->matrix[1][1]; \
906 \
907 v.vector[0] -= pixman_fixed_1 / 2; \
908 v.vector[1] -= pixman_fixed_1 / 2; \
909 \
910 vy = v.vector[1]; \
911 \
912 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
913 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
914 { \
915 bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \
916 &left_pad, &left_tz, &width, &right_tz, &right_pad); \
917 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
918 { \
919 /* PAD repeat does not need special handling for 'transition zones' and */ \
920 /* they can be combined with 'padding zones' safely */ \
921 left_pad += left_tz; \
922 right_pad += right_tz; \
923 left_tz = right_tz = 0; \
924 } \
925 v.vector[0] += left_pad * unit_x; \
926 } \
927 \
928 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
929 { \
930 vx = v.vector[0]; \
931 repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \
932 max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \
933 \
934 if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \
935 { \
936 src_width = 0; \
937 \
938 while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \
939 src_width += src_image->bits.width; \
940 \
941 need_src_extension = TRUE; \
942 } \
943 else \
944 { \
945 src_width = src_image->bits.width; \
946 need_src_extension = FALSE; \
947 } \
948 \
949 src_width_fixed = pixman_int_to_fixed (src_width); \
950 } \
951 \
952 while (--height >= 0) \
953 { \
954 int weight1, weight2; \
955 dst = dst_line; \
956 dst_line += dst_stride; \
957 vx = v.vector[0]; \
958 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
959 { \
960 mask = mask_line; \
961 mask_line += mask_stride; \
962 } \
963 \
964 y1 = pixman_fixed_to_int (vy); \
965 weight2 = pixman_fixed_to_bilinear_weight (vy); \
966 if (weight2) \
967 { \
968 /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \
969 y2 = y1 + 1; \
970 weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \
971 } \
972 else \
973 { \
974 /* set both top and bottom row to the same scanline and tweak weights */ \
975 y2 = y1; \
976 weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \
977 } \
978 vy += unit_y; \
979 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
980 { \
981 src_type_t *src1, *src2; \
982 src_type_t buf1[2]; \
983 src_type_t buf2[2]; \
984 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \
985 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \
986 src1 = src_first_line + src_stride * y1; \
987 src2 = src_first_line + src_stride * y2; \
988 \
989 if (left_pad > 0) \
990 { \
991 buf1[0] = buf1[1] = src1[0]; \
992 buf2[0] = buf2[1] = src2[0]; \
993 scanline_func (dst, mask, \
994 buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \
995 dst += left_pad; \
996 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
997 mask += left_pad; \
998 } \
999 if (width > 0) \
1000 { \
1001 scanline_func (dst, mask, \
1002 src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
1003 dst += width; \
1004 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1005 mask += width; \
1006 } \
1007 if (right_pad > 0) \
1008 { \
1009 buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \
1010 buf2[0] = buf2[1] = src2[src_image->bits.width - 1]; \
1011 scanline_func (dst, mask, \
1012 buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \
1013 } \
1014 } \
1015 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
1016 { \
1017 src_type_t *src1, *src2; \
1018 src_type_t buf1[2]; \
1019 src_type_t buf2[2]; \
1020 /* handle top/bottom zero padding by just setting weights to 0 if needed */ \
1021 if (y1 < 0) \
1022 { \
1023 weight1 = 0; \
1024 y1 = 0; \
1025 } \
1026 if (y1 >= src_image->bits.height) \
1027 { \
1028 weight1 = 0; \
1029 y1 = src_image->bits.height - 1; \
1030 } \
1031 if (y2 < 0) \
1032 { \
1033 weight2 = 0; \
1034 y2 = 0; \
1035 } \
1036 if (y2 >= src_image->bits.height) \
1037 { \
1038 weight2 = 0; \
1039 y2 = src_image->bits.height - 1; \
1040 } \
1041 src1 = src_first_line + src_stride * y1; \
1042 src2 = src_first_line + src_stride * y2; \
1043 \
1044 if (left_pad > 0) \
1045 { \
1046 buf1[0] = buf1[1] = 0; \
1047 buf2[0] = buf2[1] = 0; \
1048 scanline_func (dst, mask, \
1049 buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \
1050 dst += left_pad; \
1051 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1052 mask += left_pad; \
1053 } \
1054 if (left_tz > 0) \
1055 { \
1056 buf1[0] = 0; \
1057 buf1[1] = src1[0]; \
1058 buf2[0] = 0; \
1059 buf2[1] = src2[0]; \
1060 scanline_func (dst, mask, \
1061 buf1, buf2, left_tz, weight1, weight2, \
1062 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1063 dst += left_tz; \
1064 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1065 mask += left_tz; \
1066 vx += left_tz * unit_x; \
1067 } \
1068 if (width > 0) \
1069 { \
1070 scanline_func (dst, mask, \
1071 src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \
1072 dst += width; \
1073 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1074 mask += width; \
1075 vx += width * unit_x; \
1076 } \
1077 if (right_tz > 0) \
1078 { \
1079 buf1[0] = src1[src_image->bits.width - 1]; \
1080 buf1[1] = 0; \
1081 buf2[0] = src2[src_image->bits.width - 1]; \
1082 buf2[1] = 0; \
1083 scanline_func (dst, mask, \
1084 buf1, buf2, right_tz, weight1, weight2, \
1085 pixman_fixed_frac (vx), unit_x, 0, FALSE); \
1086 dst += right_tz; \
1087 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1088 mask += right_tz; \
1089 } \
1090 if (right_pad > 0) \
1091 { \
1092 buf1[0] = buf1[1] = 0; \
1093 buf2[0] = buf2[1] = 0; \
1094 scanline_func (dst, mask, \
1095 buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \
1096 } \
1097 } \
1098 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
1099 { \
1100 int32_t num_pixels; \
1101 int32_t width_remain; \
1102 src_type_t * src_line_top; \
1103 src_type_t * src_line_bottom; \
1104 src_type_t buf1[2]; \
1105 src_type_t buf2[2]; \
1106 src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \
1107 src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \
1108 int i, j; \
1109 \
1110 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \
1111 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \
1112 src_line_top = src_first_line + src_stride * y1; \
1113 src_line_bottom = src_first_line + src_stride * y2; \
1114 \
1115 if (need_src_extension) \
1116 { \
1117 for (i=0; i<src_width;) \
1118 { \
1119 for (j=0; j<src_image->bits.width; j++, i++) \
1120 { \
1121 extended_src_line0[i] = src_line_top[j]; \
1122 extended_src_line1[i] = src_line_bottom[j]; \
1123 } \
1124 } \
1125 \
1126 src_line_top = &extended_src_line0[0]; \
1127 src_line_bottom = &extended_src_line1[0]; \
1128 } \
1129 \
1130 /* Top & Bottom wrap around buffer */ \
1131 buf1[0] = src_line_top[src_width - 1]; \
1132 buf1[1] = src_line_top[0]; \
1133 buf2[0] = src_line_bottom[src_width - 1]; \
1134 buf2[1] = src_line_bottom[0]; \
1135 \
1136 width_remain = width; \
1137 \
1138 while (width_remain > 0) \
1139 { \
1140 /* We use src_width_fixed because it keeps vx within the original source range */ \
1141 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1142 \
1143 /* Wrap around part */ \
1144 if (pixman_fixed_to_int (vx) == src_width - 1) \
1145 { \
1146 /* for positive unit_x \
1147 * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \
1148 * \
1149 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1150 * So we are safe from overflow. \
1151 */ \
1152 num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \
1153 \
1154 if (num_pixels > width_remain) \
1155 num_pixels = width_remain; \
1156 \
1157 scanline_func (dst, mask, buf1, buf2, num_pixels, \
1158 weight1, weight2, pixman_fixed_frac(vx), \
1159 unit_x, src_width_fixed, FALSE); \
1160 \
1161 width_remain -= num_pixels; \
1162 vx += num_pixels * unit_x; \
1163 dst += num_pixels; \
1164 \
1165 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1166 mask += num_pixels; \
1167 \
1168 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
1169 } \
1170 \
1171 /* Normal scanline composite */ \
1172 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \
1173 { \
1174 /* for positive unit_x \
1175 * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \
1176 * \
1177 * vx is in range [0, src_width_fixed - pixman_fixed_e] \
1178 * So we are safe from overflow here. \
1179 */ \
1180 num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \
1181 / unit_x) + 1; \
1182 \
1183 if (num_pixels > width_remain) \
1184 num_pixels = width_remain; \
1185 \
1186 scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \
1187 weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \
1188 \
1189 width_remain -= num_pixels; \
1190 vx += num_pixels * unit_x; \
1191 dst += num_pixels; \
1192 \
1193 if (flags & FLAG_HAVE_NON_SOLID_MASK) \
1194 mask += num_pixels; \
1195 } \
1196 } \
1197 } \
1198 else \
1199 { \
1200 scanline_func (dst, mask, src_first_line + src_stride * y1, \
1201 src_first_line + src_stride * y2, width, \
1202 weight1, weight2, vx, unit_x, max_vx, FALSE); \
1203 } \
1204 } \
1205 }
1206
1207 /* A workaround for old versions of Sun Studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
1208 #define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
1209 dst_type_t, repeat_mode, flags) \
1210 FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
1211 dst_type_t, repeat_mode, flags)
1212
1213 #define SCALED_BILINEAR_FLAGS \
1214 (FAST_PATH_SCALE_TRANSFORM | \
1215 FAST_PATH_NO_ALPHA_MAP | \
1216 FAST_PATH_BILINEAR_FILTER | \
1217 FAST_PATH_NO_ACCESSORS | \
1218 FAST_PATH_NARROW_FORMAT)
1219
1220 #define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \
1221 { PIXMAN_OP_ ## op, \
1222 PIXMAN_ ## s, \
1223 (SCALED_BILINEAR_FLAGS | \
1224 FAST_PATH_PAD_REPEAT | \
1225 FAST_PATH_X_UNIT_POSITIVE), \
1226 PIXMAN_null, 0, \
1227 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1228 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
1229 }
1230
1231 #define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \
1232 { PIXMAN_OP_ ## op, \
1233 PIXMAN_ ## s, \
1234 (SCALED_BILINEAR_FLAGS | \
1235 FAST_PATH_NONE_REPEAT | \
1236 FAST_PATH_X_UNIT_POSITIVE), \
1237 PIXMAN_null, 0, \
1238 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1239 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
1240 }
1241
1242 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \
1243 { PIXMAN_OP_ ## op, \
1244 PIXMAN_ ## s, \
1245 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
1246 PIXMAN_null, 0, \
1247 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1248 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
1249 }
1250
1251 #define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \
1252 { PIXMAN_OP_ ## op, \
1253 PIXMAN_ ## s, \
1254 (SCALED_BILINEAR_FLAGS | \
1255 FAST_PATH_NORMAL_REPEAT | \
1256 FAST_PATH_X_UNIT_POSITIVE), \
1257 PIXMAN_null, 0, \
1258 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1259 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
1260 }
1261
1262 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
1263 { PIXMAN_OP_ ## op, \
1264 PIXMAN_ ## s, \
1265 (SCALED_BILINEAR_FLAGS | \
1266 FAST_PATH_PAD_REPEAT | \
1267 FAST_PATH_X_UNIT_POSITIVE), \
1268 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1269 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1270 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
1271 }
1272
1273 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
1274 { PIXMAN_OP_ ## op, \
1275 PIXMAN_ ## s, \
1276 (SCALED_BILINEAR_FLAGS | \
1277 FAST_PATH_NONE_REPEAT | \
1278 FAST_PATH_X_UNIT_POSITIVE), \
1279 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1280 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1281 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
1282 }
1283
1284 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
1285 { PIXMAN_OP_ ## op, \
1286 PIXMAN_ ## s, \
1287 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
1288 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1289 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1290 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
1291 }
1292
1293 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
1294 { PIXMAN_OP_ ## op, \
1295 PIXMAN_ ## s, \
1296 (SCALED_BILINEAR_FLAGS | \
1297 FAST_PATH_NORMAL_REPEAT | \
1298 FAST_PATH_X_UNIT_POSITIVE), \
1299 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
1300 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1301 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
1302 }
1303
1304 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
1305 { PIXMAN_OP_ ## op, \
1306 PIXMAN_ ## s, \
1307 (SCALED_BILINEAR_FLAGS | \
1308 FAST_PATH_PAD_REPEAT | \
1309 FAST_PATH_X_UNIT_POSITIVE), \
1310 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1311 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1312 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \
1313 }
1314
1315 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
1316 { PIXMAN_OP_ ## op, \
1317 PIXMAN_ ## s, \
1318 (SCALED_BILINEAR_FLAGS | \
1319 FAST_PATH_NONE_REPEAT | \
1320 FAST_PATH_X_UNIT_POSITIVE), \
1321 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1322 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1323 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \
1324 }
1325
1326 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
1327 { PIXMAN_OP_ ## op, \
1328 PIXMAN_ ## s, \
1329 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \
1330 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1331 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1332 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \
1333 }
1334
1335 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
1336 { PIXMAN_OP_ ## op, \
1337 PIXMAN_ ## s, \
1338 (SCALED_BILINEAR_FLAGS | \
1339 FAST_PATH_NORMAL_REPEAT | \
1340 FAST_PATH_X_UNIT_POSITIVE), \
1341 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
1342 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
1343 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \
1344 }
1345
1346 /* Prefer the 'cover' variant, because it is faster */
1347 #define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \
1348 SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \
1349 SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \
1350 SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \
1351 SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1352
1353 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \
1354 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
1355 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
1356 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \
1357 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1358
1359 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \
1360 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
1361 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
1362 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \
1363 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1364
1365 #endif
1366